btrfs-progs: check: introduce function to check inode_extref
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Phase of the check that the progress task reports on.
 *
 * The values are used as an index into task_position_string[] in
 * print_status_check(), so the order here must match that table.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
52
/* State handed to the progress callbacks (print_status_check() and friends). */
struct task_ctx {
        /* presumably non-zero when progress output was requested -- confirm */
        int progress_enabled;
        /* phase to name in the status line */
        enum task_position tp;

        /* handle passed to task_period_start() / task_period_wait() */
        struct task_info *info;
};
59
/*
 * File-scope state shared by the check passes.  None of these are
 * updated in this part of the file; the notes below are read off the
 * names and should be confirmed against the users.
 */

/* Running byte totals, presumably reported in the final summary. */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
/* List heads; the element types are not visible from here. */
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
/* Mode/feature switches, presumably set from options or the superblock. */
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Context for the progress-indicator task. */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation selected by the user; see parse_check_mode() for
 * the accepted spellings.  CHECK_MODE_UNKNOWN is what parse_check_mode()
 * returns for an unrecognised string.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        /* alias, not a distinct value */
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect for this run; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a back reference.  It is embedded as ->node in
 * struct data_backref and struct tree_backref.
 */
struct extent_backref {
        struct list_head list;
        /* presumably tells data_backref from tree_backref -- confirm */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
/* Back reference to a data extent; ->node is the common header. */
struct data_backref {
        struct extent_backref node;
        /*
         * parent and root share storage.  Presumably node.full_backref
         * says which one is meant -- confirm against the users.
         */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* a counter, unlike the one-bit node.found_ref flag */
        u32 found_ref;
};
115
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 {
118         return container_of(back, struct data_backref, node);
119 }
120
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a plain list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_record::orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* printed as the inode number by print_orphan_data_extents() */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
135
/* Back reference to a tree block; ->node is the common header. */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share storage, as in struct data_backref */
        union {
                u64 parent;
                u64 root;
        };
};
143
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 {
146         return container_of(back, struct tree_backref, node);
147 }
148
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * The value 2 needs both bits of that two-bit field.
 */
enum { FLAG_UNSET = 2 };
151
/*
 * Per-extent bookkeeping.  A record is reachable through its embedded
 * cache_extent and through its list links.
 */
struct extent_record {
        /* presumably holds struct extent_backref entries -- confirm */
        struct list_head backrefs;
        struct list_head dups;
        /* link used by to_extent_record() */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
178
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 {
181         return container_of(entry, struct extent_record, list);
182 }
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         u8 filetype;
190         u8 ref_type;
191         int errors;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 {
200         return list_entry(entry, struct inode_backref, list);
201 }
202
/*
 * Snapshot of the fields of a root item that the checker keeps around.
 * NOTE(review): field meanings are read off the names; the users are
 * outside this part of the file.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
213
/*
 * Error bits for inode_backref::errors; print_ref_error() turns them
 * into text.  The ROOT_* bits are presumably stored in
 * root_backref::errors -- confirm.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
227
/*
 * A byte range [start, start + len) of a file with no file extent.
 * Holes are kept in the rb tree inode_record::holes, ordered by
 * compare_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
233
/*
 * Everything collected about one inode while walking a tree.  Records
 * are reference counted and cloned before modification when shared
 * (see get_inode_rec()).
 */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* compared against nlink in can_free_inode_rec() */
        u32 found_link;
        u64 found_size;
        /* get_inode_rec() starts this at (u64)-1 */
        u64 extent_start;
        u64 extent_end;
        /* rb tree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* reference count; free_inode_rec() frees once it drops to 0 */
        u32 refs;
};
261
/* Error bits for inode_record::errors, decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
277
278 struct root_backref {
279         struct list_head list;
280         unsigned int found_dir_item:1;
281         unsigned int found_dir_index:1;
282         unsigned int found_back_ref:1;
283         unsigned int found_forward_ref:1;
284         unsigned int reachable:1;
285         int errors;
286         u64 ref_root;
287         u64 dir;
288         u64 index;
289         u16 namelen;
290         char name[0];
291 };
292
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 {
295         return list_entry(entry, struct root_backref, list);
296 }
297
/* Per-root bookkeeping, reachable through its embedded cache_extent. */
struct root_record {
        /* presumably a list of struct root_backref -- confirm */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
305
/*
 * Cache-tree node that carries an opaque pointer.  get_inode_rec()
 * keys it by inode number and stores a struct inode_record * in ->data.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
310
/*
 * Per-tree-block state kept while walking trees (see struct
 * walk_control).  NOTE(review): exact semantics of the members are not
 * visible from this part of the file.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        /* presumably ptr_node entries holding inode_record -- confirm */
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
318
/* A block described by its start and size (units not visible here). */
struct block_info {
        u64 start;
        u32 size;
};
323
/* State of a tree walk, with one shared_node slot per tree level. */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        /* presumably an index into nodes[] -- confirm */
        int active_node;
        int root_level;
};
330
/* An item identified by its key and the id of the root it was found in. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
336
/*
 * A (bytenr, bytes) candidate with counters.
 * NOTE(review): the users are outside this part of the file.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
344
/* What is known about one tree root, stored as a cache_extent node. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        /* presumably the generation of the root -- confirm */
        u64 gen;
        struct cache_extent cache_extent;
};
354
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag must own a distinct bit so that errors OR-ed together can
 * still be told apart.  CROSSING_STRIPE_BOUNDARY used to share bit 4
 * with REFERENCER_MISMATCH; it now uses the first free bit and the
 * other values are unchanged.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
371
372 static void *print_status_check(void *p)
373 {
374         struct task_ctx *priv = p;
375         const char work_indicator[] = { '.', 'o', 'O', 'o' };
376         uint32_t count = 0;
377         static char *task_position_string[] = {
378                 "checking extents",
379                 "checking free space cache",
380                 "checking fs roots",
381         };
382
383         task_period_start(priv->info, 1000 /* 1s */);
384
385         if (priv->tp == TASK_NOTHING)
386                 return NULL;
387
388         while (1) {
389                 printf("%s [%c]\r", task_position_string[priv->tp],
390                                 work_indicator[count % 4]);
391                 count++;
392                 fflush(stdout);
393                 task_period_wait(priv->info);
394         }
395         return NULL;
396 }
397
/*
 * Progress task epilogue: end the status line and flush stdout.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
405
406 static enum btrfs_check_mode parse_check_mode(const char *str)
407 {
408         if (strcmp(str, "lowmem") == 0)
409                 return CHECK_MODE_LOWMEM;
410         if (strcmp(str, "orig") == 0)
411                 return CHECK_MODE_ORIGINAL;
412         if (strcmp(str, "original") == 0)
413                 return CHECK_MODE_ORIGINAL;
414
415         return CHECK_MODE_UNKNOWN;
416 }
417
418 /* Compatible function to allow reuse of old codes */
419 static u64 first_extent_gap(struct rb_root *holes)
420 {
421         struct file_extent_hole *hole;
422
423         if (RB_EMPTY_ROOT(holes))
424                 return (u64)-1;
425
426         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
427         return hole->start;
428 }
429
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 {
432         struct file_extent_hole *hole1;
433         struct file_extent_hole *hole2;
434
435         hole1 = rb_entry(node1, struct file_extent_hole, node);
436         hole2 = rb_entry(node2, struct file_extent_hole, node);
437
438         if (hole1->start > hole2->start)
439                 return -1;
440         if (hole1->start < hole2->start)
441                 return 1;
442         /* Now hole1->start == hole2->start */
443         if (hole1->len >= hole2->len)
444                 /*
445                  * Hole 1 will be merge center
446                  * Same hole will be merged later
447                  */
448                 return -1;
449         /* Hole 2 will be merge center */
450         return 1;
451 }
452
453 /*
454  * Add a hole to the record
455  *
456  * This will do hole merge for copy_file_extent_holes(),
457  * which will ensure there won't be continuous holes.
458  */
459 static int add_file_extent_hole(struct rb_root *holes,
460                                 u64 start, u64 len)
461 {
462         struct file_extent_hole *hole;
463         struct file_extent_hole *prev = NULL;
464         struct file_extent_hole *next = NULL;
465
466         hole = malloc(sizeof(*hole));
467         if (!hole)
468                 return -ENOMEM;
469         hole->start = start;
470         hole->len = len;
471         /* Since compare will not return 0, no -EEXIST will happen */
472         rb_insert(holes, &hole->node, compare_hole);
473
474         /* simple merge with previous hole */
475         if (rb_prev(&hole->node))
476                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477                                 node);
478         if (prev && prev->start + prev->len >= hole->start) {
479                 hole->len = hole->start + hole->len - prev->start;
480                 hole->start = prev->start;
481                 rb_erase(&prev->node, holes);
482                 free(prev);
483                 prev = NULL;
484         }
485
486         /* iterate merge with next holes */
487         while (1) {
488                 if (!rb_next(&hole->node))
489                         break;
490                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491                                         node);
492                 if (hole->start + hole->len >= next->start) {
493                         if (hole->start + hole->len <= next->start + next->len)
494                                 hole->len = next->start + next->len -
495                                             hole->start;
496                         rb_erase(&next->node, holes);
497                         free(next);
498                         next = NULL;
499                 } else
500                         break;
501         }
502         return 0;
503 }
504
505 static int compare_hole_range(struct rb_node *node, void *data)
506 {
507         struct file_extent_hole *hole;
508         u64 start;
509
510         hole = (struct file_extent_hole *)data;
511         start = hole->start;
512
513         hole = rb_entry(node, struct file_extent_hole, node);
514         if (start < hole->start)
515                 return -1;
516         if (start >= hole->start && start < hole->start + hole->len)
517                 return 0;
518         return 1;
519 }
520
521 /*
522  * Delete a hole in the record
523  *
524  * This will do the hole split and is much restrict than add.
525  */
526 static int del_file_extent_hole(struct rb_root *holes,
527                                 u64 start, u64 len)
528 {
529         struct file_extent_hole *hole;
530         struct file_extent_hole tmp;
531         u64 prev_start = 0;
532         u64 prev_len = 0;
533         u64 next_start = 0;
534         u64 next_len = 0;
535         struct rb_node *node;
536         int have_prev = 0;
537         int have_next = 0;
538         int ret = 0;
539
540         tmp.start = start;
541         tmp.len = len;
542         node = rb_search(holes, &tmp, compare_hole_range, NULL);
543         if (!node)
544                 return -EEXIST;
545         hole = rb_entry(node, struct file_extent_hole, node);
546         if (start + len > hole->start + hole->len)
547                 return -EEXIST;
548
549         /*
550          * Now there will be no overlap, delete the hole and re-add the
551          * split(s) if they exists.
552          */
553         if (start > hole->start) {
554                 prev_start = hole->start;
555                 prev_len = start - hole->start;
556                 have_prev = 1;
557         }
558         if (hole->start + hole->len > start + len) {
559                 next_start = start + len;
560                 next_len = hole->start + hole->len - start - len;
561                 have_next = 1;
562         }
563         rb_erase(node, holes);
564         free(hole);
565         if (have_prev) {
566                 ret = add_file_extent_hole(holes, prev_start, prev_len);
567                 if (ret < 0)
568                         return ret;
569         }
570         if (have_next) {
571                 ret = add_file_extent_hole(holes, next_start, next_len);
572                 if (ret < 0)
573                         return ret;
574         }
575         return 0;
576 }
577
578 static int copy_file_extent_holes(struct rb_root *dst,
579                                   struct rb_root *src)
580 {
581         struct file_extent_hole *hole;
582         struct rb_node *node;
583         int ret = 0;
584
585         node = rb_first(src);
586         while (node) {
587                 hole = rb_entry(node, struct file_extent_hole, node);
588                 ret = add_file_extent_hole(dst, hole->start, hole->len);
589                 if (ret)
590                         break;
591                 node = rb_next(node);
592         }
593         return ret;
594 }
595
596 static void free_file_extent_holes(struct rb_root *holes)
597 {
598         struct rb_node *node;
599         struct file_extent_hole *hole;
600
601         node = rb_first(holes);
602         while (node) {
603                 hole = rb_entry(node, struct file_extent_hole, node);
604                 rb_erase(node, holes);
605                 free(hole);
606                 node = rb_first(holes);
607         }
608 }
609
/* Forward declaration; the definition is not in this part of the file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613                                  struct btrfs_root *root)
614 {
615         if (root->last_trans != trans->transid) {
616                 root->track_dirty = 1;
617                 root->last_trans = trans->transid;
618                 root->commit_root = root->node;
619                 extent_buffer_get(root->node);
620         }
621 }
622
623 static u8 imode_to_type(u32 imode)
624 {
625 #define S_SHIFT 12
626         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
628                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
629                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
630                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
631                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
632                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
633                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
634         };
635
636         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
637 #undef S_SHIFT
638 }
639
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 {
642         struct device_record *rec1;
643         struct device_record *rec2;
644
645         rec1 = rb_entry(node1, struct device_record, node);
646         rec2 = rb_entry(node2, struct device_record, node);
647         if (rec1->devid > rec2->devid)
648                 return -1;
649         else if (rec1->devid < rec2->devid)
650                 return 1;
651         else
652                 return 0;
653 }
654
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 {
657         struct inode_record *rec;
658         struct inode_backref *backref;
659         struct inode_backref *orig;
660         struct inode_backref *tmp;
661         struct orphan_data_extent *src_orphan;
662         struct orphan_data_extent *dst_orphan;
663         struct rb_node *rb;
664         size_t size;
665         int ret;
666
667         rec = malloc(sizeof(*rec));
668         if (!rec)
669                 return ERR_PTR(-ENOMEM);
670         memcpy(rec, orig_rec, sizeof(*rec));
671         rec->refs = 1;
672         INIT_LIST_HEAD(&rec->backrefs);
673         INIT_LIST_HEAD(&rec->orphan_extents);
674         rec->holes = RB_ROOT;
675
676         list_for_each_entry(orig, &orig_rec->backrefs, list) {
677                 size = sizeof(*orig) + orig->namelen + 1;
678                 backref = malloc(size);
679                 if (!backref) {
680                         ret = -ENOMEM;
681                         goto cleanup;
682                 }
683                 memcpy(backref, orig, size);
684                 list_add_tail(&backref->list, &rec->backrefs);
685         }
686         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687                 dst_orphan = malloc(sizeof(*dst_orphan));
688                 if (!dst_orphan) {
689                         ret = -ENOMEM;
690                         goto cleanup;
691                 }
692                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694         }
695         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
696         if (ret < 0)
697                 goto cleanup_rb;
698
699         return rec;
700
701 cleanup_rb:
702         rb = rb_first(&rec->holes);
703         while (rb) {
704                 struct file_extent_hole *hole;
705
706                 hole = rb_entry(rb, struct file_extent_hole, node);
707                 rb = rb_next(rb);
708                 free(hole);
709         }
710
711 cleanup:
712         if (!list_empty(&rec->backrefs))
713                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714                         list_del(&orig->list);
715                         free(orig);
716                 }
717
718         if (!list_empty(&rec->orphan_extents))
719                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720                         list_del(&orig->list);
721                         free(orig);
722                 }
723
724         free(rec);
725
726         return ERR_PTR(ret);
727 }
728
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
730                                       u64 objectid)
731 {
732         struct orphan_data_extent *orphan;
733
734         if (list_empty(orphan_extents))
735                 return;
736         printf("The following data extent is lost in tree %llu:\n",
737                objectid);
738         list_for_each_entry(orphan, orphan_extents, list) {
739                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
741                        orphan->disk_len);
742         }
743 }
744
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 {
747         u64 root_objectid = root->root_key.objectid;
748         int errors = rec->errors;
749
750         if (!errors)
751                 return;
752         /* reloc root errors, we print its corresponding fs root objectid*/
753         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754                 root_objectid = root->root_key.offset;
755                 fprintf(stderr, "reloc");
756         }
757         fprintf(stderr, "root %llu inode %llu errors %x",
758                 (unsigned long long) root_objectid,
759                 (unsigned long long) rec->ino, rec->errors);
760
761         if (errors & I_ERR_NO_INODE_ITEM)
762                 fprintf(stderr, ", no inode item");
763         if (errors & I_ERR_NO_ORPHAN_ITEM)
764                 fprintf(stderr, ", no orphan item");
765         if (errors & I_ERR_DUP_INODE_ITEM)
766                 fprintf(stderr, ", dup inode item");
767         if (errors & I_ERR_DUP_DIR_INDEX)
768                 fprintf(stderr, ", dup dir index");
769         if (errors & I_ERR_ODD_DIR_ITEM)
770                 fprintf(stderr, ", odd dir item");
771         if (errors & I_ERR_ODD_FILE_EXTENT)
772                 fprintf(stderr, ", odd file extent");
773         if (errors & I_ERR_BAD_FILE_EXTENT)
774                 fprintf(stderr, ", bad file extent");
775         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776                 fprintf(stderr, ", file extent overlap");
777         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778                 fprintf(stderr, ", file extent discount");
779         if (errors & I_ERR_DIR_ISIZE_WRONG)
780                 fprintf(stderr, ", dir isize wrong");
781         if (errors & I_ERR_FILE_NBYTES_WRONG)
782                 fprintf(stderr, ", nbytes wrong");
783         if (errors & I_ERR_ODD_CSUM_ITEM)
784                 fprintf(stderr, ", odd csum item");
785         if (errors & I_ERR_SOME_CSUM_MISSING)
786                 fprintf(stderr, ", some csum missing");
787         if (errors & I_ERR_LINK_COUNT_WRONG)
788                 fprintf(stderr, ", link count wrong");
789         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790                 fprintf(stderr, ", orphan file extent");
791         fprintf(stderr, "\n");
792         /* Print the orphan extents if needed */
793         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795
796         /* Print the holes if needed */
797         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798                 struct file_extent_hole *hole;
799                 struct rb_node *node;
800                 int found = 0;
801
802                 node = rb_first(&rec->holes);
803                 fprintf(stderr, "Found file extent holes:\n");
804                 while (node) {
805                         found = 1;
806                         hole = rb_entry(node, struct file_extent_hole, node);
807                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
808                                 hole->start, hole->len);
809                         node = rb_next(node);
810                 }
811                 if (!found)
812                         fprintf(stderr, "\tstart: 0, len: %llu\n",
813                                 round_up(rec->isize, root->sectorsize));
814         }
815 }
816
817 static void print_ref_error(int errors)
818 {
819         if (errors & REF_ERR_NO_DIR_ITEM)
820                 fprintf(stderr, ", no dir item");
821         if (errors & REF_ERR_NO_DIR_INDEX)
822                 fprintf(stderr, ", no dir index");
823         if (errors & REF_ERR_NO_INODE_REF)
824                 fprintf(stderr, ", no inode ref");
825         if (errors & REF_ERR_DUP_DIR_ITEM)
826                 fprintf(stderr, ", dup dir item");
827         if (errors & REF_ERR_DUP_DIR_INDEX)
828                 fprintf(stderr, ", dup dir index");
829         if (errors & REF_ERR_DUP_INODE_REF)
830                 fprintf(stderr, ", dup inode ref");
831         if (errors & REF_ERR_INDEX_UNMATCH)
832                 fprintf(stderr, ", index mismatch");
833         if (errors & REF_ERR_FILETYPE_UNMATCH)
834                 fprintf(stderr, ", filetype mismatch");
835         if (errors & REF_ERR_NAME_TOO_LONG)
836                 fprintf(stderr, ", name too long");
837         if (errors & REF_ERR_NO_ROOT_REF)
838                 fprintf(stderr, ", no root ref");
839         if (errors & REF_ERR_NO_ROOT_BACKREF)
840                 fprintf(stderr, ", no root backref");
841         if (errors & REF_ERR_DUP_ROOT_REF)
842                 fprintf(stderr, ", dup root ref");
843         if (errors & REF_ERR_DUP_ROOT_BACKREF)
844                 fprintf(stderr, ", dup root backref");
845         fprintf(stderr, "\n");
846 }
847
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
849                                           u64 ino, int mod)
850 {
851         struct ptr_node *node;
852         struct cache_extent *cache;
853         struct inode_record *rec = NULL;
854         int ret;
855
856         cache = lookup_cache_extent(inode_cache, ino, 1);
857         if (cache) {
858                 node = container_of(cache, struct ptr_node, cache);
859                 rec = node->data;
860                 if (mod && rec->refs > 1) {
861                         node->data = clone_inode_rec(rec);
862                         if (IS_ERR(node->data))
863                                 return node->data;
864                         rec->refs--;
865                         rec = node->data;
866                 }
867         } else if (mod) {
868                 rec = calloc(1, sizeof(*rec));
869                 if (!rec)
870                         return ERR_PTR(-ENOMEM);
871                 rec->ino = ino;
872                 rec->extent_start = (u64)-1;
873                 rec->refs = 1;
874                 INIT_LIST_HEAD(&rec->backrefs);
875                 INIT_LIST_HEAD(&rec->orphan_extents);
876                 rec->holes = RB_ROOT;
877
878                 node = malloc(sizeof(*node));
879                 if (!node) {
880                         free(rec);
881                         return ERR_PTR(-ENOMEM);
882                 }
883                 node->cache.start = ino;
884                 node->cache.size = 1;
885                 node->data = rec;
886
887                 if (ino == BTRFS_FREE_INO_OBJECTID)
888                         rec->found_link = 1;
889
890                 ret = insert_cache_extent(inode_cache, &node->cache);
891                 if (ret)
892                         return ERR_PTR(-EEXIST);
893         }
894         return rec;
895 }
896
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 {
899         struct orphan_data_extent *orphan;
900
901         while (!list_empty(orphan_extents)) {
902                 orphan = list_entry(orphan_extents->next,
903                                     struct orphan_data_extent, list);
904                 list_del(&orphan->list);
905                 free(orphan);
906         }
907 }
908
909 static void free_inode_rec(struct inode_record *rec)
910 {
911         struct inode_backref *backref;
912
913         if (--rec->refs > 0)
914                 return;
915
916         while (!list_empty(&rec->backrefs)) {
917                 backref = to_inode_backref(rec->backrefs.next);
918                 list_del(&backref->list);
919                 free(backref);
920         }
921         free_orphan_data_extents(&rec->orphan_extents);
922         free_file_extent_holes(&rec->holes);
923         free(rec);
924 }
925
926 static int can_free_inode_rec(struct inode_record *rec)
927 {
928         if (!rec->errors && rec->checked && rec->found_inode_item &&
929             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
930                 return 1;
931         return 0;
932 }
933
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935                                  struct inode_record *rec)
936 {
937         struct cache_extent *cache;
938         struct inode_backref *tmp, *backref;
939         struct ptr_node *node;
940         u8 filetype;
941
942         if (!rec->found_inode_item)
943                 return;
944
945         filetype = imode_to_type(rec->imode);
946         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947                 if (backref->found_dir_item && backref->found_dir_index) {
948                         if (backref->filetype != filetype)
949                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950                         if (!backref->errors && backref->found_inode_ref &&
951                             rec->nlink == rec->found_link) {
952                                 list_del(&backref->list);
953                                 free(backref);
954                         }
955                 }
956         }
957
958         if (!rec->checked || rec->merging)
959                 return;
960
961         if (S_ISDIR(rec->imode)) {
962                 if (rec->found_size != rec->isize)
963                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964                 if (rec->found_file_extent)
965                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
966         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967                 if (rec->found_dir_item)
968                         rec->errors |= I_ERR_ODD_DIR_ITEM;
969                 if (rec->found_size != rec->nbytes)
970                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
971                 if (rec->nlink > 0 && !no_holes &&
972                     (rec->extent_end < rec->isize ||
973                      first_extent_gap(&rec->holes) < rec->isize))
974                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
975         }
976
977         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978                 if (rec->found_csum_item && rec->nodatasum)
979                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
980                 if (rec->some_csum_missing && !rec->nodatasum)
981                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
982         }
983
984         BUG_ON(rec->refs != 1);
985         if (can_free_inode_rec(rec)) {
986                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987                 node = container_of(cache, struct ptr_node, cache);
988                 BUG_ON(node->data != rec);
989                 remove_cache_extent(inode_cache, &node->cache);
990                 free(node);
991                 free_inode_rec(rec);
992         }
993 }
994
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 {
997         struct btrfs_path path;
998         struct btrfs_key key;
999         int ret;
1000
1001         key.objectid = BTRFS_ORPHAN_OBJECTID;
1002         key.type = BTRFS_ORPHAN_ITEM_KEY;
1003         key.offset = ino;
1004
1005         btrfs_init_path(&path);
1006         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007         btrfs_release_path(&path);
1008         if (ret > 0)
1009                 ret = -ENOENT;
1010         return ret;
1011 }
1012
1013 static int process_inode_item(struct extent_buffer *eb,
1014                               int slot, struct btrfs_key *key,
1015                               struct shared_node *active_node)
1016 {
1017         struct inode_record *rec;
1018         struct btrfs_inode_item *item;
1019
1020         rec = active_node->current;
1021         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022         if (rec->found_inode_item) {
1023                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1024                 return 1;
1025         }
1026         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027         rec->nlink = btrfs_inode_nlink(eb, item);
1028         rec->isize = btrfs_inode_size(eb, item);
1029         rec->nbytes = btrfs_inode_nbytes(eb, item);
1030         rec->imode = btrfs_inode_mode(eb, item);
1031         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032                 rec->nodatasum = 1;
1033         rec->found_inode_item = 1;
1034         if (rec->nlink == 0)
1035                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036         maybe_free_inode_rec(&active_node->inode_cache, rec);
1037         return 0;
1038 }
1039
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041                                                 const char *name,
1042                                                 int namelen, u64 dir)
1043 {
1044         struct inode_backref *backref;
1045
1046         list_for_each_entry(backref, &rec->backrefs, list) {
1047                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048                         break;
1049                 if (backref->dir != dir || backref->namelen != namelen)
1050                         continue;
1051                 if (memcmp(name, backref->name, namelen))
1052                         continue;
1053                 return backref;
1054         }
1055
1056         backref = malloc(sizeof(*backref) + namelen + 1);
1057         if (!backref)
1058                 return NULL;
1059         memset(backref, 0, sizeof(*backref));
1060         backref->dir = dir;
1061         backref->namelen = namelen;
1062         memcpy(backref->name, name, namelen);
1063         backref->name[namelen] = '\0';
1064         list_add_tail(&backref->list, &rec->backrefs);
1065         return backref;
1066 }
1067
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069                              u64 ino, u64 dir, u64 index,
1070                              const char *name, int namelen,
1071                              u8 filetype, u8 itemtype, int errors)
1072 {
1073         struct inode_record *rec;
1074         struct inode_backref *backref;
1075
1076         rec = get_inode_rec(inode_cache, ino, 1);
1077         BUG_ON(IS_ERR(rec));
1078         backref = get_inode_backref(rec, name, namelen, dir);
1079         BUG_ON(!backref);
1080         if (errors)
1081                 backref->errors |= errors;
1082         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083                 if (backref->found_dir_index)
1084                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085                 if (backref->found_inode_ref && backref->index != index)
1086                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1087                 if (backref->found_dir_item && backref->filetype != filetype)
1088                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089
1090                 backref->index = index;
1091                 backref->filetype = filetype;
1092                 backref->found_dir_index = 1;
1093         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094                 rec->found_link++;
1095                 if (backref->found_dir_item)
1096                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097                 if (backref->found_dir_index && backref->filetype != filetype)
1098                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099
1100                 backref->filetype = filetype;
1101                 backref->found_dir_item = 1;
1102         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104                 if (backref->found_inode_ref)
1105                         backref->errors |= REF_ERR_DUP_INODE_REF;
1106                 if (backref->found_dir_index && backref->index != index)
1107                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1108                 else
1109                         backref->index = index;
1110
1111                 backref->ref_type = itemtype;
1112                 backref->found_inode_ref = 1;
1113         } else {
1114                 BUG_ON(1);
1115         }
1116
1117         maybe_free_inode_rec(inode_cache, rec);
1118         return 0;
1119 }
1120
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122                             struct cache_tree *dst_cache)
1123 {
1124         struct inode_backref *backref;
1125         u32 dir_count = 0;
1126         int ret = 0;
1127
1128         dst->merging = 1;
1129         list_for_each_entry(backref, &src->backrefs, list) {
1130                 if (backref->found_dir_index) {
1131                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1132                                         backref->index, backref->name,
1133                                         backref->namelen, backref->filetype,
1134                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1135                 }
1136                 if (backref->found_dir_item) {
1137                         dir_count++;
1138                         add_inode_backref(dst_cache, dst->ino,
1139                                         backref->dir, 0, backref->name,
1140                                         backref->namelen, backref->filetype,
1141                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1142                 }
1143                 if (backref->found_inode_ref) {
1144                         add_inode_backref(dst_cache, dst->ino,
1145                                         backref->dir, backref->index,
1146                                         backref->name, backref->namelen, 0,
1147                                         backref->ref_type, backref->errors);
1148                 }
1149         }
1150
1151         if (src->found_dir_item)
1152                 dst->found_dir_item = 1;
1153         if (src->found_file_extent)
1154                 dst->found_file_extent = 1;
1155         if (src->found_csum_item)
1156                 dst->found_csum_item = 1;
1157         if (src->some_csum_missing)
1158                 dst->some_csum_missing = 1;
1159         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1161                 if (ret < 0)
1162                         return ret;
1163         }
1164
1165         BUG_ON(src->found_link < dir_count);
1166         dst->found_link += src->found_link - dir_count;
1167         dst->found_size += src->found_size;
1168         if (src->extent_start != (u64)-1) {
1169                 if (dst->extent_start == (u64)-1) {
1170                         dst->extent_start = src->extent_start;
1171                         dst->extent_end = src->extent_end;
1172                 } else {
1173                         if (dst->extent_end > src->extent_start)
1174                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175                         else if (dst->extent_end < src->extent_start) {
1176                                 ret = add_file_extent_hole(&dst->holes,
1177                                         dst->extent_end,
1178                                         src->extent_start - dst->extent_end);
1179                         }
1180                         if (dst->extent_end < src->extent_end)
1181                                 dst->extent_end = src->extent_end;
1182                 }
1183         }
1184
1185         dst->errors |= src->errors;
1186         if (src->found_inode_item) {
1187                 if (!dst->found_inode_item) {
1188                         dst->nlink = src->nlink;
1189                         dst->isize = src->isize;
1190                         dst->nbytes = src->nbytes;
1191                         dst->imode = src->imode;
1192                         dst->nodatasum = src->nodatasum;
1193                         dst->found_inode_item = 1;
1194                 } else {
1195                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1196                 }
1197         }
1198         dst->merging = 0;
1199
1200         return 0;
1201 }
1202
1203 static int splice_shared_node(struct shared_node *src_node,
1204                               struct shared_node *dst_node)
1205 {
1206         struct cache_extent *cache;
1207         struct ptr_node *node, *ins;
1208         struct cache_tree *src, *dst;
1209         struct inode_record *rec, *conflict;
1210         u64 current_ino = 0;
1211         int splice = 0;
1212         int ret;
1213
1214         if (--src_node->refs == 0)
1215                 splice = 1;
1216         if (src_node->current)
1217                 current_ino = src_node->current->ino;
1218
1219         src = &src_node->root_cache;
1220         dst = &dst_node->root_cache;
1221 again:
1222         cache = search_cache_extent(src, 0);
1223         while (cache) {
1224                 node = container_of(cache, struct ptr_node, cache);
1225                 rec = node->data;
1226                 cache = next_cache_extent(cache);
1227
1228                 if (splice) {
1229                         remove_cache_extent(src, &node->cache);
1230                         ins = node;
1231                 } else {
1232                         ins = malloc(sizeof(*ins));
1233                         BUG_ON(!ins);
1234                         ins->cache.start = node->cache.start;
1235                         ins->cache.size = node->cache.size;
1236                         ins->data = rec;
1237                         rec->refs++;
1238                 }
1239                 ret = insert_cache_extent(dst, &ins->cache);
1240                 if (ret == -EEXIST) {
1241                         conflict = get_inode_rec(dst, rec->ino, 1);
1242                         BUG_ON(IS_ERR(conflict));
1243                         merge_inode_recs(rec, conflict, dst);
1244                         if (rec->checked) {
1245                                 conflict->checked = 1;
1246                                 if (dst_node->current == conflict)
1247                                         dst_node->current = NULL;
1248                         }
1249                         maybe_free_inode_rec(dst, conflict);
1250                         free_inode_rec(rec);
1251                         free(ins);
1252                 } else {
1253                         BUG_ON(ret);
1254                 }
1255         }
1256
1257         if (src == &src_node->root_cache) {
1258                 src = &src_node->inode_cache;
1259                 dst = &dst_node->inode_cache;
1260                 goto again;
1261         }
1262
1263         if (current_ino > 0 && (!dst_node->current ||
1264             current_ino > dst_node->current->ino)) {
1265                 if (dst_node->current) {
1266                         dst_node->current->checked = 1;
1267                         maybe_free_inode_rec(dst, dst_node->current);
1268                 }
1269                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270                 BUG_ON(IS_ERR(dst_node->current));
1271         }
1272         return 0;
1273 }
1274
1275 static void free_inode_ptr(struct cache_extent *cache)
1276 {
1277         struct ptr_node *node;
1278         struct inode_record *rec;
1279
1280         node = container_of(cache, struct ptr_node, cache);
1281         rec = node->data;
1282         free_inode_rec(rec);
1283         free(node);
1284 }
1285
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1289                                             u64 bytenr)
1290 {
1291         struct cache_extent *cache;
1292         struct shared_node *node;
1293
1294         cache = lookup_cache_extent(shared, bytenr, 1);
1295         if (cache) {
1296                 node = container_of(cache, struct shared_node, cache);
1297                 return node;
1298         }
1299         return NULL;
1300 }
1301
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1303 {
1304         int ret;
1305         struct shared_node *node;
1306
1307         node = calloc(1, sizeof(*node));
1308         if (!node)
1309                 return -ENOMEM;
1310         node->cache.start = bytenr;
1311         node->cache.size = 1;
1312         cache_tree_init(&node->root_cache);
1313         cache_tree_init(&node->inode_cache);
1314         node->refs = refs;
1315
1316         ret = insert_cache_extent(shared, &node->cache);
1317
1318         return ret;
1319 }
1320
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322                              struct walk_control *wc, int level)
1323 {
1324         struct shared_node *node;
1325         struct shared_node *dest;
1326         int ret;
1327
1328         if (level == wc->active_node)
1329                 return 0;
1330
1331         BUG_ON(wc->active_node <= level);
1332         node = find_shared_node(&wc->shared, bytenr);
1333         if (!node) {
1334                 ret = add_shared_node(&wc->shared, bytenr, refs);
1335                 BUG_ON(ret);
1336                 node = find_shared_node(&wc->shared, bytenr);
1337                 wc->nodes[level] = node;
1338                 wc->active_node = level;
1339                 return 0;
1340         }
1341
1342         if (wc->root_level == wc->active_node &&
1343             btrfs_root_refs(&root->root_item) == 0) {
1344                 if (--node->refs == 0) {
1345                         free_inode_recs_tree(&node->root_cache);
1346                         free_inode_recs_tree(&node->inode_cache);
1347                         remove_cache_extent(&wc->shared, &node->cache);
1348                         free(node);
1349                 }
1350                 return 1;
1351         }
1352
1353         dest = wc->nodes[wc->active_node];
1354         splice_shared_node(node, dest);
1355         if (node->refs == 0) {
1356                 remove_cache_extent(&wc->shared, &node->cache);
1357                 free(node);
1358         }
1359         return 1;
1360 }
1361
1362 static int leave_shared_node(struct btrfs_root *root,
1363                              struct walk_control *wc, int level)
1364 {
1365         struct shared_node *node;
1366         struct shared_node *dest;
1367         int i;
1368
1369         if (level == wc->root_level)
1370                 return 0;
1371
1372         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1373                 if (wc->nodes[i])
1374                         break;
1375         }
1376         BUG_ON(i >= BTRFS_MAX_LEVEL);
1377
1378         node = wc->nodes[wc->active_node];
1379         wc->nodes[wc->active_node] = NULL;
1380         wc->active_node = i;
1381
1382         dest = wc->nodes[wc->active_node];
1383         if (wc->active_node < wc->root_level ||
1384             btrfs_root_refs(&root->root_item) > 0) {
1385                 BUG_ON(node->refs <= 1);
1386                 splice_shared_node(node, dest);
1387         } else {
1388                 BUG_ON(node->refs < 2);
1389                 node->refs--;
1390         }
1391         return 0;
1392 }
1393
1394 /*
1395  * Returns:
1396  * < 0 - on error
1397  * 1   - if the root with id child_root_id is a child of root parent_root_id
1398  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1399  *       has other root(s) as parent(s)
1400  * 2   - if the root child_root_id doesn't have any parent roots
1401  */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1403                          u64 child_root_id)
1404 {
1405         struct btrfs_path path;
1406         struct btrfs_key key;
1407         struct extent_buffer *leaf;
1408         int has_parent = 0;
1409         int ret;
1410
1411         btrfs_init_path(&path);
1412
1413         key.objectid = parent_root_id;
1414         key.type = BTRFS_ROOT_REF_KEY;
1415         key.offset = child_root_id;
1416         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1417                                 0, 0);
1418         if (ret < 0)
1419                 return ret;
1420         btrfs_release_path(&path);
1421         if (!ret)
1422                 return 1;
1423
1424         key.objectid = child_root_id;
1425         key.type = BTRFS_ROOT_BACKREF_KEY;
1426         key.offset = 0;
1427         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1428                                 0, 0);
1429         if (ret < 0)
1430                 goto out;
1431
1432         while (1) {
1433                 leaf = path.nodes[0];
1434                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1436                         if (ret)
1437                                 break;
1438                         leaf = path.nodes[0];
1439                 }
1440
1441                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1442                 if (key.objectid != child_root_id ||
1443                     key.type != BTRFS_ROOT_BACKREF_KEY)
1444                         break;
1445
1446                 has_parent = 1;
1447
1448                 if (key.offset == parent_root_id) {
1449                         btrfs_release_path(&path);
1450                         return 1;
1451                 }
1452
1453                 path.slots[0]++;
1454         }
1455 out:
1456         btrfs_release_path(&path);
1457         if (ret < 0)
1458                 return ret;
1459         return has_parent ? 0 : 2;
1460 }
1461
1462 static int process_dir_item(struct btrfs_root *root,
1463                             struct extent_buffer *eb,
1464                             int slot, struct btrfs_key *key,
1465                             struct shared_node *active_node)
1466 {
1467         u32 total;
1468         u32 cur = 0;
1469         u32 len;
1470         u32 name_len;
1471         u32 data_len;
1472         int error;
1473         int nritems = 0;
1474         u8 filetype;
1475         struct btrfs_dir_item *di;
1476         struct inode_record *rec;
1477         struct cache_tree *root_cache;
1478         struct cache_tree *inode_cache;
1479         struct btrfs_key location;
1480         char namebuf[BTRFS_NAME_LEN];
1481
1482         root_cache = &active_node->root_cache;
1483         inode_cache = &active_node->inode_cache;
1484         rec = active_node->current;
1485         rec->found_dir_item = 1;
1486
1487         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488         total = btrfs_item_size_nr(eb, slot);
1489         while (cur < total) {
1490                 nritems++;
1491                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492                 name_len = btrfs_dir_name_len(eb, di);
1493                 data_len = btrfs_dir_data_len(eb, di);
1494                 filetype = btrfs_dir_type(eb, di);
1495
1496                 rec->found_size += name_len;
1497                 if (name_len <= BTRFS_NAME_LEN) {
1498                         len = name_len;
1499                         error = 0;
1500                 } else {
1501                         len = BTRFS_NAME_LEN;
1502                         error = REF_ERR_NAME_TOO_LONG;
1503                 }
1504                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505
1506                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507                         add_inode_backref(inode_cache, location.objectid,
1508                                           key->objectid, key->offset, namebuf,
1509                                           len, filetype, key->type, error);
1510                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511                         add_inode_backref(root_cache, location.objectid,
1512                                           key->objectid, key->offset,
1513                                           namebuf, len, filetype,
1514                                           key->type, error);
1515                 } else {
1516                         fprintf(stderr, "invalid location in dir item %u\n",
1517                                 location.type);
1518                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519                                           key->objectid, key->offset, namebuf,
1520                                           len, filetype, key->type, error);
1521                 }
1522
1523                 len = sizeof(*di) + name_len + data_len;
1524                 di = (struct btrfs_dir_item *)((char *)di + len);
1525                 cur += len;
1526         }
1527         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1529
1530         return 0;
1531 }
1532
1533 static int process_inode_ref(struct extent_buffer *eb,
1534                              int slot, struct btrfs_key *key,
1535                              struct shared_node *active_node)
1536 {
1537         u32 total;
1538         u32 cur = 0;
1539         u32 len;
1540         u32 name_len;
1541         u64 index;
1542         int error;
1543         struct cache_tree *inode_cache;
1544         struct btrfs_inode_ref *ref;
1545         char namebuf[BTRFS_NAME_LEN];
1546
1547         inode_cache = &active_node->inode_cache;
1548
1549         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550         total = btrfs_item_size_nr(eb, slot);
1551         while (cur < total) {
1552                 name_len = btrfs_inode_ref_name_len(eb, ref);
1553                 index = btrfs_inode_ref_index(eb, ref);
1554                 if (name_len <= BTRFS_NAME_LEN) {
1555                         len = name_len;
1556                         error = 0;
1557                 } else {
1558                         len = BTRFS_NAME_LEN;
1559                         error = REF_ERR_NAME_TOO_LONG;
1560                 }
1561                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562                 add_inode_backref(inode_cache, key->objectid, key->offset,
1563                                   index, namebuf, len, 0, key->type, error);
1564
1565                 len = sizeof(*ref) + name_len;
1566                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1567                 cur += len;
1568         }
1569         return 0;
1570 }
1571
1572 static int process_inode_extref(struct extent_buffer *eb,
1573                                 int slot, struct btrfs_key *key,
1574                                 struct shared_node *active_node)
1575 {
1576         u32 total;
1577         u32 cur = 0;
1578         u32 len;
1579         u32 name_len;
1580         u64 index;
1581         u64 parent;
1582         int error;
1583         struct cache_tree *inode_cache;
1584         struct btrfs_inode_extref *extref;
1585         char namebuf[BTRFS_NAME_LEN];
1586
1587         inode_cache = &active_node->inode_cache;
1588
1589         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590         total = btrfs_item_size_nr(eb, slot);
1591         while (cur < total) {
1592                 name_len = btrfs_inode_extref_name_len(eb, extref);
1593                 index = btrfs_inode_extref_index(eb, extref);
1594                 parent = btrfs_inode_extref_parent(eb, extref);
1595                 if (name_len <= BTRFS_NAME_LEN) {
1596                         len = name_len;
1597                         error = 0;
1598                 } else {
1599                         len = BTRFS_NAME_LEN;
1600                         error = REF_ERR_NAME_TOO_LONG;
1601                 }
1602                 read_extent_buffer(eb, namebuf,
1603                                    (unsigned long)(extref + 1), len);
1604                 add_inode_backref(inode_cache, key->objectid, parent,
1605                                   index, namebuf, len, 0, key->type, error);
1606
1607                 len = sizeof(*extref) + name_len;
1608                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1609                 cur += len;
1610         }
1611         return 0;
1612
1613 }
1614
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616                             u64 len, u64 *found)
1617 {
1618         struct btrfs_key key;
1619         struct btrfs_path path;
1620         struct extent_buffer *leaf;
1621         int ret;
1622         size_t size;
1623         *found = 0;
1624         u64 csum_end;
1625         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626
1627         btrfs_init_path(&path);
1628
1629         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630         key.offset = start;
1631         key.type = BTRFS_EXTENT_CSUM_KEY;
1632
1633         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1634                                 &key, &path, 0, 0);
1635         if (ret < 0)
1636                 goto out;
1637         if (ret > 0 && path.slots[0] > 0) {
1638                 leaf = path.nodes[0];
1639                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641                     key.type == BTRFS_EXTENT_CSUM_KEY)
1642                         path.slots[0]--;
1643         }
1644
1645         while (len > 0) {
1646                 leaf = path.nodes[0];
1647                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1649                         if (ret > 0)
1650                                 break;
1651                         else if (ret < 0)
1652                                 goto out;
1653                         leaf = path.nodes[0];
1654                 }
1655
1656                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1657                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658                     key.type != BTRFS_EXTENT_CSUM_KEY)
1659                         break;
1660
1661                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1662                 if (key.offset >= start + len)
1663                         break;
1664
1665                 if (key.offset > start)
1666                         start = key.offset;
1667
1668                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670                 if (csum_end > start) {
1671                         size = min(csum_end - start, len);
1672                         len -= size;
1673                         start += size;
1674                         *found += size;
1675                 }
1676
1677                 path.slots[0]++;
1678         }
1679 out:
1680         btrfs_release_path(&path);
1681         if (ret < 0)
1682                 return ret;
1683         return 0;
1684 }
1685
1686 static int process_file_extent(struct btrfs_root *root,
1687                                 struct extent_buffer *eb,
1688                                 int slot, struct btrfs_key *key,
1689                                 struct shared_node *active_node)
1690 {
1691         struct inode_record *rec;
1692         struct btrfs_file_extent_item *fi;
1693         u64 num_bytes = 0;
1694         u64 disk_bytenr = 0;
1695         u64 extent_offset = 0;
1696         u64 mask = root->sectorsize - 1;
1697         int extent_type;
1698         int ret;
1699
1700         rec = active_node->current;
1701         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702         rec->found_file_extent = 1;
1703
1704         if (rec->extent_start == (u64)-1) {
1705                 rec->extent_start = key->offset;
1706                 rec->extent_end = key->offset;
1707         }
1708
1709         if (rec->extent_end > key->offset)
1710                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711         else if (rec->extent_end < key->offset) {
1712                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713                                            key->offset - rec->extent_end);
1714                 if (ret < 0)
1715                         return ret;
1716         }
1717
1718         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719         extent_type = btrfs_file_extent_type(eb, fi);
1720
1721         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723                 if (num_bytes == 0)
1724                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725                 rec->found_size += num_bytes;
1726                 num_bytes = (num_bytes + mask) & ~mask;
1727         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731                 extent_offset = btrfs_file_extent_offset(eb, fi);
1732                 if (num_bytes == 0 || (num_bytes & mask))
1733                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734                 if (num_bytes + extent_offset >
1735                     btrfs_file_extent_ram_bytes(eb, fi))
1736                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1737                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738                     (btrfs_file_extent_compression(eb, fi) ||
1739                      btrfs_file_extent_encryption(eb, fi) ||
1740                      btrfs_file_extent_other_encoding(eb, fi)))
1741                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1742                 if (disk_bytenr > 0)
1743                         rec->found_size += num_bytes;
1744         } else {
1745                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746         }
1747         rec->extent_end = key->offset + num_bytes;
1748
1749         /*
1750          * The data reloc tree will copy full extents into its inode and then
1751          * copy the corresponding csums.  Because the extent it copied could be
1752          * a preallocated extent that hasn't been written to yet there may be no
1753          * csums to copy, ergo we won't have csums for our file extent.  This is
1754          * ok so just don't bother checking csums if the inode belongs to the
1755          * data reloc tree.
1756          */
1757         if (disk_bytenr > 0 &&
1758             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1759                 u64 found;
1760                 if (btrfs_file_extent_compression(eb, fi))
1761                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762                 else
1763                         disk_bytenr += extent_offset;
1764
1765                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1766                 if (ret < 0)
1767                         return ret;
1768                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769                         if (found > 0)
1770                                 rec->found_csum_item = 1;
1771                         if (found < num_bytes)
1772                                 rec->some_csum_missing = 1;
1773                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774                         if (found > 0)
1775                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1776                 }
1777         }
1778         return 0;
1779 }
1780
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782                             struct walk_control *wc)
1783 {
1784         struct btrfs_key key;
1785         u32 nritems;
1786         int i;
1787         int ret = 0;
1788         struct cache_tree *inode_cache;
1789         struct shared_node *active_node;
1790
1791         if (wc->root_level == wc->active_node &&
1792             btrfs_root_refs(&root->root_item) == 0)
1793                 return 0;
1794
1795         active_node = wc->nodes[wc->active_node];
1796         inode_cache = &active_node->inode_cache;
1797         nritems = btrfs_header_nritems(eb);
1798         for (i = 0; i < nritems; i++) {
1799                 btrfs_item_key_to_cpu(eb, &key, i);
1800
1801                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802                         continue;
1803                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1804                         continue;
1805
1806                 if (active_node->current == NULL ||
1807                     active_node->current->ino < key.objectid) {
1808                         if (active_node->current) {
1809                                 active_node->current->checked = 1;
1810                                 maybe_free_inode_rec(inode_cache,
1811                                                      active_node->current);
1812                         }
1813                         active_node->current = get_inode_rec(inode_cache,
1814                                                              key.objectid, 1);
1815                         BUG_ON(IS_ERR(active_node->current));
1816                 }
1817                 switch (key.type) {
1818                 case BTRFS_DIR_ITEM_KEY:
1819                 case BTRFS_DIR_INDEX_KEY:
1820                         ret = process_dir_item(root, eb, i, &key, active_node);
1821                         break;
1822                 case BTRFS_INODE_REF_KEY:
1823                         ret = process_inode_ref(eb, i, &key, active_node);
1824                         break;
1825                 case BTRFS_INODE_EXTREF_KEY:
1826                         ret = process_inode_extref(eb, i, &key, active_node);
1827                         break;
1828                 case BTRFS_INODE_ITEM_KEY:
1829                         ret = process_inode_item(eb, i, &key, active_node);
1830                         break;
1831                 case BTRFS_EXTENT_DATA_KEY:
1832                         ret = process_file_extent(root, eb, i, &key,
1833                                                   active_node);
1834                         break;
1835                 default:
1836                         break;
1837                 };
1838         }
1839         return ret;
1840 }
1841
1842 static void reada_walk_down(struct btrfs_root *root,
1843                             struct extent_buffer *node, int slot)
1844 {
1845         u64 bytenr;
1846         u64 ptr_gen;
1847         u32 nritems;
1848         u32 blocksize;
1849         int i;
1850         int level;
1851
1852         level = btrfs_header_level(node);
1853         if (level != 1)
1854                 return;
1855
1856         nritems = btrfs_header_nritems(node);
1857         blocksize = root->nodesize;
1858         for (i = slot; i < nritems; i++) {
1859                 bytenr = btrfs_node_blockptr(node, i);
1860                 ptr_gen = btrfs_node_ptr_generation(node, i);
1861                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1862         }
1863 }
1864
1865 /*
1866  * Check the child node/leaf by the following condition:
1867  * 1. the first item key of the node/leaf should be the same with the one
1868  *    in parent.
1869  * 2. block in parent node should match the child node/leaf.
1870  * 3. generation of parent node and child's header should be consistent.
1871  *
1872  * Or the child node/leaf pointed by the key in parent is not valid.
1873  *
1874  * We hope to check leaf owner too, but since subvol may share leaves,
1875  * which makes leaf owner check not so strong, key check should be
1876  * sufficient enough for that case.
1877  */
1878 static int check_child_node(struct btrfs_root *root,
1879                             struct extent_buffer *parent, int slot,
1880                             struct extent_buffer *child)
1881 {
1882         struct btrfs_key parent_key;
1883         struct btrfs_key child_key;
1884         int ret = 0;
1885
1886         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887         if (btrfs_header_level(child) == 0)
1888                 btrfs_item_key_to_cpu(child, &child_key, 0);
1889         else
1890                 btrfs_node_key_to_cpu(child, &child_key, 0);
1891
1892         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1893                 ret = -EINVAL;
1894                 fprintf(stderr,
1895                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896                         parent_key.objectid, parent_key.type, parent_key.offset,
1897                         child_key.objectid, child_key.type, child_key.offset);
1898         }
1899         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900                 ret = -EINVAL;
1901                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902                         btrfs_node_blockptr(parent, slot),
1903                         btrfs_header_bytenr(child));
1904         }
1905         if (btrfs_node_ptr_generation(parent, slot) !=
1906             btrfs_header_generation(child)) {
1907                 ret = -EINVAL;
1908                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909                         btrfs_header_generation(child),
1910                         btrfs_node_ptr_generation(parent, slot));
1911         }
1912         return ret;
1913 }
1914
1915 struct node_refs {
1916         u64 bytenr[BTRFS_MAX_LEVEL];
1917         u64 refs[BTRFS_MAX_LEVEL];
1918 };
1919
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921                           struct walk_control *wc, int *level,
1922                           struct node_refs *nrefs)
1923 {
1924         enum btrfs_tree_block_status status;
1925         u64 bytenr;
1926         u64 ptr_gen;
1927         struct extent_buffer *next;
1928         struct extent_buffer *cur;
1929         u32 blocksize;
1930         int ret, err = 0;
1931         u64 refs;
1932
1933         WARN_ON(*level < 0);
1934         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1935
1936         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937                 refs = nrefs->refs[*level];
1938                 ret = 0;
1939         } else {
1940                 ret = btrfs_lookup_extent_info(NULL, root,
1941                                        path->nodes[*level]->start,
1942                                        *level, 1, &refs, NULL);
1943                 if (ret < 0) {
1944                         err = ret;
1945                         goto out;
1946                 }
1947                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948                 nrefs->refs[*level] = refs;
1949         }
1950
1951         if (refs > 1) {
1952                 ret = enter_shared_node(root, path->nodes[*level]->start,
1953                                         refs, wc, *level);
1954                 if (ret > 0) {
1955                         err = ret;
1956                         goto out;
1957                 }
1958         }
1959
1960         while (*level >= 0) {
1961                 WARN_ON(*level < 0);
1962                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963                 cur = path->nodes[*level];
1964
1965                 if (btrfs_header_level(cur) != *level)
1966                         WARN_ON(1);
1967
1968                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1969                         break;
1970                 if (*level == 0) {
1971                         ret = process_one_leaf(root, cur, wc);
1972                         if (ret < 0)
1973                                 err = ret;
1974                         break;
1975                 }
1976                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978                 blocksize = root->nodesize;
1979
1980                 if (bytenr == nrefs->bytenr[*level - 1]) {
1981                         refs = nrefs->refs[*level - 1];
1982                 } else {
1983                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984                                         *level - 1, 1, &refs, NULL);
1985                         if (ret < 0) {
1986                                 refs = 0;
1987                         } else {
1988                                 nrefs->bytenr[*level - 1] = bytenr;
1989                                 nrefs->refs[*level - 1] = refs;
1990                         }
1991                 }
1992
1993                 if (refs > 1) {
1994                         ret = enter_shared_node(root, bytenr, refs,
1995                                                 wc, *level - 1);
1996                         if (ret > 0) {
1997                                 path->slots[*level]++;
1998                                 continue;
1999                         }
2000                 }
2001
2002                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004                         free_extent_buffer(next);
2005                         reada_walk_down(root, cur, path->slots[*level]);
2006                         next = read_tree_block(root, bytenr, blocksize,
2007                                                ptr_gen);
2008                         if (!extent_buffer_uptodate(next)) {
2009                                 struct btrfs_key node_key;
2010
2011                                 btrfs_node_key_to_cpu(path->nodes[*level],
2012                                                       &node_key,
2013                                                       path->slots[*level]);
2014                                 btrfs_add_corrupt_extent_record(root->fs_info,
2015                                                 &node_key,
2016                                                 path->nodes[*level]->start,
2017                                                 root->nodesize, *level);
2018                                 err = -EIO;
2019                                 goto out;
2020                         }
2021                 }
2022
2023                 ret = check_child_node(root, cur, path->slots[*level], next);
2024                 if (ret) {
2025                         err = ret;
2026                         goto out;
2027                 }
2028
2029                 if (btrfs_is_leaf(next))
2030                         status = btrfs_check_leaf(root, NULL, next);
2031                 else
2032                         status = btrfs_check_node(root, NULL, next);
2033                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034                         free_extent_buffer(next);
2035                         err = -EIO;
2036                         goto out;
2037                 }
2038
2039                 *level = *level - 1;
2040                 free_extent_buffer(path->nodes[*level]);
2041                 path->nodes[*level] = next;
2042                 path->slots[*level] = 0;
2043         }
2044 out:
2045         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2046         return err;
2047 }
2048
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050                         struct walk_control *wc, int *level)
2051 {
2052         int i;
2053         struct extent_buffer *leaf;
2054
2055         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2056                 leaf = path->nodes[i];
2057                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2058                         path->slots[i]++;
2059                         *level = i;
2060                         return 0;
2061                 } else {
2062                         free_extent_buffer(path->nodes[*level]);
2063                         path->nodes[*level] = NULL;
2064                         BUG_ON(*level > wc->active_node);
2065                         if (*level == wc->active_node)
2066                                 leave_shared_node(root, wc, *level);
2067                         *level = i + 1;
2068                 }
2069         }
2070         return 1;
2071 }
2072
2073 static int check_root_dir(struct inode_record *rec)
2074 {
2075         struct inode_backref *backref;
2076         int ret = -1;
2077
2078         if (!rec->found_inode_item || rec->errors)
2079                 goto out;
2080         if (rec->nlink != 1 || rec->found_link != 0)
2081                 goto out;
2082         if (list_empty(&rec->backrefs))
2083                 goto out;
2084         backref = to_inode_backref(rec->backrefs.next);
2085         if (!backref->found_inode_ref)
2086                 goto out;
2087         if (backref->index != 0 || backref->namelen != 2 ||
2088             memcmp(backref->name, "..", 2))
2089                 goto out;
2090         if (backref->found_dir_index || backref->found_dir_item)
2091                 goto out;
2092         ret = 0;
2093 out:
2094         return ret;
2095 }
2096
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098                               struct btrfs_root *root, struct btrfs_path *path,
2099                               struct inode_record *rec)
2100 {
2101         struct btrfs_inode_item *ei;
2102         struct btrfs_key key;
2103         int ret;
2104
2105         key.objectid = rec->ino;
2106         key.type = BTRFS_INODE_ITEM_KEY;
2107         key.offset = (u64)-1;
2108
2109         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2110         if (ret < 0)
2111                 goto out;
2112         if (ret) {
2113                 if (!path->slots[0]) {
2114                         ret = -ENOENT;
2115                         goto out;
2116                 }
2117                 path->slots[0]--;
2118                 ret = 0;
2119         }
2120         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2121         if (key.objectid != rec->ino) {
2122                 ret = -ENOENT;
2123                 goto out;
2124         }
2125
2126         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127                             struct btrfs_inode_item);
2128         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129         btrfs_mark_buffer_dirty(path->nodes[0]);
2130         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132                root->root_key.objectid);
2133 out:
2134         btrfs_release_path(path);
2135         return ret;
2136 }
2137
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139                                     struct btrfs_root *root,
2140                                     struct btrfs_path *path,
2141                                     struct inode_record *rec)
2142 {
2143         int ret;
2144
2145         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146         btrfs_release_path(path);
2147         if (!ret)
2148                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2149         return ret;
2150 }
2151
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153                                struct btrfs_root *root,
2154                                struct btrfs_path *path,
2155                                struct inode_record *rec)
2156 {
2157         struct btrfs_inode_item *ei;
2158         struct btrfs_key key;
2159         int ret = 0;
2160
2161         key.objectid = rec->ino;
2162         key.type = BTRFS_INODE_ITEM_KEY;
2163         key.offset = 0;
2164
2165         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2166         if (ret) {
2167                 if (ret > 0)
2168                         ret = -ENOENT;
2169                 goto out;
2170         }
2171
2172         /* Since ret == 0, no need to check anything */
2173         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174                             struct btrfs_inode_item);
2175         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176         btrfs_mark_buffer_dirty(path->nodes[0]);
2177         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178         printf("reset nbytes for ino %llu root %llu\n",
2179                rec->ino, root->root_key.objectid);
2180 out:
2181         btrfs_release_path(path);
2182         return ret;
2183 }
2184
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186                                  struct cache_tree *inode_cache,
2187                                  struct inode_record *rec,
2188                                  struct inode_backref *backref)
2189 {
2190         struct btrfs_path path;
2191         struct btrfs_trans_handle *trans;
2192         struct btrfs_dir_item *dir_item;
2193         struct extent_buffer *leaf;
2194         struct btrfs_key key;
2195         struct btrfs_disk_key disk_key;
2196         struct inode_record *dir_rec;
2197         unsigned long name_ptr;
2198         u32 data_size = sizeof(*dir_item) + backref->namelen;
2199         int ret;
2200
2201         trans = btrfs_start_transaction(root, 1);
2202         if (IS_ERR(trans))
2203                 return PTR_ERR(trans);
2204
2205         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206                 (unsigned long long)rec->ino);
2207
2208         btrfs_init_path(&path);
2209         key.objectid = backref->dir;
2210         key.type = BTRFS_DIR_INDEX_KEY;
2211         key.offset = backref->index;
2212         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2213         BUG_ON(ret);
2214
2215         leaf = path.nodes[0];
2216         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2217
2218         disk_key.objectid = cpu_to_le64(rec->ino);
2219         disk_key.type = BTRFS_INODE_ITEM_KEY;
2220         disk_key.offset = 0;
2221
2222         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224         btrfs_set_dir_data_len(leaf, dir_item, 0);
2225         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2226         name_ptr = (unsigned long)(dir_item + 1);
2227         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228         btrfs_mark_buffer_dirty(leaf);
2229         btrfs_release_path(&path);
2230         btrfs_commit_transaction(trans, root);
2231
2232         backref->found_dir_index = 1;
2233         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234         BUG_ON(IS_ERR(dir_rec));
2235         if (!dir_rec)
2236                 return 0;
2237         dir_rec->found_size += backref->namelen;
2238         if (dir_rec->found_size == dir_rec->isize &&
2239             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241         if (dir_rec->found_size != dir_rec->isize)
2242                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2243
2244         return 0;
2245 }
2246
2247 static int delete_dir_index(struct btrfs_root *root,
2248                             struct cache_tree *inode_cache,
2249                             struct inode_record *rec,
2250                             struct inode_backref *backref)
2251 {
2252         struct btrfs_trans_handle *trans;
2253         struct btrfs_dir_item *di;
2254         struct btrfs_path path;
2255         int ret = 0;
2256
2257         trans = btrfs_start_transaction(root, 1);
2258         if (IS_ERR(trans))
2259                 return PTR_ERR(trans);
2260
2261         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262                 (unsigned long long)backref->dir,
2263                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264                 (unsigned long long)root->objectid);
2265
2266         btrfs_init_path(&path);
2267         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268                                     backref->name, backref->namelen,
2269                                     backref->index, -1);
2270         if (IS_ERR(di)) {
2271                 ret = PTR_ERR(di);
2272                 btrfs_release_path(&path);
2273                 btrfs_commit_transaction(trans, root);
2274                 if (ret == -ENOENT)
2275                         return 0;
2276                 return ret;
2277         }
2278
2279         if (!di)
2280                 ret = btrfs_del_item(trans, root, &path);
2281         else
2282                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283         BUG_ON(ret);
2284         btrfs_release_path(&path);
2285         btrfs_commit_transaction(trans, root);
2286         return ret;
2287 }
2288
2289 static int create_inode_item(struct btrfs_root *root,
2290                              struct inode_record *rec,
2291                              struct inode_backref *backref, int root_dir)
2292 {
2293         struct btrfs_trans_handle *trans;
2294         struct btrfs_inode_item inode_item;
2295         time_t now = time(NULL);
2296         int ret;
2297
2298         trans = btrfs_start_transaction(root, 1);
2299         if (IS_ERR(trans)) {
2300                 ret = PTR_ERR(trans);
2301                 return ret;
2302         }
2303
2304         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305                 "be incomplete, please check permissions and content after "
2306                 "the fsck completes.\n", (unsigned long long)root->objectid,
2307                 (unsigned long long)rec->ino);
2308
2309         memset(&inode_item, 0, sizeof(inode_item));
2310         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2311         if (root_dir)
2312                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313         else
2314                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316         if (rec->found_dir_item) {
2317                 if (rec->found_file_extent)
2318                         fprintf(stderr, "root %llu inode %llu has both a dir "
2319                                 "item and extents, unsure if it is a dir or a "
2320                                 "regular file so setting it as a directory\n",
2321                                 (unsigned long long)root->objectid,
2322                                 (unsigned long long)rec->ino);
2323                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2325         } else if (!rec->found_dir_item) {
2326                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2328         }
2329         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337
2338         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339         BUG_ON(ret);
2340         btrfs_commit_transaction(trans, root);
2341         return 0;
2342 }
2343
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345                                  struct inode_record *rec,
2346                                  struct cache_tree *inode_cache,
2347                                  int delete)
2348 {
2349         struct inode_backref *tmp, *backref;
2350         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2351         int ret = 0;
2352         int repaired = 0;
2353
2354         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355                 if (!delete && rec->ino == root_dirid) {
2356                         if (!rec->found_inode_item) {
2357                                 ret = create_inode_item(root, rec, backref, 1);
2358                                 if (ret)
2359                                         break;
2360                                 repaired++;
2361                         }
2362                 }
2363
2364                 /* Index 0 for root dir's are special, don't mess with it */
2365                 if (rec->ino == root_dirid && backref->index == 0)
2366                         continue;
2367
2368                 if (delete &&
2369                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2370                      (backref->found_dir_index && backref->found_inode_ref &&
2371                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372                         ret = delete_dir_index(root, inode_cache, rec, backref);
2373                         if (ret)
2374                                 break;
2375                         repaired++;
2376                         list_del(&backref->list);
2377                         free(backref);
2378                 }
2379
2380                 if (!delete && !backref->found_dir_index &&
2381                     backref->found_dir_item && backref->found_inode_ref) {
2382                         ret = add_missing_dir_index(root, inode_cache, rec,
2383                                                     backref);
2384                         if (ret)
2385                                 break;
2386                         repaired++;
2387                         if (backref->found_dir_item &&
2388                             backref->found_dir_index &&
2389                             backref->found_dir_index) {
2390                                 if (!backref->errors &&
2391                                     backref->found_inode_ref) {
2392                                         list_del(&backref->list);
2393                                         free(backref);
2394                                 }
2395                         }
2396                 }
2397
2398                 if (!delete && (!backref->found_dir_index &&
2399                                 !backref->found_dir_item &&
2400                                 backref->found_inode_ref)) {
2401                         struct btrfs_trans_handle *trans;
2402                         struct btrfs_key location;
2403
2404                         ret = check_dir_conflict(root, backref->name,
2405                                                  backref->namelen,
2406                                                  backref->dir,
2407                                                  backref->index);
2408                         if (ret) {
2409                                 /*
2410                                  * let nlink fixing routine to handle it,
2411                                  * which can do it better.
2412                                  */
2413                                 ret = 0;
2414                                 break;
2415                         }
2416                         location.objectid = rec->ino;
2417                         location.type = BTRFS_INODE_ITEM_KEY;
2418                         location.offset = 0;
2419
2420                         trans = btrfs_start_transaction(root, 1);
2421                         if (IS_ERR(trans)) {
2422                                 ret = PTR_ERR(trans);
2423                                 break;
2424                         }
2425                         fprintf(stderr, "adding missing dir index/item pair "
2426                                 "for inode %llu\n",
2427                                 (unsigned long long)rec->ino);
2428                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2429                                                     backref->namelen,
2430                                                     backref->dir, &location,
2431                                                     imode_to_type(rec->imode),
2432                                                     backref->index);
2433                         BUG_ON(ret);
2434                         btrfs_commit_transaction(trans, root);
2435                         repaired++;
2436                 }
2437
2438                 if (!delete && (backref->found_inode_ref &&
2439                                 backref->found_dir_index &&
2440                                 backref->found_dir_item &&
2441                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442                                 !rec->found_inode_item)) {
2443                         ret = create_inode_item(root, rec, backref, 0);
2444                         if (ret)
2445                                 break;
2446                         repaired++;
2447                 }
2448
2449         }
2450         return ret ? ret : repaired;
2451 }
2452
2453 /*
2454  * To determine the file type for nlink/inode_item repair
2455  *
2456  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457  * Return -ENOENT if file type is not found.
2458  */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2460 {
2461         struct inode_backref *backref;
2462
2463         /* For inode item recovered case */
2464         if (rec->found_inode_item) {
2465                 *type = imode_to_type(rec->imode);
2466                 return 0;
2467         }
2468
2469         list_for_each_entry(backref, &rec->backrefs, list) {
2470                 if (backref->found_dir_index || backref->found_dir_item) {
2471                         *type = backref->filetype;
2472                         return 0;
2473                 }
2474         }
2475         return -ENOENT;
2476 }
2477
2478 /*
2479  * To determine the file name for nlink repair
2480  *
2481  * Return 0 if file name is found, set name and namelen.
2482  * Return -ENOENT if file name is not found.
2483  */
2484 static int find_file_name(struct inode_record *rec,
2485                           char *name, int *namelen)
2486 {
2487         struct inode_backref *backref;
2488
2489         list_for_each_entry(backref, &rec->backrefs, list) {
2490                 if (backref->found_dir_index || backref->found_dir_item ||
2491                     backref->found_inode_ref) {
2492                         memcpy(name, backref->name, backref->namelen);
2493                         *namelen = backref->namelen;
2494                         return 0;
2495                 }
2496         }
2497         return -ENOENT;
2498 }
2499
2500 /* Reset the nlink of the inode to the correct one */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502                        struct btrfs_root *root,
2503                        struct btrfs_path *path,
2504                        struct inode_record *rec)
2505 {
2506         struct inode_backref *backref;
2507         struct inode_backref *tmp;
2508         struct btrfs_key key;
2509         struct btrfs_inode_item *inode_item;
2510         int ret = 0;
2511
2512         /* We don't believe this either, reset it and iterate backref */
2513         rec->found_link = 0;
2514
2515         /* Remove all backref including the valid ones */
2516         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518                                    backref->index, backref->name,
2519                                    backref->namelen, 0);
2520                 if (ret < 0)
2521                         goto out;
2522
2523                 /* remove invalid backref, so it won't be added back */
2524                 if (!(backref->found_dir_index &&
2525                       backref->found_dir_item &&
2526                       backref->found_inode_ref)) {
2527                         list_del(&backref->list);
2528                         free(backref);
2529                 } else {
2530                         rec->found_link++;
2531                 }
2532         }
2533
2534         /* Set nlink to 0 */
2535         key.objectid = rec->ino;
2536         key.type = BTRFS_INODE_ITEM_KEY;
2537         key.offset = 0;
2538         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2539         if (ret < 0)
2540                 goto out;
2541         if (ret > 0) {
2542                 ret = -ENOENT;
2543                 goto out;
2544         }
2545         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546                                     struct btrfs_inode_item);
2547         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548         btrfs_mark_buffer_dirty(path->nodes[0]);
2549         btrfs_release_path(path);
2550
2551         /*
2552          * Add back valid inode_ref/dir_item/dir_index,
2553          * add_link() will handle the nlink inc, so new nlink must be correct
2554          */
2555         list_for_each_entry(backref, &rec->backrefs, list) {
2556                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557                                      backref->name, backref->namelen,
2558                                      backref->filetype, &backref->index, 1);
2559                 if (ret < 0)
2560                         goto out;
2561         }
2562 out:
2563         btrfs_release_path(path);
2564         return ret;
2565 }
2566
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568                                struct btrfs_root *root,
2569                                struct btrfs_path *path,
2570                                struct inode_record *rec)
2571 {
2572         char *dir_name = "lost+found";
2573         char namebuf[BTRFS_NAME_LEN] = {0};
2574         u64 lost_found_ino;
2575         u32 mode = 0700;
2576         u8 type = 0;
2577         int namelen = 0;
2578         int name_recovered = 0;
2579         int type_recovered = 0;
2580         int ret = 0;
2581
2582         /*
2583          * Get file name and type first before these invalid inode ref
2584          * are deleted by remove_all_invalid_backref()
2585          */
2586         name_recovered = !find_file_name(rec, namebuf, &namelen);
2587         type_recovered = !find_file_type(rec, &type);
2588
2589         if (!name_recovered) {
2590                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591                        rec->ino, rec->ino);
2592                 namelen = count_digits(rec->ino);
2593                 sprintf(namebuf, "%llu", rec->ino);
2594                 name_recovered = 1;
2595         }
2596         if (!type_recovered) {
2597                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598                        rec->ino);
2599                 type = BTRFS_FT_REG_FILE;
2600                 type_recovered = 1;
2601         }
2602
2603         ret = reset_nlink(trans, root, path, rec);
2604         if (ret < 0) {
2605                 fprintf(stderr,
2606                         "Failed to reset nlink for inode %llu: %s\n",
2607                         rec->ino, strerror(-ret));
2608                 goto out;
2609         }
2610
2611         if (rec->found_link == 0) {
2612                 lost_found_ino = root->highest_inode;
2613                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2614                         ret = -EOVERFLOW;
2615                         goto out;
2616                 }
2617                 lost_found_ino++;
2618                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2620                                   mode);
2621                 if (ret < 0) {
2622                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623                                 dir_name, strerror(-ret));
2624                         goto out;
2625                 }
2626                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627                                      namebuf, namelen, type, NULL, 1);
2628                 /*
2629                  * Add ".INO" suffix several times to handle case where
2630                  * "FILENAME.INO" is already taken by another file.
2631                  */
2632                 while (ret == -EEXIST) {
2633                         /*
2634                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2635                          */
2636                         if (namelen + count_digits(rec->ino) + 1 >
2637                             BTRFS_NAME_LEN) {
2638                                 ret = -EFBIG;
2639                                 goto out;
2640                         }
2641                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642                                  ".%llu", rec->ino);
2643                         namelen += count_digits(rec->ino) + 1;
2644                         ret = btrfs_add_link(trans, root, rec->ino,
2645                                              lost_found_ino, namebuf,
2646                                              namelen, type, NULL, 1);
2647                 }
2648                 if (ret < 0) {
2649                         fprintf(stderr,
2650                                 "Failed to link the inode %llu to %s dir: %s\n",
2651                                 rec->ino, dir_name, strerror(-ret));
2652                         goto out;
2653                 }
2654                 /*
2655                  * Just increase the found_link, don't actually add the
2656                  * backref. This will make things easier and this inode
2657                  * record will be freed after the repair is done.
2658                  * So fsck will not report problem about this inode.
2659                  */
2660                 rec->found_link++;
2661                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662                        namelen, namebuf, dir_name);
2663         }
2664         printf("Fixed the nlink of inode %llu\n", rec->ino);
2665 out:
2666         /*
2667          * Clear the flag anyway, or we will loop forever for the same inode
2668          * as it will not be removed from the bad inode list and the dead loop
2669          * happens.
2670          */
2671         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672         btrfs_release_path(path);
2673         return ret;
2674 }
2675
2676 /*
2677  * Check if there is any normal(reg or prealloc) file extent for given
2678  * ino.
2679  * This is used to determine the file type when neither its dir_index/item or
2680  * inode_item exists.
2681  *
2682  * This will *NOT* report error, if any error happens, just consider it does
2683  * not have any normal file extent.
2684  */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 {
2687         struct btrfs_path path;
2688         struct btrfs_key key;
2689         struct btrfs_key found_key;
2690         struct btrfs_file_extent_item *fi;
2691         u8 type;
2692         int ret = 0;
2693
2694         btrfs_init_path(&path);
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705                 ret = btrfs_next_leaf(root, &path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2713                                       path.slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path.nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_release_path(&path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         /*
2930          * For nlink repair, it may create a dir and add link, so
2931          * 2 for parent(256)'s dir_index and dir_item
2932          * 2 for lost+found dir's inode_item and inode_ref
2933          * 1 for the new inode_ref of the file
2934          * 2 for lost+found dir's dir_index and dir_item for the file
2935          */
2936         trans = btrfs_start_transaction(root, 7);
2937         if (IS_ERR(trans))
2938                 return PTR_ERR(trans);
2939
2940         btrfs_init_path(&path);
2941         if (rec->errors & I_ERR_NO_INODE_ITEM)
2942                 ret = repair_inode_no_item(trans, root, &path, rec);
2943         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948                 ret = repair_inode_isize(trans, root, &path, rec);
2949         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952                 ret = repair_inode_nlinks(trans, root, &path, rec);
2953         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954                 ret = repair_inode_nbytes(trans, root, &path, rec);
2955         btrfs_commit_transaction(trans, root);
2956         btrfs_release_path(&path);
2957         return ret;
2958 }
2959
/*
 * Check (and, when the global 'repair' is set, try to repair) all inode
 * records collected in @inode_cache for @root.  Every record handled by the
 * final loop is removed from the cache and freed.
 *
 * Return 0 if no error was counted, -1 if at least one was, or another
 * negative value when backref repair failed or changed something
 * (-EAGAIN in the latter case and after recreating a missing root dir).
 */
static int check_inode_recs(struct btrfs_root *root,
                            struct cache_tree *inode_cache)
{
        struct cache_extent *cache;
        struct ptr_node *node;
        struct inode_record *rec;
        struct inode_backref *backref;
        int stage = 0;
        int ret = 0;
        int err = 0;
        u64 error = 0;
        u64 root_dirid = btrfs_root_dirid(&root->root_item);

        /* Nothing is checked for a root with no refs; only warn on leftovers */
        if (btrfs_root_refs(&root->root_item) == 0) {
                if (!cache_tree_empty(inode_cache))
                        fprintf(stderr, "warning line %d\n", __LINE__);
                return 0;
        }

        /*
         * We need to record the highest inode number for later 'lost+found'
         * dir creation.
         * We must select an ino not used/referred by any existing inode, or
         * 'lost+found' ino may be a missing ino in a corrupted leaf,
         * this may cause 'lost+found' dir has wrong nlinks.
         */
        cache = last_cache_extent(inode_cache);
        if (cache) {
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                if (rec->ino > root->highest_inode)
                        root->highest_inode = rec->ino;
        }

        /*
         * We need to repair backrefs first because we could change some of the
         * errors in the inode recs.
         *
         * We also need to go through and delete invalid backrefs first and then
         * add the correct ones second.  We do this because we may get EEXIST
         * when adding back the correct index because we hadn't yet deleted the
         * invalid index.
         *
         * For example, if we were missing a dir index then the directories
         * isize would be wrong, so if we fixed the isize to what we thought it
         * would be and then fixed the backref we'd still have a invalid fs, so
         * we need to add back the dir index and then check to see if the isize
         * is still wrong.
         */
        /*
         * Stage 1 passes delete=1 to repair_inode_backrefs(), stage 2 passes
         * delete=0.  Stage 3 only runs when err was set by an earlier stage
         * and just frees every record.
         */
        while (stage < 3) {
                stage++;
                if (stage == 3 && !err)
                        break;

                cache = search_cache_extent(inode_cache, 0);
                while (repair && cache) {
                        node = container_of(cache, struct ptr_node, cache);
                        rec = node->data;
                        /* Fetch the successor now; node may be freed below */
                        cache = next_cache_extent(cache);

                        /* Need to free everything up and rescan */
                        if (stage == 3) {
                                remove_cache_extent(inode_cache, &node->cache);
                                free(node);
                                free_inode_rec(rec);
                                continue;
                        }

                        if (list_empty(&rec->backrefs))
                                continue;

                        ret = repair_inode_backrefs(root, rec, inode_cache,
                                                    stage == 1);
                        if (ret < 0) {
                                err = ret;
                                /* Next outer iteration becomes stage 3 */
                                stage = 2;
                                break;
                        } if (ret > 0) {
                                /* Something was repaired */
                                err = -EAGAIN;
                        }
                }
        }
        if (err)
                return err;

        rec = get_inode_rec(inode_cache, root_dirid, 0);
        BUG_ON(IS_ERR(rec));
        if (rec) {
                ret = check_root_dir(rec);
                if (ret) {
                        fprintf(stderr, "root %llu root dir %llu error\n",
                                (unsigned long long)root->root_key.objectid,
                                (unsigned long long)root_dirid);
                        print_inode_error(root, rec);
                        error++;
                }
        } else {
                /* No record for the root dir: recreate it in repair mode */
                if (repair) {
                        struct btrfs_trans_handle *trans;

                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                err = PTR_ERR(trans);
                                return err;
                        }

                        fprintf(stderr,
                                "root %llu missing its root dir, recreating\n",
                                (unsigned long long)root->objectid);

                        ret = btrfs_make_root_dir(trans, root, root_dirid);
                        BUG_ON(ret);

                        btrfs_commit_transaction(trans, root);
                        return -EAGAIN;
                }

                fprintf(stderr, "root %llu root dir %llu not found\n",
                        (unsigned long long)root->root_key.objectid,
                        (unsigned long long)root_dirid);
        }

        /* Drain the cache: each record is removed, checked and then freed */
        while (1) {
                cache = search_cache_extent(inode_cache, 0);
                if (!cache)
                        break;
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                remove_cache_extent(inode_cache, &node->cache);
                free(node);
                /* The root dir and the orphan objectid are not checked here */
                if (rec->ino == root_dirid ||
                    rec->ino == BTRFS_ORPHAN_OBJECTID) {
                        free_inode_rec(rec);
                        continue;
                }

                if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
                        ret = check_orphan_item(root, rec->ino);
                        if (ret == 0)
                                rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
                        if (can_free_inode_rec(rec)) {
                                free_inode_rec(rec);
                                continue;
                        }
                }

                if (!rec->found_inode_item)
                        rec->errors |= I_ERR_NO_INODE_ITEM;
                if (rec->found_link != rec->nlink)
                        rec->errors |= I_ERR_LINK_COUNT_WRONG;
                if (repair) {
                        ret = try_repair_inode(root, rec);
                        if (ret == 0 && can_free_inode_rec(rec)) {
                                free_inode_rec(rec);
                                continue;
                        }
                        ret = 0;
                }

                /*
                 * NOTE(review): in repair mode ret was just reset to 0, so
                 * the condition below is never true and an inode that could
                 * not be repaired is printed but not counted -- confirm
                 * that this is intended.
                 */
                if (!(repair && ret == 0))
                        error++;
                print_inode_error(root, rec);
                list_for_each_entry(backref, &rec->backrefs, list) {
                        if (!backref->found_dir_item)
                                backref->errors |= REF_ERR_NO_DIR_ITEM;
                        if (!backref->found_dir_index)
                                backref->errors |= REF_ERR_NO_DIR_INDEX;
                        if (!backref->found_inode_ref)
                                backref->errors |= REF_ERR_NO_INODE_REF;
                        fprintf(stderr, "\tunresolved ref dir %llu index %llu"
                                " namelen %u name %s filetype %d errors %x",
                                (unsigned long long)backref->dir,
                                (unsigned long long)backref->index,
                                backref->namelen, backref->name,
                                backref->filetype, backref->errors);
                        print_ref_error(backref->errors);
                }
                free_inode_rec(rec);
        }
        return (error > 0) ? -1 : 0;
}
3141
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3143                                         u64 objectid)
3144 {
3145         struct cache_extent *cache;
3146         struct root_record *rec = NULL;
3147         int ret;
3148
3149         cache = lookup_cache_extent(root_cache, objectid, 1);
3150         if (cache) {
3151                 rec = container_of(cache, struct root_record, cache);
3152         } else {
3153                 rec = calloc(1, sizeof(*rec));
3154                 if (!rec)
3155                         return ERR_PTR(-ENOMEM);
3156                 rec->objectid = objectid;
3157                 INIT_LIST_HEAD(&rec->backrefs);
3158                 rec->cache.start = objectid;
3159                 rec->cache.size = 1;
3160
3161                 ret = insert_cache_extent(root_cache, &rec->cache);
3162                 if (ret)
3163                         return ERR_PTR(-EEXIST);
3164         }
3165         return rec;
3166 }
3167
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169                                              u64 ref_root, u64 dir, u64 index,
3170                                              const char *name, int namelen)
3171 {
3172         struct root_backref *backref;
3173
3174         list_for_each_entry(backref, &rec->backrefs, list) {
3175                 if (backref->ref_root != ref_root || backref->dir != dir ||
3176                     backref->namelen != namelen)
3177                         continue;
3178                 if (memcmp(name, backref->name, namelen))
3179                         continue;
3180                 return backref;
3181         }
3182
3183         backref = calloc(1, sizeof(*backref) + namelen + 1);
3184         if (!backref)
3185                 return NULL;
3186         backref->ref_root = ref_root;
3187         backref->dir = dir;
3188         backref->index = index;
3189         backref->namelen = namelen;
3190         memcpy(backref->name, name, namelen);
3191         backref->name[namelen] = '\0';
3192         list_add_tail(&backref->list, &rec->backrefs);
3193         return backref;
3194 }
3195
3196 static void free_root_record(struct cache_extent *cache)
3197 {
3198         struct root_record *rec;
3199         struct root_backref *backref;
3200
3201         rec = container_of(cache, struct root_record, cache);
3202         while (!list_empty(&rec->backrefs)) {
3203                 backref = to_root_backref(rec->backrefs.next);
3204                 list_del(&backref->list);
3205                 free(backref);
3206         }
3207
3208         free(rec);
3209 }
3210
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3212
3213 static int add_root_backref(struct cache_tree *root_cache,
3214                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3215                             const char *name, int namelen,
3216                             int item_type, int errors)
3217 {
3218         struct root_record *rec;
3219         struct root_backref *backref;
3220
3221         rec = get_root_rec(root_cache, root_id);
3222         BUG_ON(IS_ERR(rec));
3223         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3224         BUG_ON(!backref);
3225
3226         backref->errors |= errors;
3227
3228         if (item_type != BTRFS_DIR_ITEM_KEY) {
3229                 if (backref->found_dir_index || backref->found_back_ref ||
3230                     backref->found_forward_ref) {
3231                         if (backref->index != index)
3232                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3233                 } else {
3234                         backref->index = index;
3235                 }
3236         }
3237
3238         if (item_type == BTRFS_DIR_ITEM_KEY) {
3239                 if (backref->found_forward_ref)
3240                         rec->found_ref++;
3241                 backref->found_dir_item = 1;
3242         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243                 backref->found_dir_index = 1;
3244         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245                 if (backref->found_forward_ref)
3246                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3247                 else if (backref->found_dir_item)
3248                         rec->found_ref++;
3249                 backref->found_forward_ref = 1;
3250         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251                 if (backref->found_back_ref)
3252                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253                 backref->found_back_ref = 1;
3254         } else {
3255                 BUG_ON(1);
3256         }
3257
3258         if (backref->found_forward_ref && backref->found_dir_item)
3259                 backref->reachable = 1;
3260         return 0;
3261 }
3262
3263 static int merge_root_recs(struct btrfs_root *root,
3264                            struct cache_tree *src_cache,
3265                            struct cache_tree *dst_cache)
3266 {
3267         struct cache_extent *cache;
3268         struct ptr_node *node;
3269         struct inode_record *rec;
3270         struct inode_backref *backref;
3271         int ret = 0;
3272
3273         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274                 free_inode_recs_tree(src_cache);
3275                 return 0;
3276         }
3277
3278         while (1) {
3279                 cache = search_cache_extent(src_cache, 0);
3280                 if (!cache)
3281                         break;
3282                 node = container_of(cache, struct ptr_node, cache);
3283                 rec = node->data;
3284                 remove_cache_extent(src_cache, &node->cache);
3285                 free(node);
3286
3287                 ret = is_child_root(root, root->objectid, rec->ino);
3288                 if (ret < 0)
3289                         break;
3290                 else if (ret == 0)
3291                         goto skip;
3292
3293                 list_for_each_entry(backref, &rec->backrefs, list) {
3294                         BUG_ON(backref->found_inode_ref);
3295                         if (backref->found_dir_item)
3296                                 add_root_backref(dst_cache, rec->ino,
3297                                         root->root_key.objectid, backref->dir,
3298                                         backref->index, backref->name,
3299                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3300                                         backref->errors);
3301                         if (backref->found_dir_index)
3302                                 add_root_backref(dst_cache, rec->ino,
3303                                         root->root_key.objectid, backref->dir,
3304                                         backref->index, backref->name,
3305                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3306                                         backref->errors);
3307                 }
3308 skip:
3309                 free_inode_rec(rec);
3310         }
3311         if (ret < 0)
3312                 return ret;
3313         return 0;
3314 }
3315
3316 static int check_root_refs(struct btrfs_root *root,
3317                            struct cache_tree *root_cache)
3318 {
3319         struct root_record *rec;
3320         struct root_record *ref_root;
3321         struct root_backref *backref;
3322         struct cache_extent *cache;
3323         int loop = 1;
3324         int ret;
3325         int error;
3326         int errors = 0;
3327
3328         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329         BUG_ON(IS_ERR(rec));
3330         rec->found_ref = 1;
3331
3332         /* fixme: this can not detect circular references */
3333         while (loop) {
3334                 loop = 0;
3335                 cache = search_cache_extent(root_cache, 0);
3336                 while (1) {
3337                         if (!cache)
3338                                 break;
3339                         rec = container_of(cache, struct root_record, cache);
3340                         cache = next_cache_extent(cache);
3341
3342                         if (rec->found_ref == 0)
3343                                 continue;
3344
3345                         list_for_each_entry(backref, &rec->backrefs, list) {
3346                                 if (!backref->reachable)
3347                                         continue;
3348
3349                                 ref_root = get_root_rec(root_cache,
3350                                                         backref->ref_root);
3351                                 BUG_ON(IS_ERR(ref_root));
3352                                 if (ref_root->found_ref > 0)
3353                                         continue;
3354
3355                                 backref->reachable = 0;
3356                                 rec->found_ref--;
3357                                 if (rec->found_ref == 0)
3358                                         loop = 1;
3359                         }
3360                 }
3361         }
3362
3363         cache = search_cache_extent(root_cache, 0);
3364         while (1) {
3365                 if (!cache)
3366                         break;
3367                 rec = container_of(cache, struct root_record, cache);
3368                 cache = next_cache_extent(cache);
3369
3370                 if (rec->found_ref == 0 &&
3371                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373                         ret = check_orphan_item(root->fs_info->tree_root,
3374                                                 rec->objectid);
3375                         if (ret == 0)
3376                                 continue;
3377
3378                         /*
3379                          * If we don't have a root item then we likely just have
3380                          * a dir item in a snapshot for this root but no actual
3381                          * ref key or anything so it's meaningless.
3382                          */
3383                         if (!rec->found_root_item)
3384                                 continue;
3385                         errors++;
3386                         fprintf(stderr, "fs tree %llu not referenced\n",
3387                                 (unsigned long long)rec->objectid);
3388                 }
3389
3390                 error = 0;
3391                 if (rec->found_ref > 0 && !rec->found_root_item)
3392                         error = 1;
3393                 list_for_each_entry(backref, &rec->backrefs, list) {
3394                         if (!backref->found_dir_item)
3395                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396                         if (!backref->found_dir_index)
3397                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398                         if (!backref->found_back_ref)
3399                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400                         if (!backref->found_forward_ref)
3401                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3402                         if (backref->reachable && backref->errors)
3403                                 error = 1;
3404                 }
3405                 if (!error)
3406                         continue;
3407
3408                 errors++;
3409                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410                         (unsigned long long)rec->objectid, rec->found_ref,
3411                          rec->found_root_item ? "" : "not found");
3412
3413                 list_for_each_entry(backref, &rec->backrefs, list) {
3414                         if (!backref->reachable)
3415                                 continue;
3416                         if (!backref->errors && rec->found_root_item)
3417                                 continue;
3418                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419                                 " index %llu namelen %u name %s errors %x\n",
3420                                 (unsigned long long)backref->ref_root,
3421                                 (unsigned long long)backref->dir,
3422                                 (unsigned long long)backref->index,
3423                                 backref->namelen, backref->name,
3424                                 backref->errors);
3425                         print_ref_error(backref->errors);
3426                 }
3427         }
3428         return errors > 0 ? 1 : 0;
3429 }
3430
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432                             struct btrfs_key *key,
3433                             struct cache_tree *root_cache)
3434 {
3435         u64 dirid;
3436         u64 index;
3437         u32 len;
3438         u32 name_len;
3439         struct btrfs_root_ref *ref;
3440         char namebuf[BTRFS_NAME_LEN];
3441         int error;
3442
3443         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3444
3445         dirid = btrfs_root_ref_dirid(eb, ref);
3446         index = btrfs_root_ref_sequence(eb, ref);
3447         name_len = btrfs_root_ref_name_len(eb, ref);
3448
3449         if (name_len <= BTRFS_NAME_LEN) {
3450                 len = name_len;
3451                 error = 0;
3452         } else {
3453                 len = BTRFS_NAME_LEN;
3454                 error = REF_ERR_NAME_TOO_LONG;
3455         }
3456         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3457
3458         if (key->type == BTRFS_ROOT_REF_KEY) {
3459                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460                                  index, namebuf, len, key->type, error);
3461         } else {
3462                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463                                  index, namebuf, len, key->type, error);
3464         }
3465         return 0;
3466 }
3467
3468 static void free_corrupt_block(struct cache_extent *cache)
3469 {
3470         struct btrfs_corrupt_block *corrupt;
3471
3472         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3473         free(corrupt);
3474 }
3475
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3477
3478 /*
3479  * Repair the btree of the given root.
3480  *
3481  * The fix is to remove the node key in corrupt_blocks cache_tree.
3482  * and rebalance the tree.
3483  * After the fix, the btree should be writeable.
3484  */
3485 static int repair_btree(struct btrfs_root *root,
3486                         struct cache_tree *corrupt_blocks)
3487 {
3488         struct btrfs_trans_handle *trans;
3489         struct btrfs_path path;
3490         struct btrfs_corrupt_block *corrupt;
3491         struct cache_extent *cache;
3492         struct btrfs_key key;
3493         u64 offset;
3494         int level;
3495         int ret = 0;
3496
3497         if (cache_tree_empty(corrupt_blocks))
3498                 return 0;
3499
3500         trans = btrfs_start_transaction(root, 1);
3501         if (IS_ERR(trans)) {
3502                 ret = PTR_ERR(trans);
3503                 fprintf(stderr, "Error starting transaction: %s\n",
3504                         strerror(-ret));
3505                 return ret;
3506         }
3507         btrfs_init_path(&path);
3508         cache = first_cache_extent(corrupt_blocks);
3509         while (cache) {
3510                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3511                                        cache);
3512                 level = corrupt->level;
3513                 path.lowest_level = level;
3514                 key.objectid = corrupt->key.objectid;
3515                 key.type = corrupt->key.type;
3516                 key.offset = corrupt->key.offset;
3517
3518                 /*
3519                  * Here we don't want to do any tree balance, since it may
3520                  * cause a balance with corrupted brother leaf/node,
3521                  * so ins_len set to 0 here.
3522                  * Balance will be done after all corrupt node/leaf is deleted.
3523                  */
3524                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3525                 if (ret < 0)
3526                         goto out;
3527                 offset = btrfs_node_blockptr(path.nodes[level],
3528                                              path.slots[level]);
3529
3530                 /* Remove the ptr */
3531                 ret = btrfs_del_ptr(trans, root, &path, level,
3532                                     path.slots[level]);
3533                 if (ret < 0)
3534                         goto out;
3535                 /*
3536                  * Remove the corresponding extent
3537                  * return value is not concerned.
3538                  */
3539                 btrfs_release_path(&path);
3540                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541                                         0, root->root_key.objectid,
3542                                         level - 1, 0);
3543                 cache = next_cache_extent(cache);
3544         }
3545
3546         /* Balance the btree using btrfs_search_slot() */
3547         cache = first_cache_extent(corrupt_blocks);
3548         while (cache) {
3549                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3550                                        cache);
3551                 memcpy(&key, &corrupt->key, sizeof(key));
3552                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3553                 if (ret < 0)
3554                         goto out;
3555                 /* return will always >0 since it won't find the item */
3556                 ret = 0;
3557                 btrfs_release_path(&path);
3558                 cache = next_cache_extent(cache);
3559         }
3560 out:
3561         btrfs_commit_transaction(trans, root);
3562         btrfs_release_path(&path);
3563         return ret;
3564 }
3565
3566 static int check_fs_root(struct btrfs_root *root,
3567                          struct cache_tree *root_cache,
3568                          struct walk_control *wc)
3569 {
3570         int ret = 0;
3571         int err = 0;
3572         int wret;
3573         int level;
3574         struct btrfs_path path;
3575         struct shared_node root_node;
3576         struct root_record *rec;
3577         struct btrfs_root_item *root_item = &root->root_item;
3578         struct cache_tree corrupt_blocks;
3579         struct orphan_data_extent *orphan;
3580         struct orphan_data_extent *tmp;
3581         enum btrfs_tree_block_status status;
3582         struct node_refs nrefs;
3583
3584         /*
3585          * Reuse the corrupt_block cache tree to record corrupted tree block
3586          *
3587          * Unlike the usage in extent tree check, here we do it in a per
3588          * fs/subvol tree base.
3589          */
3590         cache_tree_init(&corrupt_blocks);
3591         root->fs_info->corrupt_blocks = &corrupt_blocks;
3592
3593         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594                 rec = get_root_rec(root_cache, root->root_key.objectid);
3595                 BUG_ON(IS_ERR(rec));
3596                 if (btrfs_root_refs(root_item) > 0)
3597                         rec->found_root_item = 1;
3598         }
3599
3600         btrfs_init_path(&path);
3601         memset(&root_node, 0, sizeof(root_node));
3602         cache_tree_init(&root_node.root_cache);
3603         cache_tree_init(&root_node.inode_cache);
3604         memset(&nrefs, 0, sizeof(nrefs));
3605
3606         /* Move the orphan extent record to corresponding inode_record */
3607         list_for_each_entry_safe(orphan, tmp,
3608                                  &root->orphan_data_extents, list) {
3609                 struct inode_record *inode;
3610
3611                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3612                                       1);
3613                 BUG_ON(IS_ERR(inode));
3614                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615                 list_move(&orphan->list, &inode->orphan_extents);
3616         }
3617
3618         level = btrfs_header_level(root->node);
3619         memset(wc->nodes, 0, sizeof(wc->nodes));
3620         wc->nodes[level] = &root_node;
3621         wc->active_node = level;
3622         wc->root_level = level;
3623
3624         /* We may not have checked the root block, lets do that now */
3625         if (btrfs_is_leaf(root->node))
3626                 status = btrfs_check_leaf(root, NULL, root->node);
3627         else
3628                 status = btrfs_check_node(root, NULL, root->node);
3629         if (status != BTRFS_TREE_BLOCK_CLEAN)
3630                 return -EIO;
3631
3632         if (btrfs_root_refs(root_item) > 0 ||
3633             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634                 path.nodes[level] = root->node;
3635                 extent_buffer_get(root->node);
3636                 path.slots[level] = 0;
3637         } else {
3638                 struct btrfs_key key;
3639                 struct btrfs_disk_key found_key;
3640
3641                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642                 level = root_item->drop_level;
3643                 path.lowest_level = level;
3644                 if (level > btrfs_header_level(root->node) ||
3645                     level >= BTRFS_MAX_LEVEL) {
3646                         error("ignoring invalid drop level: %u", level);
3647                         goto skip_walking;
3648                 }
3649                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3650                 if (wret < 0)
3651                         goto skip_walking;
3652                 btrfs_node_key(path.nodes[level], &found_key,
3653                                 path.slots[level]);
3654                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655                                         sizeof(found_key)));
3656         }
3657
3658         while (1) {
3659                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3660                 if (wret < 0)
3661                         ret = wret;
3662                 if (wret != 0)
3663                         break;
3664
3665                 wret = walk_up_tree(root, &path, wc, &level);
3666                 if (wret < 0)
3667                         ret = wret;
3668                 if (wret != 0)
3669                         break;
3670         }
3671 skip_walking:
3672         btrfs_release_path(&path);
3673
3674         if (!cache_tree_empty(&corrupt_blocks)) {
3675                 struct cache_extent *cache;
3676                 struct btrfs_corrupt_block *corrupt;
3677
3678                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679                        root->root_key.objectid);
3680                 cache = first_cache_extent(&corrupt_blocks);
3681                 while (cache) {
3682                         corrupt = container_of(cache,
3683                                                struct btrfs_corrupt_block,
3684                                                cache);
3685                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686                                cache->start, corrupt->level,
3687                                corrupt->key.objectid, corrupt->key.type,
3688                                corrupt->key.offset);
3689                         cache = next_cache_extent(cache);
3690                 }
3691                 if (repair) {
3692                         printf("Try to repair the btree for root %llu\n",
3693                                root->root_key.objectid);
3694                         ret = repair_btree(root, &corrupt_blocks);
3695                         if (ret < 0)
3696                                 fprintf(stderr, "Failed to repair btree: %s\n",
3697                                         strerror(-ret));
3698                         if (!ret)
3699                                 printf("Btree for root %llu is fixed\n",
3700                                        root->root_key.objectid);
3701                 }
3702         }
3703
3704         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3705         if (err < 0)
3706                 ret = err;
3707
3708         if (root_node.current) {
3709                 root_node.current->checked = 1;
3710                 maybe_free_inode_rec(&root_node.inode_cache,
3711                                 root_node.current);
3712         }
3713
3714         err = check_inode_recs(root, &root_node.inode_cache);
3715         if (!ret)
3716                 ret = err;
3717
3718         free_corrupt_blocks_tree(&corrupt_blocks);
3719         root->fs_info->corrupt_blocks = NULL;
3720         free_orphan_data_extents(&root->orphan_data_extents);
3721         return ret;
3722 }
3723
3724 static int fs_root_objectid(u64 objectid)
3725 {
3726         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3728                 return 1;
3729         return is_fstree(objectid);
3730 }
3731
3732 static int check_fs_roots(struct btrfs_root *root,
3733                           struct cache_tree *root_cache)
3734 {
3735         struct btrfs_path path;
3736         struct btrfs_key key;
3737         struct walk_control wc;
3738         struct extent_buffer *leaf, *tree_node;
3739         struct btrfs_root *tmp_root;
3740         struct btrfs_root *tree_root = root->fs_info->tree_root;
3741         int ret;
3742         int err = 0;
3743
3744         if (ctx.progress_enabled) {
3745                 ctx.tp = TASK_FS_ROOTS;
3746                 task_start(ctx.info);
3747         }
3748
3749         /*
3750          * Just in case we made any changes to the extent tree that weren't
3751          * reflected into the free space cache yet.
3752          */
3753         if (repair)
3754                 reset_cached_block_groups(root->fs_info);
3755         memset(&wc, 0, sizeof(wc));
3756         cache_tree_init(&wc.shared);
3757         btrfs_init_path(&path);
3758
3759 again:
3760         key.offset = 0;
3761         key.objectid = 0;
3762         key.type = BTRFS_ROOT_ITEM_KEY;
3763         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3764         if (ret < 0) {
3765                 err = 1;
3766                 goto out;
3767         }
3768         tree_node = tree_root->node;
3769         while (1) {
3770                 if (tree_node != tree_root->node) {
3771                         free_root_recs_tree(root_cache);
3772                         btrfs_release_path(&path);
3773                         goto again;
3774                 }
3775                 leaf = path.nodes[0];
3776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777                         ret = btrfs_next_leaf(tree_root, &path);
3778                         if (ret) {
3779                                 if (ret < 0)
3780                                         err = 1;
3781                                 break;
3782                         }
3783                         leaf = path.nodes[0];
3784                 }
3785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787                     fs_root_objectid(key.objectid)) {
3788                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789                                 tmp_root = btrfs_read_fs_root_no_cache(
3790                                                 root->fs_info, &key);
3791                         } else {
3792                                 key.offset = (u64)-1;
3793                                 tmp_root = btrfs_read_fs_root(
3794                                                 root->fs_info, &key);
3795                         }
3796                         if (IS_ERR(tmp_root)) {
3797                                 err = 1;
3798                                 goto next;
3799                         }
3800                         ret = check_fs_root(tmp_root, root_cache, &wc);
3801                         if (ret == -EAGAIN) {
3802                                 free_root_recs_tree(root_cache);
3803                                 btrfs_release_path(&path);
3804                                 goto again;
3805                         }
3806                         if (ret)
3807                                 err = 1;
3808                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809                                 btrfs_free_fs_root(tmp_root);
3810                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3812                         process_root_ref(leaf, path.slots[0], &key,
3813                                          root_cache);
3814                 }
3815 next:
3816                 path.slots[0]++;
3817         }
3818 out:
3819         btrfs_release_path(&path);
3820         if (err)
3821                 free_extent_cache_tree(&wc.shared);
3822         if (!cache_tree_empty(&wc.shared))
3823                 fprintf(stderr, "warning line %d\n", __LINE__);
3824
3825         task_stop(ctx.info);
3826
3827         return err;
3828 }
3829
3830 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
3831 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
3832 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
3833
3834 /*
3835  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3836  * INODE_REF/INODE_EXTREF match.
3837  *
3838  * @root:       the root of the fs/file tree
3839  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
3840  * @key:        the key of the DIR_ITEM/DIR_INDEX
3841  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
3842  *              distinguish root_dir between normal dir/file
3843  * @name:       the name in the INODE_REF/INODE_EXTREF
3844  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
3845  * @mode:       the st_mode of INODE_ITEM
3846  *
3847  * Return 0 if no error occurred.
3848  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3849  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3850  * dir/file.
3851  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3852  * not match for normal dir/file.
3853  */
3854 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3855                          struct btrfs_key *key, u64 index, char *name,
3856                          u32 namelen, u32 mode)
3857 {
3858         struct btrfs_path path;
3859         struct extent_buffer *node;
3860         struct btrfs_dir_item *di;
3861         struct btrfs_key location;
3862         char namebuf[BTRFS_NAME_LEN] = {0};
3863         u32 total;
3864         u32 cur = 0;
3865         u32 len;
3866         u32 name_len;
3867         u32 data_len;
3868         u8 filetype;
3869         int slot;
3870         int ret;
3871
3872         btrfs_init_path(&path);
3873         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3874         if (ret < 0) {
3875                 ret = DIR_ITEM_MISSING;
3876                 goto out;
3877         }
3878
3879         /* Process root dir and goto out*/
3880         if (index == 0) {
3881                 if (ret == 0) {
3882                         ret = ROOT_DIR_ERROR;
3883                         error(
3884                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3885                                 root->objectid,
3886                                 ref_key->type == BTRFS_INODE_REF_KEY ?
3887                                         "REF" : "EXTREF",
3888                                 ref_key->objectid, ref_key->offset,
3889                                 key->type == BTRFS_DIR_ITEM_KEY ?
3890                                         "DIR_ITEM" : "DIR_INDEX");
3891                 } else {
3892                         ret = 0;
3893                 }
3894
3895                 goto out;
3896         }
3897
3898         /* Process normal file/dir */
3899         if (ret > 0) {
3900                 ret = DIR_ITEM_MISSING;
3901                 error(
3902                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3903                         root->objectid,
3904                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3905                         ref_key->objectid, ref_key->offset,
3906                         key->type == BTRFS_DIR_ITEM_KEY ?
3907                                 "DIR_ITEM" : "DIR_INDEX",
3908                         key->objectid, key->offset, namelen, name,
3909                         imode_to_type(mode));
3910                 goto out;
3911         }
3912
3913         /* Check whether inode_id/filetype/name match */
3914         node = path.nodes[0];
3915         slot = path.slots[0];
3916         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3917         total = btrfs_item_size_nr(node, slot);
3918         while (cur < total) {
3919                 ret = DIR_ITEM_MISMATCH;
3920                 name_len = btrfs_dir_name_len(node, di);
3921                 data_len = btrfs_dir_data_len(node, di);
3922
3923                 btrfs_dir_item_key_to_cpu(node, di, &location);
3924                 if (location.objectid != ref_key->objectid ||
3925                     location.type !=  BTRFS_INODE_ITEM_KEY ||
3926                     location.offset != 0)
3927                         goto next;
3928
3929                 filetype = btrfs_dir_type(node, di);
3930                 if (imode_to_type(mode) != filetype)
3931                         goto next;
3932
3933                 if (name_len <= BTRFS_NAME_LEN) {
3934                         len = name_len;
3935                 } else {
3936                         len = BTRFS_NAME_LEN;
3937                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3938                         root->objectid,
3939                         key->type == BTRFS_DIR_ITEM_KEY ?
3940                         "DIR_ITEM" : "DIR_INDEX",
3941                         key->objectid, key->offset, name_len);
3942                 }
3943                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3944                 if (len != namelen || strncmp(namebuf, name, len))
3945                         goto next;
3946
3947                 ret = 0;
3948                 goto out;
3949 next:
3950                 len = sizeof(*di) + name_len + data_len;
3951                 di = (struct btrfs_dir_item *)((char *)di + len);
3952                 cur += len;
3953         }
3954         if (ret == DIR_ITEM_MISMATCH)
3955                 error(
3956                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3957                         root->objectid,
3958                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3959                         ref_key->objectid, ref_key->offset,
3960                         key->type == BTRFS_DIR_ITEM_KEY ?
3961                                 "DIR_ITEM" : "DIR_INDEX",
3962                         key->objectid, key->offset, namelen, name,
3963                         imode_to_type(mode));
3964 out:
3965         btrfs_release_path(&path);
3966         return ret;
3967 }
3968
3969 /*
3970  * Traverse the given INODE_REF and call find_dir_item() to find related
3971  * DIR_ITEM/DIR_INDEX.
3972  *
3973  * @root:       the root of the fs/file tree
3974  * @ref_key:    the key of the INODE_REF
3975  * @refs:       the count of INODE_REF
3976  * @mode:       the st_mode of INODE_ITEM
3977  *
3978  * Return 0 if no error occurred.
3979  */
3980 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3981                            struct extent_buffer *node, int slot, u64 *refs,
3982                            int mode)
3983 {
3984         struct btrfs_key key;
3985         struct btrfs_inode_ref *ref;
3986         char namebuf[BTRFS_NAME_LEN] = {0};
3987         u32 total;
3988         u32 cur = 0;
3989         u32 len;
3990         u32 name_len;
3991         u64 index;
3992         int ret, err = 0;
3993
3994         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
3995         total = btrfs_item_size_nr(node, slot);
3996
3997 next:
3998         /* Update inode ref count */
3999         (*refs)++;
4000
4001         index = btrfs_inode_ref_index(node, ref);
4002         name_len = btrfs_inode_ref_name_len(node, ref);
4003         if (name_len <= BTRFS_NAME_LEN) {
4004                 len = name_len;
4005         } else {
4006                 len = BTRFS_NAME_LEN;
4007                 warning("root %llu INODE_REF[%llu %llu] name too long",
4008                         root->objectid, ref_key->objectid, ref_key->offset);
4009         }
4010
4011         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4012
4013         /* Check root dir ref name */
4014         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4015                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4016                       root->objectid, ref_key->objectid, ref_key->offset,
4017                       namebuf);
4018                 err |= ROOT_DIR_ERROR;
4019         }
4020
4021         /* Find related DIR_INDEX */
4022         key.objectid = ref_key->offset;
4023         key.type = BTRFS_DIR_INDEX_KEY;
4024         key.offset = index;
4025         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4026         err |= ret;
4027
4028         /* Find related dir_item */
4029         key.objectid = ref_key->offset;
4030         key.type = BTRFS_DIR_ITEM_KEY;
4031         key.offset = btrfs_name_hash(namebuf, len);
4032         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4033         err |= ret;
4034
4035         len = sizeof(*ref) + name_len;
4036         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4037         cur += len;
4038         if (cur < total)
4039                 goto next;
4040
4041         return err;
4042 }
4043
4044 /*
4045  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4046  * DIR_ITEM/DIR_INDEX.
4047  *
4048  * @root:       the root of the fs/file tree
4049  * @ref_key:    the key of the INODE_EXTREF
4050  * @refs:       the count of INODE_EXTREF
4051  * @mode:       the st_mode of INODE_ITEM
4052  *
4053  * Return 0 if no error occurred.
4054  */
4055 static int check_inode_extref(struct btrfs_root *root,
4056                               struct btrfs_key *ref_key,
4057                               struct extent_buffer *node, int slot, u64 *refs,
4058                               int mode)
4059 {
4060         struct btrfs_key key;
4061         struct btrfs_inode_extref *extref;
4062         char namebuf[BTRFS_NAME_LEN] = {0};
4063         u32 total;
4064         u32 cur = 0;
4065         u32 len;
4066         u32 name_len;
4067         u64 index;
4068         u64 parent;
4069         int ret;
4070         int err = 0;
4071
4072         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4073         total = btrfs_item_size_nr(node, slot);
4074
4075 next:
4076         /* update inode ref count */
4077         (*refs)++;
4078         name_len = btrfs_inode_extref_name_len(node, extref);
4079         index = btrfs_inode_extref_index(node, extref);
4080         parent = btrfs_inode_extref_parent(node, extref);
4081         if (name_len <= BTRFS_NAME_LEN) {
4082                 len = name_len;
4083         } else {
4084                 len = BTRFS_NAME_LEN;
4085                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4086                         root->objectid, ref_key->objectid, ref_key->offset);
4087         }
4088         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4089
4090         /* Check root dir ref name */
4091         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4092                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4093                       root->objectid, ref_key->objectid, ref_key->offset,
4094                       namebuf);
4095                 err |= ROOT_DIR_ERROR;
4096         }
4097
4098         /* find related dir_index */
4099         key.objectid = parent;
4100         key.type = BTRFS_DIR_INDEX_KEY;
4101         key.offset = index;
4102         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4103         err |= ret;
4104
4105         /* find related dir_item */
4106         key.objectid = parent;
4107         key.type = BTRFS_DIR_ITEM_KEY;
4108         key.offset = btrfs_name_hash(namebuf, len);
4109         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4110         err |= ret;
4111
4112         len = sizeof(*extref) + name_len;
4113         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4114         cur += len;
4115
4116         if (cur < total)
4117                 goto next;
4118
4119         return err;
4120 }
4121
4122 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4123 {
4124         struct list_head *cur = rec->backrefs.next;
4125         struct extent_backref *back;
4126         struct tree_backref *tback;
4127         struct data_backref *dback;
4128         u64 found = 0;
4129         int err = 0;
4130
4131         while(cur != &rec->backrefs) {
4132                 back = to_extent_backref(cur);
4133                 cur = cur->next;
4134                 if (!back->found_extent_tree) {
4135                         err = 1;
4136                         if (!print_errs)
4137                                 goto out;
4138                         if (back->is_data) {
4139                                 dback = to_data_backref(back);
4140                                 fprintf(stderr, "Backref %llu %s %llu"
4141                                         " owner %llu offset %llu num_refs %lu"
4142                                         " not found in extent tree\n",
4143                                         (unsigned long long)rec->start,
4144                                         back->full_backref ?
4145                                         "parent" : "root",
4146                                         back->full_backref ?
4147                                         (unsigned long long)dback->parent:
4148                                         (unsigned long long)dback->root,
4149                                         (unsigned long long)dback->owner,
4150                                         (unsigned long long)dback->offset,
4151                                         (unsigned long)dback->num_refs);
4152                         } else {
4153                                 tback = to_tree_backref(back);
4154                                 fprintf(stderr, "Backref %llu parent %llu"
4155                                         " root %llu not found in extent tree\n",
4156                                         (unsigned long long)rec->start,
4157                                         (unsigned long long)tback->parent,
4158                                         (unsigned long long)tback->root);
4159                         }
4160                 }
4161                 if (!back->is_data && !back->found_ref) {
4162                         err = 1;
4163                         if (!print_errs)
4164                                 goto out;
4165                         tback = to_tree_backref(back);
4166                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4167                                 (unsigned long long)rec->start,
4168                                 back->full_backref ? "parent" : "root",
4169                                 back->full_backref ?
4170                                 (unsigned long long)tback->parent :
4171                                 (unsigned long long)tback->root, back);
4172                 }
4173                 if (back->is_data) {
4174                         dback = to_data_backref(back);
4175                         if (dback->found_ref != dback->num_refs) {
4176                                 err = 1;
4177                                 if (!print_errs)
4178                                         goto out;
4179                                 fprintf(stderr, "Incorrect local backref count"
4180                                         " on %llu %s %llu owner %llu"
4181                                         " offset %llu found %u wanted %u back %p\n",
4182                                         (unsigned long long)rec->start,
4183                                         back->full_backref ?
4184                                         "parent" : "root",
4185                                         back->full_backref ?
4186                                         (unsigned long long)dback->parent:
4187                                         (unsigned long long)dback->root,
4188                                         (unsigned long long)dback->owner,
4189                                         (unsigned long long)dback->offset,
4190                                         dback->found_ref, dback->num_refs, back);
4191                         }
4192                         if (dback->disk_bytenr != rec->start) {
4193                                 err = 1;
4194                                 if (!print_errs)
4195                                         goto out;
4196                                 fprintf(stderr, "Backref disk bytenr does not"
4197                                         " match extent record, bytenr=%llu, "
4198                                         "ref bytenr=%llu\n",
4199                                         (unsigned long long)rec->start,
4200                                         (unsigned long long)dback->disk_bytenr);
4201                         }
4202
4203                         if (dback->bytes != rec->nr) {
4204                                 err = 1;
4205                                 if (!print_errs)
4206                                         goto out;
4207                                 fprintf(stderr, "Backref bytes do not match "
4208                                         "extent backref, bytenr=%llu, ref "
4209                                         "bytes=%llu, backref bytes=%llu\n",
4210                                         (unsigned long long)rec->start,
4211                                         (unsigned long long)rec->nr,
4212                                         (unsigned long long)dback->bytes);
4213                         }
4214                 }
4215                 if (!back->is_data) {
4216                         found += 1;
4217                 } else {
4218                         dback = to_data_backref(back);
4219                         found += dback->found_ref;
4220                 }
4221         }
4222         if (found != rec->refs) {
4223                 err = 1;
4224                 if (!print_errs)
4225                         goto out;
4226                 fprintf(stderr, "Incorrect global backref count "
4227                         "on %llu found %llu wanted %llu\n",
4228                         (unsigned long long)rec->start,
4229                         (unsigned long long)found,
4230                         (unsigned long long)rec->refs);
4231         }
4232 out:
4233         return err;
4234 }
4235
4236 static int free_all_extent_backrefs(struct extent_record *rec)
4237 {
4238         struct extent_backref *back;
4239         struct list_head *cur;
4240         while (!list_empty(&rec->backrefs)) {
4241                 cur = rec->backrefs.next;
4242                 back = to_extent_backref(cur);
4243                 list_del(cur);
4244                 free(back);
4245         }
4246         return 0;
4247 }
4248
4249 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4250                                      struct cache_tree *extent_cache)
4251 {
4252         struct cache_extent *cache;
4253         struct extent_record *rec;
4254
4255         while (1) {
4256                 cache = first_cache_extent(extent_cache);
4257                 if (!cache)
4258                         break;
4259                 rec = container_of(cache, struct extent_record, cache);
4260                 remove_cache_extent(extent_cache, cache);
4261                 free_all_extent_backrefs(rec);
4262                 free(rec);
4263         }
4264 }
4265
4266 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4267                                  struct extent_record *rec)
4268 {
4269         if (rec->content_checked && rec->owner_ref_checked &&
4270             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4271             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4272             !rec->bad_full_backref && !rec->crossing_stripes &&
4273             !rec->wrong_chunk_type) {
4274                 remove_cache_extent(extent_cache, &rec->cache);
4275                 free_all_extent_backrefs(rec);
4276                 list_del_init(&rec->list);
4277                 free(rec);
4278         }
4279         return 0;
4280 }
4281
4282 static int check_owner_ref(struct btrfs_root *root,
4283                             struct extent_record *rec,
4284                             struct extent_buffer *buf)
4285 {
4286         struct extent_backref *node;
4287         struct tree_backref *back;
4288         struct btrfs_root *ref_root;
4289         struct btrfs_key key;
4290         struct btrfs_path path;
4291         struct extent_buffer *parent;
4292         int level;
4293         int found = 0;
4294         int ret;
4295
4296         list_for_each_entry(node, &rec->backrefs, list) {
4297                 if (node->is_data)
4298                         continue;
4299                 if (!node->found_ref)
4300                         continue;
4301                 if (node->full_backref)
4302                         continue;
4303                 back = to_tree_backref(node);
4304                 if (btrfs_header_owner(buf) == back->root)
4305                         return 0;
4306         }
4307         BUG_ON(rec->is_root);
4308
4309         /* try to find the block by search corresponding fs tree */
4310         key.objectid = btrfs_header_owner(buf);
4311         key.type = BTRFS_ROOT_ITEM_KEY;
4312         key.offset = (u64)-1;
4313
4314         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4315         if (IS_ERR(ref_root))
4316                 return 1;
4317
4318         level = btrfs_header_level(buf);
4319         if (level == 0)
4320                 btrfs_item_key_to_cpu(buf, &key, 0);
4321         else
4322                 btrfs_node_key_to_cpu(buf, &key, 0);
4323
4324         btrfs_init_path(&path);
4325         path.lowest_level = level + 1;
4326         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4327         if (ret < 0)
4328                 return 0;
4329
4330         parent = path.nodes[level + 1];
4331         if (parent && buf->start == btrfs_node_blockptr(parent,
4332                                                         path.slots[level + 1]))
4333                 found = 1;
4334
4335         btrfs_release_path(&path);
4336         return found ? 0 : 1;
4337 }
4338
4339 static int is_extent_tree_record(struct extent_record *rec)
4340 {
4341         struct list_head *cur = rec->backrefs.next;
4342         struct extent_backref *node;
4343         struct tree_backref *back;
4344         int is_extent = 0;
4345
4346         while(cur != &rec->backrefs) {
4347                 node = to_extent_backref(cur);
4348                 cur = cur->next;
4349                 if (node->is_data)
4350                         return 0;
4351                 back = to_tree_backref(node);
4352                 if (node->full_backref)
4353                         return 0;
4354                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4355                         is_extent = 1;
4356         }
4357         return is_extent;
4358 }
4359
4360
4361 static int record_bad_block_io(struct btrfs_fs_info *info,
4362                                struct cache_tree *extent_cache,
4363                                u64 start, u64 len)
4364 {
4365         struct extent_record *rec;
4366         struct cache_extent *cache;
4367         struct btrfs_key key;
4368
4369         cache = lookup_cache_extent(extent_cache, start, len);
4370         if (!cache)
4371                 return 0;
4372
4373         rec = container_of(cache, struct extent_record, cache);
4374         if (!is_extent_tree_record(rec))
4375                 return 0;
4376
4377         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4378         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4379 }
4380
4381 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4382                        struct extent_buffer *buf, int slot)
4383 {
4384         if (btrfs_header_level(buf)) {
4385                 struct btrfs_key_ptr ptr1, ptr2;
4386
4387                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4388                                    sizeof(struct btrfs_key_ptr));
4389                 read_extent_buffer(buf, &ptr2,
4390                                    btrfs_node_key_ptr_offset(slot + 1),
4391                                    sizeof(struct btrfs_key_ptr));
4392                 write_extent_buffer(buf, &ptr1,
4393                                     btrfs_node_key_ptr_offset(slot + 1),
4394                                     sizeof(struct btrfs_key_ptr));
4395                 write_extent_buffer(buf, &ptr2,
4396                                     btrfs_node_key_ptr_offset(slot),
4397                                     sizeof(struct btrfs_key_ptr));
4398                 if (slot == 0) {
4399                         struct btrfs_disk_key key;
4400                         btrfs_node_key(buf, &key, 0);
4401                         btrfs_fixup_low_keys(root, path, &key,
4402                                              btrfs_header_level(buf) + 1);
4403                 }
4404         } else {
4405                 struct btrfs_item *item1, *item2;
4406                 struct btrfs_key k1, k2;
4407                 char *item1_data, *item2_data;
4408                 u32 item1_offset, item2_offset, item1_size, item2_size;
4409
4410                 item1 = btrfs_item_nr(slot);
4411                 item2 = btrfs_item_nr(slot + 1);
4412                 btrfs_item_key_to_cpu(buf, &k1, slot);
4413                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4414                 item1_offset = btrfs_item_offset(buf, item1);
4415                 item2_offset = btrfs_item_offset(buf, item2);
4416                 item1_size = btrfs_item_size(buf, item1);
4417                 item2_size = btrfs_item_size(buf, item2);
4418
4419                 item1_data = malloc(item1_size);
4420                 if (!item1_data)
4421                         return -ENOMEM;
4422                 item2_data = malloc(item2_size);
4423                 if (!item2_data) {
4424                         free(item1_data);
4425                         return -ENOMEM;
4426                 }
4427
4428                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4429                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4430
4431                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4432                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4433                 free(item1_data);
4434                 free(item2_data);
4435
4436                 btrfs_set_item_offset(buf, item1, item2_offset);
4437                 btrfs_set_item_offset(buf, item2, item1_offset);
4438                 btrfs_set_item_size(buf, item1, item2_size);
4439                 btrfs_set_item_size(buf, item2, item1_size);
4440
4441                 path->slots[0] = slot;
4442                 btrfs_set_item_key_unsafe(root, path, &k2);
4443                 path->slots[0] = slot + 1;
4444                 btrfs_set_item_key_unsafe(root, path, &k1);
4445         }
4446         return 0;
4447 }
4448
4449 static int fix_key_order(struct btrfs_trans_handle *trans,
4450                          struct btrfs_root *root,
4451                          struct btrfs_path *path)
4452 {
4453         struct extent_buffer *buf;
4454         struct btrfs_key k1, k2;
4455         int i;
4456         int level = path->lowest_level;
4457         int ret = -EIO;
4458
4459         buf = path->nodes[level];
4460         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4461                 if (level) {
4462                         btrfs_node_key_to_cpu(buf, &k1, i);
4463                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4464                 } else {
4465                         btrfs_item_key_to_cpu(buf, &k1, i);
4466                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4467                 }
4468                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4469                         continue;
4470                 ret = swap_values(root, path, buf, i);
4471                 if (ret)
4472                         break;
4473                 btrfs_mark_buffer_dirty(buf);
4474                 i = 0;
4475         }
4476         return ret;
4477 }
4478
4479 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4480                              struct btrfs_root *root,
4481                              struct btrfs_path *path,
4482                              struct extent_buffer *buf, int slot)
4483 {
4484         struct btrfs_key key;
4485         int nritems = btrfs_header_nritems(buf);
4486
4487         btrfs_item_key_to_cpu(buf, &key, slot);
4488
4489         /* These are all the keys we can deal with missing. */
4490         if (key.type != BTRFS_DIR_INDEX_KEY &&
4491             key.type != BTRFS_EXTENT_ITEM_KEY &&
4492             key.type != BTRFS_METADATA_ITEM_KEY &&
4493             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4494             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4495                 return -1;
4496
4497         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4498                (unsigned long long)key.objectid, key.type,
4499                (unsigned long long)key.offset, slot, buf->start);
4500         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4501                               btrfs_item_nr_offset(slot + 1),
4502                               sizeof(struct btrfs_item) *
4503                               (nritems - slot - 1));
4504         btrfs_set_header_nritems(buf, nritems - 1);
4505         if (slot == 0) {
4506                 struct btrfs_disk_key disk_key;
4507
4508                 btrfs_item_key(buf, &disk_key, 0);
4509                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4510         }
4511         btrfs_mark_buffer_dirty(buf);
4512         return 0;
4513 }
4514
4515 static int fix_item_offset(struct btrfs_trans_handle *trans,
4516                            struct btrfs_root *root,
4517                            struct btrfs_path *path)
4518 {
4519         struct extent_buffer *buf;
4520         int i;
4521         int ret = 0;
4522
4523         /* We should only get this for leaves */
4524         BUG_ON(path->lowest_level);
4525         buf = path->nodes[0];
4526 again:
4527         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4528                 unsigned int shift = 0, offset;
4529
4530                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4531                     BTRFS_LEAF_DATA_SIZE(root)) {
4532                         if (btrfs_item_end_nr(buf, i) >
4533                             BTRFS_LEAF_DATA_SIZE(root)) {
4534                                 ret = delete_bogus_item(trans, root, path,
4535                                                         buf, i);
4536                                 if (!ret)
4537                                         goto again;
4538                                 fprintf(stderr, "item is off the end of the "
4539                                         "leaf, can't fix\n");
4540                                 ret = -EIO;
4541                                 break;
4542                         }
4543                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4544                                 btrfs_item_end_nr(buf, i);
4545                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4546                            btrfs_item_offset_nr(buf, i - 1)) {
4547                         if (btrfs_item_end_nr(buf, i) >
4548                             btrfs_item_offset_nr(buf, i - 1)) {
4549                                 ret = delete_bogus_item(trans, root, path,
4550                                                         buf, i);
4551                                 if (!ret)
4552                                         goto again;
4553                                 fprintf(stderr, "items overlap, can't fix\n");
4554                                 ret = -EIO;
4555                                 break;
4556                         }
4557                         shift = btrfs_item_offset_nr(buf, i - 1) -
4558                                 btrfs_item_end_nr(buf, i);
4559                 }
4560                 if (!shift)
4561                         continue;
4562
4563                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4564                        i, shift, (unsigned long long)buf->start);
4565                 offset = btrfs_item_offset_nr(buf, i);
4566                 memmove_extent_buffer(buf,
4567                                       btrfs_leaf_data(buf) + offset + shift,
4568                                       btrfs_leaf_data(buf) + offset,
4569                                       btrfs_item_size_nr(buf, i));
4570                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4571                                       offset + shift);
4572                 btrfs_mark_buffer_dirty(buf);
4573         }
4574
4575         /*
4576          * We may have moved things, in which case we want to exit so we don't
4577          * write those changes out.  Once we have proper abort functionality in
4578          * progs this can be changed to something nicer.
4579          */
4580         BUG_ON(ret);
4581         return ret;
4582 }
4583
4584 /*
4585  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4586  * then just return -EIO.
4587  */
4588 static int try_to_fix_bad_block(struct btrfs_root *root,
4589                                 struct extent_buffer *buf,
4590                                 enum btrfs_tree_block_status status)
4591 {
4592         struct btrfs_trans_handle *trans;
4593         struct ulist *roots;
4594         struct ulist_node *node;
4595         struct btrfs_root *search_root;
4596         struct btrfs_path path;
4597         struct ulist_iterator iter;
4598         struct btrfs_key root_key, key;
4599         int ret;
4600
4601         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4602             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4603                 return -EIO;
4604
4605         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4606         if (ret)
4607                 return -EIO;
4608
4609         btrfs_init_path(&path);
4610         ULIST_ITER_INIT(&iter);
4611         while ((node = ulist_next(roots, &iter))) {
4612                 root_key.objectid = node->val;
4613                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4614                 root_key.offset = (u64)-1;
4615
4616                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4617                 if (IS_ERR(root)) {
4618                         ret = -EIO;
4619                         break;
4620                 }
4621
4622
4623                 trans = btrfs_start_transaction(search_root, 0);
4624                 if (IS_ERR(trans)) {
4625                         ret = PTR_ERR(trans);
4626                         break;
4627                 }
4628
4629                 path.lowest_level = btrfs_header_level(buf);
4630                 path.skip_check_block = 1;
4631                 if (path.lowest_level)
4632                         btrfs_node_key_to_cpu(buf, &key, 0);
4633                 else
4634                         btrfs_item_key_to_cpu(buf, &key, 0);
4635                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
4636                 if (ret) {
4637                         ret = -EIO;
4638                         btrfs_commit_transaction(trans, search_root);
4639                         break;
4640                 }
4641                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4642                         ret = fix_key_order(trans, search_root, &path);
4643                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4644                         ret = fix_item_offset(trans, search_root, &path);
4645                 if (ret) {
4646                         btrfs_commit_transaction(trans, search_root);
4647                         break;
4648                 }
4649                 btrfs_release_path(&path);
4650                 btrfs_commit_transaction(trans, search_root);
4651         }
4652         ulist_free(roots);
4653         btrfs_release_path(&path);
4654         return ret;
4655 }
4656
4657 static int check_block(struct btrfs_root *root,
4658                        struct cache_tree *extent_cache,
4659                        struct extent_buffer *buf, u64 flags)
4660 {
4661         struct extent_record *rec;
4662         struct cache_extent *cache;
4663         struct btrfs_key key;
4664         enum btrfs_tree_block_status status;
4665         int ret = 0;
4666         int level;
4667
4668         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4669         if (!cache)
4670                 return 1;
4671         rec = container_of(cache, struct extent_record, cache);
4672         rec->generation = btrfs_header_generation(buf);
4673
4674         level = btrfs_header_level(buf);
4675         if (btrfs_header_nritems(buf) > 0) {
4676
4677                 if (level == 0)
4678                         btrfs_item_key_to_cpu(buf, &key, 0);
4679                 else
4680                         btrfs_node_key_to_cpu(buf, &key, 0);
4681
4682                 rec->info_objectid = key.objectid;
4683         }
4684         rec->info_level = level;
4685
4686         if (btrfs_is_leaf(buf))
4687                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4688         else
4689                 status = btrfs_check_node(root, &rec->parent_key, buf);
4690
4691         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4692                 if (repair)
4693                         status = try_to_fix_bad_block(root, buf, status);
4694                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4695                         ret = -EIO;
4696                         fprintf(stderr, "bad block %llu\n",
4697                                 (unsigned long long)buf->start);
4698                 } else {
4699                         /*
4700                          * Signal to callers we need to start the scan over
4701                          * again since we'll have cowed blocks.
4702                          */
4703                         ret = -EAGAIN;
4704                 }
4705         } else {
4706                 rec->content_checked = 1;
4707                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4708                         rec->owner_ref_checked = 1;
4709                 else {
4710                         ret = check_owner_ref(root, rec, buf);
4711                         if (!ret)
4712                                 rec->owner_ref_checked = 1;
4713                 }
4714         }
4715         if (!ret)
4716                 maybe_free_extent_rec(extent_cache, rec);
4717         return ret;
4718 }
4719
4720 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4721                                                 u64 parent, u64 root)
4722 {
4723         struct list_head *cur = rec->backrefs.next;
4724         struct extent_backref *node;
4725         struct tree_backref *back;
4726
4727         while(cur != &rec->backrefs) {
4728                 node = to_extent_backref(cur);
4729                 cur = cur->next;
4730                 if (node->is_data)
4731                         continue;
4732                 back = to_tree_backref(node);
4733                 if (parent > 0) {
4734                         if (!node->full_backref)
4735                                 continue;
4736                         if (parent == back->parent)
4737                                 return back;
4738                 } else {
4739                         if (node->full_backref)
4740                                 continue;
4741                         if (back->root == root)
4742                                 return back;
4743                 }
4744         }
4745         return NULL;
4746 }
4747
4748 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4749                                                 u64 parent, u64 root)
4750 {
4751         struct tree_backref *ref = malloc(sizeof(*ref));
4752
4753         if (!ref)
4754                 return NULL;
4755         memset(&ref->node, 0, sizeof(ref->node));
4756         if (parent > 0) {
4757                 ref->parent = parent;
4758                 ref->node.full_backref = 1;
4759         } else {
4760                 ref->root = root;
4761                 ref->node.full_backref = 0;
4762         }
4763         list_add_tail(&ref->node.list, &rec->backrefs);
4764
4765         return ref;
4766 }
4767
4768 static struct data_backref *find_data_backref(struct extent_record *rec,
4769                                                 u64 parent, u64 root,
4770                                                 u64 owner, u64 offset,
4771                                                 int found_ref,
4772                                                 u64 disk_bytenr, u64 bytes)
4773 {
4774         struct list_head *cur = rec->backrefs.next;
4775         struct extent_backref *node;
4776         struct data_backref *back;
4777
4778         while(cur != &rec->backrefs) {
4779                 node = to_extent_backref(cur);
4780                 cur = cur->next;
4781                 if (!node->is_data)
4782                         continue;
4783                 back = to_data_backref(node);
4784                 if (parent > 0) {
4785                         if (!node->full_backref)
4786                                 continue;
4787                         if (parent == back->parent)
4788                                 return back;
4789                 } else {
4790                         if (node->full_backref)
4791                                 continue;
4792                         if (back->root == root && back->owner == owner &&
4793                             back->offset == offset) {
4794                                 if (found_ref && node->found_ref &&
4795                                     (back->bytes != bytes ||
4796                                     back->disk_bytenr != disk_bytenr))
4797                                         continue;
4798                                 return back;
4799                         }
4800                 }
4801         }
4802         return NULL;
4803 }
4804
4805 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4806                                                 u64 parent, u64 root,
4807                                                 u64 owner, u64 offset,
4808                                                 u64 max_size)
4809 {
4810         struct data_backref *ref = malloc(sizeof(*ref));
4811
4812         if (!ref)
4813                 return NULL;
4814         memset(&ref->node, 0, sizeof(ref->node));
4815         ref->node.is_data = 1;
4816
4817         if (parent > 0) {
4818                 ref->parent = parent;
4819                 ref->owner = 0;
4820                 ref->offset = 0;
4821                 ref->node.full_backref = 1;
4822         } else {
4823                 ref->root = root;
4824                 ref->owner = owner;
4825                 ref->offset = offset;
4826                 ref->node.full_backref = 0;
4827         }
4828         ref->bytes = max_size;
4829         ref->found_ref = 0;
4830         ref->num_refs = 0;
4831         list_add_tail(&ref->node.list, &rec->backrefs);
4832         if (max_size > rec->max_size)
4833                 rec->max_size = max_size;
4834         return ref;
4835 }
4836
4837 /* Check if the type of extent matches with its chunk */
4838 static void check_extent_type(struct extent_record *rec)
4839 {
4840         struct btrfs_block_group_cache *bg_cache;
4841
4842         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4843         if (!bg_cache)
4844                 return;
4845
4846         /* data extent, check chunk directly*/
4847         if (!rec->metadata) {
4848                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4849                         rec->wrong_chunk_type = 1;
4850                 return;
4851         }
4852
4853         /* metadata extent, check the obvious case first */
4854         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4855                                  BTRFS_BLOCK_GROUP_METADATA))) {
4856                 rec->wrong_chunk_type = 1;
4857                 return;
4858         }
4859
4860         /*
4861          * Check SYSTEM extent, as it's also marked as metadata, we can only
4862          * make sure it's a SYSTEM extent by its backref
4863          */
4864         if (!list_empty(&rec->backrefs)) {
4865                 struct extent_backref *node;
4866                 struct tree_backref *tback;
4867                 u64 bg_type;
4868
4869                 node = to_extent_backref(rec->backrefs.next);
4870                 if (node->is_data) {
4871                         /* tree block shouldn't have data backref */
4872                         rec->wrong_chunk_type = 1;
4873                         return;
4874                 }
4875                 tback = container_of(node, struct tree_backref, node);
4876
4877                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4878                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4879                 else
4880                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4881                 if (!(bg_cache->flags & bg_type))
4882                         rec->wrong_chunk_type = 1;
4883         }
4884 }
4885
4886 /*
4887  * Allocate a new extent record, fill default values from @tmpl and insert int
4888  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4889  * the cache, otherwise it fails.
4890  */
4891 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4892                 struct extent_record *tmpl)
4893 {
4894         struct extent_record *rec;
4895         int ret = 0;
4896
4897         rec = malloc(sizeof(*rec));
4898         if (!rec)
4899                 return -ENOMEM;
4900         rec->start = tmpl->start;
4901         rec->max_size = tmpl->max_size;
4902         rec->nr = max(tmpl->nr, tmpl->max_size);
4903         rec->found_rec = tmpl->found_rec;
4904         rec->content_checked = tmpl->content_checked;
4905         rec->owner_ref_checked = tmpl->owner_ref_checked;
4906         rec->num_duplicates = 0;
4907         rec->metadata = tmpl->metadata;
4908         rec->flag_block_full_backref = FLAG_UNSET;
4909         rec->bad_full_backref = 0;
4910         rec->crossing_stripes = 0;
4911         rec->wrong_chunk_type = 0;
4912         rec->is_root = tmpl->is_root;
4913         rec->refs = tmpl->refs;
4914         rec->extent_item_refs = tmpl->extent_item_refs;
4915         rec->parent_generation = tmpl->parent_generation;
4916         INIT_LIST_HEAD(&rec->backrefs);
4917         INIT_LIST_HEAD(&rec->dups);
4918         INIT_LIST_HEAD(&rec->list);
4919         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4920         rec->cache.start = tmpl->start;
4921         rec->cache.size = tmpl->nr;
4922         ret = insert_cache_extent(extent_cache, &rec->cache);
4923         if (ret) {
4924                 free(rec);
4925                 return ret;
4926         }
4927         bytes_used += rec->nr;
4928
4929         if (tmpl->metadata)
4930                 rec->crossing_stripes = check_crossing_stripes(global_info,
4931                                 rec->start, global_info->tree_root->nodesize);
4932         check_extent_type(rec);
4933         return ret;
4934 }
4935
4936 /*
4937  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4938  * some are hints:
4939  * - refs              - if found, increase refs
4940  * - is_root           - if found, set
4941  * - content_checked   - if found, set
4942  * - owner_ref_checked - if found, set
4943  *
4944  * If not found, create a new one, initialize and insert.
4945  */
4946 static int add_extent_rec(struct cache_tree *extent_cache,
4947                 struct extent_record *tmpl)
4948 {
4949         struct extent_record *rec;
4950         struct cache_extent *cache;
4951         int ret = 0;
4952         int dup = 0;
4953
4954         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4955         if (cache) {
4956                 rec = container_of(cache, struct extent_record, cache);
4957                 if (tmpl->refs)
4958                         rec->refs++;
4959                 if (rec->nr == 1)
4960                         rec->nr = max(tmpl->nr, tmpl->max_size);
4961
4962                 /*
4963                  * We need to make sure to reset nr to whatever the extent
4964                  * record says was the real size, this way we can compare it to
4965                  * the backrefs.
4966                  */
4967                 if (tmpl->found_rec) {
4968                         if (tmpl->start != rec->start || rec->found_rec) {
4969                                 struct extent_record *tmp;
4970
4971                                 dup = 1;
4972                                 if (list_empty(&rec->list))
4973                                         list_add_tail(&rec->list,
4974                                                       &duplicate_extents);
4975
4976                                 /*
4977                                  * We have to do this song and dance in case we
4978                                  * find an extent record that falls inside of
4979                                  * our current extent record but does not have
4980                                  * the same objectid.
4981                                  */
4982                                 tmp = malloc(sizeof(*tmp));
4983                                 if (!tmp)
4984                                         return -ENOMEM;
4985                                 tmp->start = tmpl->start;
4986                                 tmp->max_size = tmpl->max_size;
4987                                 tmp->nr = tmpl->nr;
4988                                 tmp->found_rec = 1;
4989                                 tmp->metadata = tmpl->metadata;
4990                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4991                                 INIT_LIST_HEAD(&tmp->list);
4992                                 list_add_tail(&tmp->list, &rec->dups);
4993                                 rec->num_duplicates++;
4994                         } else {
4995                                 rec->nr = tmpl->nr;
4996                                 rec->found_rec = 1;
4997                         }
4998                 }
4999
5000                 if (tmpl->extent_item_refs && !dup) {
5001                         if (rec->extent_item_refs) {
5002                                 fprintf(stderr, "block %llu rec "
5003                                         "extent_item_refs %llu, passed %llu\n",
5004                                         (unsigned long long)tmpl->start,
5005                                         (unsigned long long)
5006                                                         rec->extent_item_refs,
5007                                         (unsigned long long)tmpl->extent_item_refs);
5008                         }
5009                         rec->extent_item_refs = tmpl->extent_item_refs;
5010                 }
5011                 if (tmpl->is_root)
5012                         rec->is_root = 1;
5013                 if (tmpl->content_checked)
5014                         rec->content_checked = 1;
5015                 if (tmpl->owner_ref_checked)
5016                         rec->owner_ref_checked = 1;
5017                 memcpy(&rec->parent_key, &tmpl->parent_key,
5018                                 sizeof(tmpl->parent_key));
5019                 if (tmpl->parent_generation)
5020                         rec->parent_generation = tmpl->parent_generation;
5021                 if (rec->max_size < tmpl->max_size)
5022                         rec->max_size = tmpl->max_size;
5023
5024                 /*
5025                  * A metadata extent can't cross stripe_len boundary, otherwise
5026                  * kernel scrub won't be able to handle it.
5027                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5028                  * it.
5029                  */
5030                 if (tmpl->metadata)
5031                         rec->crossing_stripes = check_crossing_stripes(
5032                                         global_info, rec->start,
5033                                         global_info->tree_root->nodesize);
5034                 check_extent_type(rec);
5035                 maybe_free_extent_rec(extent_cache, rec);
5036                 return ret;
5037         }
5038
5039         ret = add_extent_rec_nolookup(extent_cache, tmpl);
5040
5041         return ret;
5042 }
5043
5044 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5045                             u64 parent, u64 root, int found_ref)
5046 {
5047         struct extent_record *rec;
5048         struct tree_backref *back;
5049         struct cache_extent *cache;
5050         int ret;
5051
5052         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5053         if (!cache) {
5054                 struct extent_record tmpl;
5055
5056                 memset(&tmpl, 0, sizeof(tmpl));
5057                 tmpl.start = bytenr;
5058                 tmpl.nr = 1;
5059                 tmpl.metadata = 1;
5060
5061                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5062                 if (ret)
5063                         return ret;
5064
5065                 /* really a bug in cache_extent implement now */
5066                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5067                 if (!cache)
5068                         return -ENOENT;
5069         }
5070
5071         rec = container_of(cache, struct extent_record, cache);
5072         if (rec->start != bytenr) {
5073                 /*
5074                  * Several cause, from unaligned bytenr to over lapping extents
5075                  */
5076                 return -EEXIST;
5077         }
5078
5079         back = find_tree_backref(rec, parent, root);
5080         if (!back) {
5081                 back = alloc_tree_backref(rec, parent, root);
5082                 if (!back)
5083                         return -ENOMEM;
5084         }
5085
5086         if (found_ref) {
5087                 if (back->node.found_ref) {
5088                         fprintf(stderr, "Extent back ref already exists "
5089                                 "for %llu parent %llu root %llu \n",
5090                                 (unsigned long long)bytenr,
5091                                 (unsigned long long)parent,
5092                                 (unsigned long long)root);
5093                 }
5094                 back->node.found_ref = 1;
5095         } else {
5096                 if (back->node.found_extent_tree) {
5097                         fprintf(stderr, "Extent back ref already exists "
5098                                 "for %llu parent %llu root %llu \n",
5099                                 (unsigned long long)bytenr,
5100                                 (unsigned long long)parent,
5101                                 (unsigned long long)root);
5102                 }
5103                 back->node.found_extent_tree = 1;
5104         }
5105         check_extent_type(rec);
5106         maybe_free_extent_rec(extent_cache, rec);
5107         return 0;
5108 }
5109
5110 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5111                             u64 parent, u64 root, u64 owner, u64 offset,
5112                             u32 num_refs, int found_ref, u64 max_size)
5113 {
5114         struct extent_record *rec;
5115         struct data_backref *back;
5116         struct cache_extent *cache;
5117         int ret;
5118
5119         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5120         if (!cache) {
5121                 struct extent_record tmpl;
5122
5123                 memset(&tmpl, 0, sizeof(tmpl));
5124                 tmpl.start = bytenr;
5125                 tmpl.nr = 1;
5126                 tmpl.max_size = max_size;
5127
5128                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5129                 if (ret)
5130                         return ret;
5131
5132                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5133                 if (!cache)
5134                         abort();
5135         }
5136
5137         rec = container_of(cache, struct extent_record, cache);
5138         if (rec->max_size < max_size)
5139                 rec->max_size = max_size;
5140
5141         /*
5142          * If found_ref is set then max_size is the real size and must match the
5143          * existing refs.  So if we have already found a ref then we need to
5144          * make sure that this ref matches the existing one, otherwise we need
5145          * to add a new backref so we can notice that the backrefs don't match
5146          * and we need to figure out who is telling the truth.  This is to
5147          * account for that awful fsync bug I introduced where we'd end up with
5148          * a btrfs_file_extent_item that would have its length include multiple
5149          * prealloc extents or point inside of a prealloc extent.
5150          */
5151         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5152                                  bytenr, max_size);
5153         if (!back) {
5154                 back = alloc_data_backref(rec, parent, root, owner, offset,
5155                                           max_size);
5156                 BUG_ON(!back);
5157         }
5158
5159         if (found_ref) {
5160                 BUG_ON(num_refs != 1);
5161                 if (back->node.found_ref)
5162                         BUG_ON(back->bytes != max_size);
5163                 back->node.found_ref = 1;
5164                 back->found_ref += 1;
5165                 back->bytes = max_size;
5166                 back->disk_bytenr = bytenr;
5167                 rec->refs += 1;
5168                 rec->content_checked = 1;
5169                 rec->owner_ref_checked = 1;
5170         } else {
5171                 if (back->node.found_extent_tree) {
5172                         fprintf(stderr, "Extent back ref already exists "
5173                                 "for %llu parent %llu root %llu "
5174                                 "owner %llu offset %llu num_refs %lu\n",
5175                                 (unsigned long long)bytenr,
5176                                 (unsigned long long)parent,
5177                                 (unsigned long long)root,
5178                                 (unsigned long long)owner,
5179                                 (unsigned long long)offset,
5180                                 (unsigned long)num_refs);
5181                 }
5182                 back->num_refs = num_refs;
5183                 back->node.found_extent_tree = 1;
5184         }
5185         maybe_free_extent_rec(extent_cache, rec);
5186         return 0;
5187 }
5188
5189 static int add_pending(struct cache_tree *pending,
5190                        struct cache_tree *seen, u64 bytenr, u32 size)
5191 {
5192         int ret;
5193         ret = add_cache_extent(seen, bytenr, size);
5194         if (ret)
5195                 return ret;
5196         add_cache_extent(pending, bytenr, size);
5197         return 0;
5198 }
5199
5200 static int pick_next_pending(struct cache_tree *pending,
5201                         struct cache_tree *reada,
5202                         struct cache_tree *nodes,
5203                         u64 last, struct block_info *bits, int bits_nr,
5204                         int *reada_bits)
5205 {
5206         unsigned long node_start = last;
5207         struct cache_extent *cache;
5208         int ret;
5209
5210         cache = search_cache_extent(reada, 0);
5211         if (cache) {
5212                 bits[0].start = cache->start;
5213                 bits[0].size = cache->size;
5214                 *reada_bits = 1;
5215                 return 1;
5216         }
5217         *reada_bits = 0;
5218         if (node_start > 32768)
5219                 node_start -= 32768;
5220
5221         cache = search_cache_extent(nodes, node_start);
5222         if (!cache)
5223                 cache = search_cache_extent(nodes, 0);
5224
5225         if (!cache) {
5226                  cache = search_cache_extent(pending, 0);
5227                  if (!cache)
5228                          return 0;
5229                  ret = 0;
5230                  do {
5231                          bits[ret].start = cache->start;
5232                          bits[ret].size = cache->size;
5233                          cache = next_cache_extent(cache);
5234                          ret++;
5235                  } while (cache && ret < bits_nr);
5236                  return ret;
5237         }
5238
5239         ret = 0;
5240         do {
5241                 bits[ret].start = cache->start;
5242                 bits[ret].size = cache->size;
5243                 cache = next_cache_extent(cache);
5244                 ret++;
5245         } while (cache && ret < bits_nr);
5246
5247         if (bits_nr - ret > 8) {
5248                 u64 lookup = bits[0].start + bits[0].size;
5249                 struct cache_extent *next;
5250                 next = search_cache_extent(pending, lookup);
5251                 while(next) {
5252                         if (next->start - lookup > 32768)
5253                                 break;
5254                         bits[ret].start = next->start;
5255                         bits[ret].size = next->size;
5256                         lookup = next->start + next->size;
5257                         ret++;
5258                         if (ret == bits_nr)
5259                                 break;
5260                         next = next_cache_extent(next);
5261                         if (!next)
5262                                 break;
5263                 }
5264         }
5265         return ret;
5266 }
5267
5268 static void free_chunk_record(struct cache_extent *cache)
5269 {
5270         struct chunk_record *rec;
5271
5272         rec = container_of(cache, struct chunk_record, cache);
5273         list_del_init(&rec->list);
5274         list_del_init(&rec->dextents);
5275         free(rec);
5276 }
5277
/* Free every extent of @chunk_cache through free_chunk_record(). */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
5282
5283 static void free_device_record(struct rb_node *node)
5284 {
5285         struct device_record *rec;
5286
5287         rec = container_of(node, struct device_record, node);
5288         free(rec);
5289 }
5290
5291 FREE_RB_BASED_TREE(device_cache, free_device_record);
5292
5293 int insert_block_group_record(struct block_group_tree *tree,
5294                               struct block_group_record *bg_rec)
5295 {
5296         int ret;
5297
5298         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5299         if (ret)
5300                 return ret;
5301
5302         list_add_tail(&bg_rec->list, &tree->block_groups);
5303         return 0;
5304 }
5305
5306 static void free_block_group_record(struct cache_extent *cache)
5307 {
5308         struct block_group_record *rec;
5309
5310         rec = container_of(cache, struct block_group_record, cache);
5311         list_del_init(&rec->list);
5312         free(rec);
5313 }
5314
/* Free every extent cached in @tree through free_block_group_record(). */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
5319
5320 int insert_device_extent_record(struct device_extent_tree *tree,
5321                                 struct device_extent_record *de_rec)
5322 {
5323         int ret;
5324
5325         /*
5326          * Device extent is a bit different from the other extents, because
5327          * the extents which belong to the different devices may have the
5328          * same start and size, so we need use the special extent cache
5329          * search/insert functions.
5330          */
5331         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5332         if (ret)
5333                 return ret;
5334
5335         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5336         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5337         return 0;
5338 }
5339
5340 static void free_device_extent_record(struct cache_extent *cache)
5341 {
5342         struct device_extent_record *rec;
5343
5344         rec = container_of(cache, struct device_extent_record, cache);
5345         if (!list_empty(&rec->chunk_list))
5346                 list_del_init(&rec->chunk_list);
5347         if (!list_empty(&rec->device_list))
5348                 list_del_init(&rec->device_list);
5349         free(rec);
5350 }
5351
5352 void free_device_extent_tree(struct device_extent_tree *tree)
5353 {
5354         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5355 }
5356
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs, all others as data backrefs carrying the ref count stored
 * in the item.
 *
 * Returns whatever add_tree_backref()/add_data_backref() returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid,
					key.offset, 0, 0, 0,
					btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
5377
5378 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5379                                             struct btrfs_key *key,
5380                                             int slot)
5381 {
5382         struct btrfs_chunk *ptr;
5383         struct chunk_record *rec;
5384         int num_stripes, i;
5385
5386         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5387         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5388
5389         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5390         if (!rec) {
5391                 fprintf(stderr, "memory allocation failed\n");
5392                 exit(-1);
5393         }
5394
5395         INIT_LIST_HEAD(&rec->list);
5396         INIT_LIST_HEAD(&rec->dextents);
5397         rec->bg_rec = NULL;
5398
5399         rec->cache.start = key->offset;
5400         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5401
5402         rec->generation = btrfs_header_generation(leaf);
5403
5404         rec->objectid = key->objectid;
5405         rec->type = key->type;
5406         rec->offset = key->offset;
5407
5408         rec->length = rec->cache.size;
5409         rec->owner = btrfs_chunk_owner(leaf, ptr);
5410         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5411         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5412         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5413         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5414         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5415         rec->num_stripes = num_stripes;
5416         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5417
5418         for (i = 0; i < rec->num_stripes; ++i) {
5419                 rec->stripes[i].devid =
5420                         btrfs_stripe_devid_nr(leaf, ptr, i);
5421                 rec->stripes[i].offset =
5422                         btrfs_stripe_offset_nr(leaf, ptr, i);
5423                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5424                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5425                                 BTRFS_UUID_SIZE);
5426         }
5427
5428         return rec;
5429 }
5430
5431 static int process_chunk_item(struct cache_tree *chunk_cache,
5432                               struct btrfs_key *key, struct extent_buffer *eb,
5433                               int slot)
5434 {
5435         struct chunk_record *rec;
5436         struct btrfs_chunk *chunk;
5437         int ret = 0;
5438
5439         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5440         /*
5441          * Do extra check for this chunk item,
5442          *
5443          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5444          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5445          * and owner<->key_type check.
5446          */
5447         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5448                                       key->offset);
5449         if (ret < 0) {
5450                 error("chunk(%llu, %llu) is not valid, ignore it",
5451                       key->offset, btrfs_chunk_length(eb, chunk));
5452                 return 0;
5453         }
5454         rec = btrfs_new_chunk_record(eb, key, slot);
5455         ret = insert_cache_extent(chunk_cache, &rec->cache);
5456         if (ret) {
5457                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5458                         rec->offset, rec->length);
5459                 free(rec);
5460         }
5461
5462         return ret;
5463 }
5464
5465 static int process_device_item(struct rb_root *dev_cache,
5466                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5467 {
5468         struct btrfs_dev_item *ptr;
5469         struct device_record *rec;
5470         int ret = 0;
5471
5472         ptr = btrfs_item_ptr(eb,
5473                 slot, struct btrfs_dev_item);
5474
5475         rec = malloc(sizeof(*rec));
5476         if (!rec) {
5477                 fprintf(stderr, "memory allocation failed\n");
5478                 return -ENOMEM;
5479         }
5480
5481         rec->devid = key->offset;
5482         rec->generation = btrfs_header_generation(eb);
5483
5484         rec->objectid = key->objectid;
5485         rec->type = key->type;
5486         rec->offset = key->offset;
5487
5488         rec->devid = btrfs_device_id(eb, ptr);
5489         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5490         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5491
5492         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5493         if (ret) {
5494                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5495                 free(rec);
5496         }
5497
5498         return ret;
5499 }
5500
5501 struct block_group_record *
5502 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5503                              int slot)
5504 {
5505         struct btrfs_block_group_item *ptr;
5506         struct block_group_record *rec;
5507
5508         rec = calloc(1, sizeof(*rec));
5509         if (!rec) {
5510                 fprintf(stderr, "memory allocation failed\n");
5511                 exit(-1);
5512         }
5513
5514         rec->cache.start = key->objectid;
5515         rec->cache.size = key->offset;
5516
5517         rec->generation = btrfs_header_generation(leaf);
5518
5519         rec->objectid = key->objectid;
5520         rec->type = key->type;
5521         rec->offset = key->offset;
5522
5523         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5524         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5525
5526         INIT_LIST_HEAD(&rec->list);
5527
5528         return rec;
5529 }
5530
5531 static int process_block_group_item(struct block_group_tree *block_group_cache,
5532                                     struct btrfs_key *key,
5533                                     struct extent_buffer *eb, int slot)
5534 {
5535         struct block_group_record *rec;
5536         int ret = 0;
5537
5538         rec = btrfs_new_block_group_record(eb, key, slot);
5539         ret = insert_block_group_record(block_group_cache, rec);
5540         if (ret) {
5541                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5542                         rec->objectid, rec->offset);
5543                 free(rec);
5544         }
5545
5546         return ret;
5547 }
5548
5549 struct device_extent_record *
5550 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5551                                struct btrfs_key *key, int slot)
5552 {
5553         struct device_extent_record *rec;
5554         struct btrfs_dev_extent *ptr;
5555
5556         rec = calloc(1, sizeof(*rec));
5557         if (!rec) {
5558                 fprintf(stderr, "memory allocation failed\n");
5559                 exit(-1);
5560         }
5561
5562         rec->cache.objectid = key->objectid;
5563         rec->cache.start = key->offset;
5564
5565         rec->generation = btrfs_header_generation(leaf);
5566
5567         rec->objectid = key->objectid;
5568         rec->type = key->type;
5569         rec->offset = key->offset;
5570
5571         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5572         rec->chunk_objecteid =
5573                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5574         rec->chunk_offset =
5575                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5576         rec->length = btrfs_dev_extent_length(leaf, ptr);
5577         rec->cache.size = rec->length;
5578
5579         INIT_LIST_HEAD(&rec->chunk_list);
5580         INIT_LIST_HEAD(&rec->device_list);
5581
5582         return rec;
5583 }
5584
5585 static int
5586 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5587                            struct btrfs_key *key, struct extent_buffer *eb,
5588                            int slot)
5589 {
5590         struct device_extent_record *rec;
5591         int ret;
5592
5593         rec = btrfs_new_device_extent_record(eb, key, slot);
5594         ret = insert_device_extent_record(dev_extent_cache, rec);
5595         if (ret) {
5596                 fprintf(stderr,
5597                         "Device extent[%llu, %llu, %llu] existed.\n",
5598                         rec->objectid, rec->offset, rec->length);
5599                 free(rec);
5600         }
5601
5602         return ret;
5603 }
5604
5605 static int process_extent_item(struct btrfs_root *root,
5606                                struct cache_tree *extent_cache,
5607                                struct extent_buffer *eb, int slot)
5608 {
5609         struct btrfs_extent_item *ei;
5610         struct btrfs_extent_inline_ref *iref;
5611         struct btrfs_extent_data_ref *dref;
5612         struct btrfs_shared_data_ref *sref;
5613         struct btrfs_key key;
5614         struct extent_record tmpl;
5615         unsigned long end;
5616         unsigned long ptr;
5617         int ret;
5618         int type;
5619         u32 item_size = btrfs_item_size_nr(eb, slot);
5620         u64 refs = 0;
5621         u64 offset;
5622         u64 num_bytes;
5623         int metadata = 0;
5624
5625         btrfs_item_key_to_cpu(eb, &key, slot);
5626
5627         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5628                 metadata = 1;
5629                 num_bytes = root->nodesize;
5630         } else {
5631                 num_bytes = key.offset;
5632         }
5633
5634         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5635                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5636                       key.objectid, root->sectorsize);
5637                 return -EIO;
5638         }
5639         if (item_size < sizeof(*ei)) {
5640 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5641                 struct btrfs_extent_item_v0 *ei0;
5642                 BUG_ON(item_size != sizeof(*ei0));
5643                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5644                 refs = btrfs_extent_refs_v0(eb, ei0);
5645 #else
5646                 BUG();
5647 #endif
5648                 memset(&tmpl, 0, sizeof(tmpl));
5649                 tmpl.start = key.objectid;
5650                 tmpl.nr = num_bytes;
5651                 tmpl.extent_item_refs = refs;
5652                 tmpl.metadata = metadata;
5653                 tmpl.found_rec = 1;
5654                 tmpl.max_size = num_bytes;
5655
5656                 return add_extent_rec(extent_cache, &tmpl);
5657         }
5658
5659         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5660         refs = btrfs_extent_refs(eb, ei);
5661         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5662                 metadata = 1;
5663         else
5664                 metadata = 0;
5665         if (metadata && num_bytes != root->nodesize) {
5666                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5667                       num_bytes, root->nodesize);
5668                 return -EIO;
5669         }
5670         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5671                 error("ignore invalid data extent, length %llu is not aligned to %u",
5672                       num_bytes, root->sectorsize);
5673                 return -EIO;
5674         }
5675
5676         memset(&tmpl, 0, sizeof(tmpl));
5677         tmpl.start = key.objectid;
5678         tmpl.nr = num_bytes;
5679         tmpl.extent_item_refs = refs;
5680         tmpl.metadata = metadata;
5681         tmpl.found_rec = 1;
5682         tmpl.max_size = num_bytes;
5683         add_extent_rec(extent_cache, &tmpl);
5684
5685         ptr = (unsigned long)(ei + 1);
5686         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5687             key.type == BTRFS_EXTENT_ITEM_KEY)
5688                 ptr += sizeof(struct btrfs_tree_block_info);
5689
5690         end = (unsigned long)ei + item_size;
5691         while (ptr < end) {
5692                 iref = (struct btrfs_extent_inline_ref *)ptr;
5693                 type = btrfs_extent_inline_ref_type(eb, iref);
5694                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5695                 switch (type) {
5696                 case BTRFS_TREE_BLOCK_REF_KEY:
5697                         ret = add_tree_backref(extent_cache, key.objectid,
5698                                         0, offset, 0);
5699                         if (ret < 0)
5700                                 error("add_tree_backref failed: %s",
5701                                       strerror(-ret));
5702                         break;
5703                 case BTRFS_SHARED_BLOCK_REF_KEY:
5704                         ret = add_tree_backref(extent_cache, key.objectid,
5705                                         offset, 0, 0);
5706                         if (ret < 0)
5707                                 error("add_tree_backref failed: %s",
5708                                       strerror(-ret));
5709                         break;
5710                 case BTRFS_EXTENT_DATA_REF_KEY:
5711                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5712                         add_data_backref(extent_cache, key.objectid, 0,
5713                                         btrfs_extent_data_ref_root(eb, dref),
5714                                         btrfs_extent_data_ref_objectid(eb,
5715                                                                        dref),
5716                                         btrfs_extent_data_ref_offset(eb, dref),
5717                                         btrfs_extent_data_ref_count(eb, dref),
5718                                         0, num_bytes);
5719                         break;
5720                 case BTRFS_SHARED_DATA_REF_KEY:
5721                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5722                         add_data_backref(extent_cache, key.objectid, offset,
5723                                         0, 0, 0,
5724                                         btrfs_shared_data_ref_count(eb, sref),
5725                                         0, num_bytes);
5726                         break;
5727                 default:
5728                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5729                                 key.objectid, key.type, num_bytes);
5730                         goto out;
5731                 }
5732                 ptr += btrfs_extent_inline_ref_size(type);
5733         }
5734         WARN_ON(ptr > end);
5735 out:
5736         return 0;
5737 }
5738
5739 static int check_cache_range(struct btrfs_root *root,
5740                              struct btrfs_block_group_cache *cache,
5741                              u64 offset, u64 bytes)
5742 {
5743         struct btrfs_free_space *entry;
5744         u64 *logical;
5745         u64 bytenr;
5746         int stripe_len;
5747         int i, nr, ret;
5748
5749         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5750                 bytenr = btrfs_sb_offset(i);
5751                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5752                                        cache->key.objectid, bytenr, 0,
5753                                        &logical, &nr, &stripe_len);
5754                 if (ret)
5755                         return ret;
5756
5757                 while (nr--) {
5758                         if (logical[nr] + stripe_len <= offset)
5759                                 continue;
5760                         if (offset + bytes <= logical[nr])
5761                                 continue;
5762                         if (logical[nr] == offset) {
5763                                 if (stripe_len >= bytes) {
5764                                         free(logical);
5765                                         return 0;
5766                                 }
5767                                 bytes -= stripe_len;
5768                                 offset += stripe_len;
5769                         } else if (logical[nr] < offset) {
5770                                 if (logical[nr] + stripe_len >=
5771                                     offset + bytes) {
5772                                         free(logical);
5773                                         return 0;
5774                                 }
5775                                 bytes = (offset + bytes) -
5776                                         (logical[nr] + stripe_len);
5777                                 offset = logical[nr] + stripe_len;
5778                         } else {
5779                                 /*
5780                                  * Could be tricky, the super may land in the
5781                                  * middle of the area we're checking.  First
5782                                  * check the easiest case, it's at the end.
5783                                  */
5784                                 if (logical[nr] + stripe_len >=
5785                                     bytes + offset) {
5786                                         bytes = logical[nr] - offset;
5787                                         continue;
5788                                 }
5789
5790                                 /* Check the left side */
5791                                 ret = check_cache_range(root, cache,
5792                                                         offset,
5793                                                         logical[nr] - offset);
5794                                 if (ret) {
5795                                         free(logical);
5796                                         return ret;
5797                                 }
5798
5799                                 /* Now we continue with the right side */
5800                                 bytes = (offset + bytes) -
5801                                         (logical[nr] + stripe_len);
5802                                 offset = logical[nr] + stripe_len;
5803                         }
5804                 }
5805
5806                 free(logical);
5807         }
5808
5809         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5810         if (!entry) {
5811                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5812                         offset, offset+bytes);
5813                 return -EINVAL;
5814         }
5815
5816         if (entry->offset != offset) {
5817                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5818                         entry->offset);
5819                 return -EINVAL;
5820         }
5821
5822         if (entry->bytes != bytes) {
5823                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5824                         bytes, entry->bytes, offset);
5825                 return -EINVAL;
5826         }
5827
5828         unlink_free_space(cache->free_space_ctl, entry);
5829         free(entry);
5830         return 0;
5831 }
5832
5833 static int verify_space_cache(struct btrfs_root *root,
5834                               struct btrfs_block_group_cache *cache)
5835 {
5836         struct btrfs_path path;
5837         struct extent_buffer *leaf;
5838         struct btrfs_key key;
5839         u64 last;
5840         int ret = 0;
5841
5842         root = root->fs_info->extent_root;
5843
5844         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5845
5846         btrfs_init_path(&path);
5847         key.objectid = last;
5848         key.offset = 0;
5849         key.type = BTRFS_EXTENT_ITEM_KEY;
5850         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5851         if (ret < 0)
5852                 goto out;
5853         ret = 0;
5854         while (1) {
5855                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5856                         ret = btrfs_next_leaf(root, &path);
5857                         if (ret < 0)
5858                                 goto out;
5859                         if (ret > 0) {
5860                                 ret = 0;
5861                                 break;
5862                         }
5863                 }
5864                 leaf = path.nodes[0];
5865                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5866                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5867                         break;
5868                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5869                     key.type != BTRFS_METADATA_ITEM_KEY) {
5870                         path.slots[0]++;
5871                         continue;
5872                 }
5873
5874                 if (last == key.objectid) {
5875                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5876                                 last = key.objectid + key.offset;
5877                         else
5878                                 last = key.objectid + root->nodesize;
5879                         path.slots[0]++;
5880                         continue;
5881                 }
5882
5883                 ret = check_cache_range(root, cache, last,
5884                                         key.objectid - last);
5885                 if (ret)
5886                         break;
5887                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5888                         last = key.objectid + key.offset;
5889                 else
5890                         last = key.objectid + root->nodesize;
5891                 path.slots[0]++;
5892         }
5893
5894         if (last < cache->key.objectid + cache->key.offset)
5895                 ret = check_cache_range(root, cache, last,
5896                                         cache->key.objectid +
5897                                         cache->key.offset - last);
5898
5899 out:
5900         btrfs_release_path(&path);
5901
5902         if (!ret &&
5903             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5904                 fprintf(stderr, "There are still entries left in the space "
5905                         "cache\n");
5906                 ret = -EINVAL;
5907         }
5908
5909         return ret;
5910 }
5911
5912 static int check_space_cache(struct btrfs_root *root)
5913 {
5914         struct btrfs_block_group_cache *cache;
5915         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5916         int ret;
5917         int error = 0;
5918
5919         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5920             btrfs_super_generation(root->fs_info->super_copy) !=
5921             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5922                 printf("cache and super generation don't match, space cache "
5923                        "will be invalidated\n");
5924                 return 0;
5925         }
5926
5927         if (ctx.progress_enabled) {
5928                 ctx.tp = TASK_FREE_SPACE;
5929                 task_start(ctx.info);
5930         }
5931
5932         while (1) {
5933                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5934                 if (!cache)
5935                         break;
5936
5937                 start = cache->key.objectid + cache->key.offset;
5938                 if (!cache->free_space_ctl) {
5939                         if (btrfs_init_free_space_ctl(cache,
5940                                                       root->sectorsize)) {
5941                                 ret = -ENOMEM;
5942                                 break;
5943                         }
5944                 } else {
5945                         btrfs_remove_free_space_cache(cache);
5946                 }
5947
5948                 if (btrfs_fs_compat_ro(root->fs_info,
5949                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5950                         ret = exclude_super_stripes(root, cache);
5951                         if (ret) {
5952                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5953                                         strerror(-ret));
5954                                 error++;
5955                                 continue;
5956                         }
5957                         ret = load_free_space_tree(root->fs_info, cache);
5958                         free_excluded_extents(root, cache);
5959                         if (ret < 0) {
5960                                 fprintf(stderr, "could not load free space tree: %s\n",
5961                                         strerror(-ret));
5962                                 error++;
5963                                 continue;
5964                         }
5965                         error += ret;
5966                 } else {
5967                         ret = load_free_space_cache(root->fs_info, cache);
5968                         if (!ret)
5969                                 continue;
5970                 }
5971
5972                 ret = verify_space_cache(root, cache);
5973                 if (ret) {
5974                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5975                                 cache->key.objectid);
5976                         error++;
5977                 }
5978         }
5979
5980         task_stop(ctx.info);
5981
5982         return error ? -EINVAL : 0;
5983 }
5984
5985 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5986                         u64 num_bytes, unsigned long leaf_offset,
5987                         struct extent_buffer *eb) {
5988
5989         u64 offset = 0;
5990         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5991         char *data;
5992         unsigned long csum_offset;
5993         u32 csum;
5994         u32 csum_expected;
5995         u64 read_len;
5996         u64 data_checked = 0;
5997         u64 tmp;
5998         int ret = 0;
5999         int mirror;
6000         int num_copies;
6001
6002         if (num_bytes % root->sectorsize)
6003                 return -EINVAL;
6004
6005         data = malloc(num_bytes);
6006         if (!data)
6007                 return -ENOMEM;
6008
6009         while (offset < num_bytes) {
6010                 mirror = 0;
6011 again:
6012                 read_len = num_bytes - offset;
6013                 /* read as much space once a time */
6014                 ret = read_extent_data(root, data + offset,
6015                                 bytenr + offset, &read_len, mirror);
6016                 if (ret)
6017                         goto out;
6018                 data_checked = 0;
6019                 /* verify every 4k data's checksum */
6020                 while (data_checked < read_len) {
6021                         csum = ~(u32)0;
6022                         tmp = offset + data_checked;
6023
6024                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
6025                                                csum, root->sectorsize);
6026                         btrfs_csum_final(csum, (u8 *)&csum);
6027
6028                         csum_offset = leaf_offset +
6029                                  tmp / root->sectorsize * csum_size;
6030                         read_extent_buffer(eb, (char *)&csum_expected,
6031                                            csum_offset, csum_size);
6032                         /* try another mirror */
6033                         if (csum != csum_expected) {
6034                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6035                                                 mirror, bytenr + tmp,
6036                                                 csum, csum_expected);
6037                                 num_copies = btrfs_num_copies(
6038                                                 &root->fs_info->mapping_tree,
6039                                                 bytenr, num_bytes);
6040                                 if (mirror < num_copies - 1) {
6041                                         mirror += 1;
6042                                         goto again;
6043                                 }
6044                         }
6045                         data_checked += root->sectorsize;
6046                 }
6047                 offset += read_len;
6048         }
6049 out:
6050         free(data);
6051         return ret;
6052 }
6053
6054 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6055                                u64 num_bytes)
6056 {
6057         struct btrfs_path path;
6058         struct extent_buffer *leaf;
6059         struct btrfs_key key;
6060         int ret;
6061
6062         btrfs_init_path(&path);
6063         key.objectid = bytenr;
6064         key.type = BTRFS_EXTENT_ITEM_KEY;
6065         key.offset = (u64)-1;
6066
6067 again:
6068         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6069                                 0, 0);
6070         if (ret < 0) {
6071                 fprintf(stderr, "Error looking up extent record %d\n", ret);
6072                 btrfs_release_path(&path);
6073                 return ret;
6074         } else if (ret) {
6075                 if (path.slots[0] > 0) {
6076                         path.slots[0]--;
6077                 } else {
6078                         ret = btrfs_prev_leaf(root, &path);
6079                         if (ret < 0) {
6080                                 goto out;
6081                         } else if (ret > 0) {
6082                                 ret = 0;
6083                                 goto out;
6084                         }
6085                 }
6086         }
6087
6088         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6089
6090         /*
6091          * Block group items come before extent items if they have the same
6092          * bytenr, so walk back one more just in case.  Dear future traveller,
6093          * first congrats on mastering time travel.  Now if it's not too much
6094          * trouble could you go back to 2006 and tell Chris to make the
6095          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6096          * EXTENT_ITEM_KEY please?
6097          */
6098         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6099                 if (path.slots[0] > 0) {
6100                         path.slots[0]--;
6101                 } else {
6102                         ret = btrfs_prev_leaf(root, &path);
6103                         if (ret < 0) {
6104                                 goto out;
6105                         } else if (ret > 0) {
6106                                 ret = 0;
6107                                 goto out;
6108                         }
6109                 }
6110                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6111         }
6112
6113         while (num_bytes) {
6114                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6115                         ret = btrfs_next_leaf(root, &path);
6116                         if (ret < 0) {
6117                                 fprintf(stderr, "Error going to next leaf "
6118                                         "%d\n", ret);
6119                                 btrfs_release_path(&path);
6120                                 return ret;
6121                         } else if (ret) {
6122                                 break;
6123                         }
6124                 }
6125                 leaf = path.nodes[0];
6126                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6127                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6128                         path.slots[0]++;
6129                         continue;
6130                 }
6131                 if (key.objectid + key.offset < bytenr) {
6132                         path.slots[0]++;
6133                         continue;
6134                 }
6135                 if (key.objectid > bytenr + num_bytes)
6136                         break;
6137
6138                 if (key.objectid == bytenr) {
6139                         if (key.offset >= num_bytes) {
6140                                 num_bytes = 0;
6141                                 break;
6142                         }
6143                         num_bytes -= key.offset;
6144                         bytenr += key.offset;
6145                 } else if (key.objectid < bytenr) {
6146                         if (key.objectid + key.offset >= bytenr + num_bytes) {
6147                                 num_bytes = 0;
6148                                 break;
6149                         }
6150                         num_bytes = (bytenr + num_bytes) -
6151                                 (key.objectid + key.offset);
6152                         bytenr = key.objectid + key.offset;
6153                 } else {
6154                         if (key.objectid + key.offset < bytenr + num_bytes) {
6155                                 u64 new_start = key.objectid + key.offset;
6156                                 u64 new_bytes = bytenr + num_bytes - new_start;
6157
6158                                 /*
6159                                  * Weird case, the extent is in the middle of
6160                                  * our range, we'll have to search one side
6161                                  * and then the other.  Not sure if this happens
6162                                  * in real life, but no harm in coding it up
6163                                  * anyway just in case.
6164                                  */
6165                                 btrfs_release_path(&path);
6166                                 ret = check_extent_exists(root, new_start,
6167                                                           new_bytes);
6168                                 if (ret) {
6169                                         fprintf(stderr, "Right section didn't "
6170                                                 "have a record\n");
6171                                         break;
6172                                 }
6173                                 num_bytes = key.objectid - bytenr;
6174                                 goto again;
6175                         }
6176                         num_bytes = key.objectid - bytenr;
6177                 }
6178                 path.slots[0]++;
6179         }
6180         ret = 0;
6181
6182 out:
6183         if (num_bytes && !ret) {
6184                 fprintf(stderr, "There are no extents for csum range "
6185                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6186                 ret = 1;
6187         }
6188
6189         btrfs_release_path(&path);
6190         return ret;
6191 }
6192
6193 static int check_csums(struct btrfs_root *root)
6194 {
6195         struct btrfs_path path;
6196         struct extent_buffer *leaf;
6197         struct btrfs_key key;
6198         u64 offset = 0, num_bytes = 0;
6199         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6200         int errors = 0;
6201         int ret;
6202         u64 data_len;
6203         unsigned long leaf_offset;
6204
6205         root = root->fs_info->csum_root;
6206         if (!extent_buffer_uptodate(root->node)) {
6207                 fprintf(stderr, "No valid csum tree found\n");
6208                 return -ENOENT;
6209         }
6210
6211         btrfs_init_path(&path);
6212         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6213         key.type = BTRFS_EXTENT_CSUM_KEY;
6214         key.offset = 0;
6215         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6216         if (ret < 0) {
6217                 fprintf(stderr, "Error searching csum tree %d\n", ret);
6218                 btrfs_release_path(&path);
6219                 return ret;
6220         }
6221
6222         if (ret > 0 && path.slots[0])
6223                 path.slots[0]--;
6224         ret = 0;
6225
6226         while (1) {
6227                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6228                         ret = btrfs_next_leaf(root, &path);
6229                         if (ret < 0) {
6230                                 fprintf(stderr, "Error going to next leaf "
6231                                         "%d\n", ret);
6232                                 break;
6233                         }
6234                         if (ret)
6235                                 break;
6236                 }
6237                 leaf = path.nodes[0];
6238
6239                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6240                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6241                         path.slots[0]++;
6242                         continue;
6243                 }
6244
6245                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6246                               csum_size) * root->sectorsize;
6247                 if (!check_data_csum)
6248                         goto skip_csum_check;
6249                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6250                 ret = check_extent_csums(root, key.offset, data_len,
6251                                          leaf_offset, leaf);
6252                 if (ret)
6253                         break;
6254 skip_csum_check:
6255                 if (!num_bytes) {
6256                         offset = key.offset;
6257                 } else if (key.offset != offset + num_bytes) {
6258                         ret = check_extent_exists(root, offset, num_bytes);
6259                         if (ret) {
6260                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6261                                         "there is no extent record\n",
6262                                         offset, offset+num_bytes);
6263                                 errors++;
6264                         }
6265                         offset = key.offset;
6266                         num_bytes = 0;
6267                 }
6268                 num_bytes += data_len;
6269                 path.slots[0]++;
6270         }
6271
6272         btrfs_release_path(&path);
6273         return errors;
6274 }
6275
6276 static int is_dropped_key(struct btrfs_key *key,
6277                           struct btrfs_key *drop_key) {
6278         if (key->objectid < drop_key->objectid)
6279                 return 1;
6280         else if (key->objectid == drop_key->objectid) {
6281                 if (key->type < drop_key->type)
6282                         return 1;
6283                 else if (key->type == drop_key->type) {
6284                         if (key->offset < drop_key->offset)
6285                                 return 1;
6286                 }
6287         }
6288         return 0;
6289 }
6290
6291 /*
6292  * Here are the rules for FULL_BACKREF.
6293  *
6294  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6295  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6296  *      FULL_BACKREF set.
6297  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6298  *    if it happened after the relocation occurred since we'll have dropped the
6299  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6300  *    have no real way to know for sure.
6301  *
6302  * We process the blocks one root at a time, and we start from the lowest root
6303  * objectid and go to the highest.  So we can just lookup the owner backref for
6304  * the record and if we don't find it then we know it doesn't exist and we have
6305  * a FULL BACKREF.
6306  *
6307  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6308  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6309  * be set or not and then we can check later once we've gathered all the refs.
6310  */
6311 static int calc_extent_flag(struct btrfs_root *root,
6312                            struct cache_tree *extent_cache,
6313                            struct extent_buffer *buf,
6314                            struct root_item_record *ri,
6315                            u64 *flags)
6316 {
6317         struct extent_record *rec;
6318         struct cache_extent *cache;
6319         struct tree_backref *tback;
6320         u64 owner = 0;
6321
6322         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6323         /* we have added this extent before */
6324         if (!cache)
6325                 return -ENOENT;
6326
6327         rec = container_of(cache, struct extent_record, cache);
6328
6329         /*
6330          * Except file/reloc tree, we can not have
6331          * FULL BACKREF MODE
6332          */
6333         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6334                 goto normal;
6335         /*
6336          * root node
6337          */
6338         if (buf->start == ri->bytenr)
6339                 goto normal;
6340
6341         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6342                 goto full_backref;
6343
6344         owner = btrfs_header_owner(buf);
6345         if (owner == ri->objectid)
6346                 goto normal;
6347
6348         tback = find_tree_backref(rec, 0, owner);
6349         if (!tback)
6350                 goto full_backref;
6351 normal:
6352         *flags = 0;
6353         if (rec->flag_block_full_backref != FLAG_UNSET &&
6354             rec->flag_block_full_backref != 0)
6355                 rec->bad_full_backref = 1;
6356         return 0;
6357 full_backref:
6358         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6359         if (rec->flag_block_full_backref != FLAG_UNSET &&
6360             rec->flag_block_full_backref != 1)
6361                 rec->bad_full_backref = 1;
6362         return 0;
6363 }
6364
6365 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6366 {
6367         fprintf(stderr, "Invalid key type(");
6368         print_key_type(stderr, 0, key_type);
6369         fprintf(stderr, ") found in root(");
6370         print_objectid(stderr, rootid, 0);
6371         fprintf(stderr, ")\n");
6372 }
6373
6374 /*
6375  * Check if the key is valid with its extent buffer.
6376  *
6377  * This is a early check in case invalid key exists in a extent buffer
6378  * This is not comprehensive yet, but should prevent wrong key/item passed
6379  * further
6380  */
6381 static int check_type_with_root(u64 rootid, u8 key_type)
6382 {
6383         switch (key_type) {
6384         /* Only valid in chunk tree */
6385         case BTRFS_DEV_ITEM_KEY:
6386         case BTRFS_CHUNK_ITEM_KEY:
6387                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6388                         goto err;
6389                 break;
6390         /* valid in csum and log tree */
6391         case BTRFS_CSUM_TREE_OBJECTID:
6392                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6393                       is_fstree(rootid)))
6394                         goto err;
6395                 break;
6396         case BTRFS_EXTENT_ITEM_KEY:
6397         case BTRFS_METADATA_ITEM_KEY:
6398         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6399                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6400                         goto err;
6401                 break;
6402         case BTRFS_ROOT_ITEM_KEY:
6403                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6404                         goto err;
6405                 break;
6406         case BTRFS_DEV_EXTENT_KEY:
6407                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6408                         goto err;
6409                 break;
6410         }
6411         return 0;
6412 err:
6413         report_mismatch_key_root(key_type, rootid);
6414         return -EINVAL;
6415 }
6416
6417 static int run_next_block(struct btrfs_root *root,
6418                           struct block_info *bits,
6419                           int bits_nr,
6420                           u64 *last,
6421                           struct cache_tree *pending,
6422                           struct cache_tree *seen,
6423                           struct cache_tree *reada,
6424                           struct cache_tree *nodes,
6425                           struct cache_tree *extent_cache,
6426                           struct cache_tree *chunk_cache,
6427                           struct rb_root *dev_cache,
6428                           struct block_group_tree *block_group_cache,
6429                           struct device_extent_tree *dev_extent_cache,
6430                           struct root_item_record *ri)
6431 {
6432         struct extent_buffer *buf;
6433         struct extent_record *rec = NULL;
6434         u64 bytenr;
6435         u32 size;
6436         u64 parent;
6437         u64 owner;
6438         u64 flags;
6439         u64 ptr;
6440         u64 gen = 0;
6441         int ret = 0;
6442         int i;
6443         int nritems;
6444         struct btrfs_key key;
6445         struct cache_extent *cache;
6446         int reada_bits;
6447
6448         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6449                                     bits_nr, &reada_bits);
6450         if (nritems == 0)
6451                 return 1;
6452
6453         if (!reada_bits) {
6454                 for(i = 0; i < nritems; i++) {
6455                         ret = add_cache_extent(reada, bits[i].start,
6456                                                bits[i].size);
6457                         if (ret == -EEXIST)
6458                                 continue;
6459
6460                         /* fixme, get the parent transid */
6461                         readahead_tree_block(root, bits[i].start,
6462                                              bits[i].size, 0);
6463                 }
6464         }
6465         *last = bits[0].start;
6466         bytenr = bits[0].start;
6467         size = bits[0].size;
6468
6469         cache = lookup_cache_extent(pending, bytenr, size);
6470         if (cache) {
6471                 remove_cache_extent(pending, cache);
6472                 free(cache);
6473         }
6474         cache = lookup_cache_extent(reada, bytenr, size);
6475         if (cache) {
6476                 remove_cache_extent(reada, cache);
6477                 free(cache);
6478         }
6479         cache = lookup_cache_extent(nodes, bytenr, size);
6480         if (cache) {
6481                 remove_cache_extent(nodes, cache);
6482                 free(cache);
6483         }
6484         cache = lookup_cache_extent(extent_cache, bytenr, size);
6485         if (cache) {
6486                 rec = container_of(cache, struct extent_record, cache);
6487                 gen = rec->parent_generation;
6488         }
6489
6490         /* fixme, get the real parent transid */
6491         buf = read_tree_block(root, bytenr, size, gen);
6492         if (!extent_buffer_uptodate(buf)) {
6493                 record_bad_block_io(root->fs_info,
6494                                     extent_cache, bytenr, size);
6495                 goto out;
6496         }
6497
6498         nritems = btrfs_header_nritems(buf);
6499
6500         flags = 0;
6501         if (!init_extent_tree) {
6502                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6503                                        btrfs_header_level(buf), 1, NULL,
6504                                        &flags);
6505                 if (ret < 0) {
6506                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6507                         if (ret < 0) {
6508                                 fprintf(stderr, "Couldn't calc extent flags\n");
6509                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6510                         }
6511                 }
6512         } else {
6513                 flags = 0;
6514                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6515                 if (ret < 0) {
6516                         fprintf(stderr, "Couldn't calc extent flags\n");
6517                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6518                 }
6519         }
6520
6521         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6522                 if (ri != NULL &&
6523                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6524                     ri->objectid == btrfs_header_owner(buf)) {
6525                         /*
6526                          * Ok we got to this block from it's original owner and
6527                          * we have FULL_BACKREF set.  Relocation can leave
6528                          * converted blocks over so this is altogether possible,
6529                          * however it's not possible if the generation > the
6530                          * last snapshot, so check for this case.
6531                          */
6532                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6533                             btrfs_header_generation(buf) > ri->last_snapshot) {
6534                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6535                                 rec->bad_full_backref = 1;
6536                         }
6537                 }
6538         } else {
6539                 if (ri != NULL &&
6540                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6541                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6542                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6543                         rec->bad_full_backref = 1;
6544                 }
6545         }
6546
6547         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6548                 rec->flag_block_full_backref = 1;
6549                 parent = bytenr;
6550                 owner = 0;
6551         } else {
6552                 rec->flag_block_full_backref = 0;
6553                 parent = 0;
6554                 owner = btrfs_header_owner(buf);
6555         }
6556
6557         ret = check_block(root, extent_cache, buf, flags);
6558         if (ret)
6559                 goto out;
6560
6561         if (btrfs_is_leaf(buf)) {
6562                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6563                 for (i = 0; i < nritems; i++) {
6564                         struct btrfs_file_extent_item *fi;
6565                         btrfs_item_key_to_cpu(buf, &key, i);
6566                         /*
6567                          * Check key type against the leaf owner.
6568                          * Could filter quite a lot of early error if
6569                          * owner is correct
6570                          */
6571                         if (check_type_with_root(btrfs_header_owner(buf),
6572                                                  key.type)) {
6573                                 fprintf(stderr, "ignoring invalid key\n");
6574                                 continue;
6575                         }
6576                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6577                                 process_extent_item(root, extent_cache, buf,
6578                                                     i);
6579                                 continue;
6580                         }
6581                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6582                                 process_extent_item(root, extent_cache, buf,
6583                                                     i);
6584                                 continue;
6585                         }
6586                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6587                                 total_csum_bytes +=
6588                                         btrfs_item_size_nr(buf, i);
6589                                 continue;
6590                         }
6591                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6592                                 process_chunk_item(chunk_cache, &key, buf, i);
6593                                 continue;
6594                         }
6595                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6596                                 process_device_item(dev_cache, &key, buf, i);
6597                                 continue;
6598                         }
6599                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6600                                 process_block_group_item(block_group_cache,
6601                                         &key, buf, i);
6602                                 continue;
6603                         }
6604                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6605                                 process_device_extent_item(dev_extent_cache,
6606                                         &key, buf, i);
6607                                 continue;
6608
6609                         }
6610                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6611 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6612                                 process_extent_ref_v0(extent_cache, buf, i);
6613 #else
6614                                 BUG();
6615 #endif
6616                                 continue;
6617                         }
6618
6619                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6620                                 ret = add_tree_backref(extent_cache,
6621                                                 key.objectid, 0, key.offset, 0);
6622                                 if (ret < 0)
6623                                         error("add_tree_backref failed: %s",
6624                                               strerror(-ret));
6625                                 continue;
6626                         }
6627                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6628                                 ret = add_tree_backref(extent_cache,
6629                                                 key.objectid, key.offset, 0, 0);
6630                                 if (ret < 0)
6631                                         error("add_tree_backref failed: %s",
6632                                               strerror(-ret));
6633                                 continue;
6634                         }
6635                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6636                                 struct btrfs_extent_data_ref *ref;
6637                                 ref = btrfs_item_ptr(buf, i,
6638                                                 struct btrfs_extent_data_ref);
6639                                 add_data_backref(extent_cache,
6640                                         key.objectid, 0,
6641                                         btrfs_extent_data_ref_root(buf, ref),
6642                                         btrfs_extent_data_ref_objectid(buf,
6643                                                                        ref),
6644                                         btrfs_extent_data_ref_offset(buf, ref),
6645                                         btrfs_extent_data_ref_count(buf, ref),
6646                                         0, root->sectorsize);
6647                                 continue;
6648                         }
6649                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6650                                 struct btrfs_shared_data_ref *ref;
6651                                 ref = btrfs_item_ptr(buf, i,
6652                                                 struct btrfs_shared_data_ref);
6653                                 add_data_backref(extent_cache,
6654                                         key.objectid, key.offset, 0, 0, 0,
6655                                         btrfs_shared_data_ref_count(buf, ref),
6656                                         0, root->sectorsize);
6657                                 continue;
6658                         }
6659                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6660                                 struct bad_item *bad;
6661
6662                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6663                                         continue;
6664                                 if (!owner)
6665                                         continue;
6666                                 bad = malloc(sizeof(struct bad_item));
6667                                 if (!bad)
6668                                         continue;
6669                                 INIT_LIST_HEAD(&bad->list);
6670                                 memcpy(&bad->key, &key,
6671                                        sizeof(struct btrfs_key));
6672                                 bad->root_id = owner;
6673                                 list_add_tail(&bad->list, &delete_items);
6674                                 continue;
6675                         }
6676                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6677                                 continue;
6678                         fi = btrfs_item_ptr(buf, i,
6679                                             struct btrfs_file_extent_item);
6680                         if (btrfs_file_extent_type(buf, fi) ==
6681                             BTRFS_FILE_EXTENT_INLINE)
6682                                 continue;
6683                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6684                                 continue;
6685
6686                         data_bytes_allocated +=
6687                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6688                         if (data_bytes_allocated < root->sectorsize) {
6689                                 abort();
6690                         }
6691                         data_bytes_referenced +=
6692                                 btrfs_file_extent_num_bytes(buf, fi);
6693                         add_data_backref(extent_cache,
6694                                 btrfs_file_extent_disk_bytenr(buf, fi),
6695                                 parent, owner, key.objectid, key.offset -
6696                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6697                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6698                 }
6699         } else {
6700                 int level;
6701                 struct btrfs_key first_key;
6702
6703                 first_key.objectid = 0;
6704
6705                 if (nritems > 0)
6706                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6707                 level = btrfs_header_level(buf);
6708                 for (i = 0; i < nritems; i++) {
6709                         struct extent_record tmpl;
6710
6711                         ptr = btrfs_node_blockptr(buf, i);
6712                         size = root->nodesize;
6713                         btrfs_node_key_to_cpu(buf, &key, i);
6714                         if (ri != NULL) {
6715                                 if ((level == ri->drop_level)
6716                                     && is_dropped_key(&key, &ri->drop_key)) {
6717                                         continue;
6718                                 }
6719                         }
6720
6721                         memset(&tmpl, 0, sizeof(tmpl));
6722                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6723                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6724                         tmpl.start = ptr;
6725                         tmpl.nr = size;
6726                         tmpl.refs = 1;
6727                         tmpl.metadata = 1;
6728                         tmpl.max_size = size;
6729                         ret = add_extent_rec(extent_cache, &tmpl);
6730                         if (ret < 0)
6731                                 goto out;
6732
6733                         ret = add_tree_backref(extent_cache, ptr, parent,
6734                                         owner, 1);
6735                         if (ret < 0) {
6736                                 error("add_tree_backref failed: %s",
6737                                       strerror(-ret));
6738                                 continue;
6739                         }
6740
6741                         if (level > 1) {
6742                                 add_pending(nodes, seen, ptr, size);
6743                         } else {
6744                                 add_pending(pending, seen, ptr, size);
6745                         }
6746                 }
6747                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6748                                       nritems) * sizeof(struct btrfs_key_ptr);
6749         }
6750         total_btree_bytes += buf->len;
6751         if (fs_root_objectid(btrfs_header_owner(buf)))
6752                 total_fs_tree_bytes += buf->len;
6753         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6754                 total_extent_tree_bytes += buf->len;
6755         if (!found_old_backref &&
6756             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6757             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6758             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6759                 found_old_backref = 1;
6760 out:
6761         free_extent_buffer(buf);
6762         return ret;
6763 }
6764
6765 static int add_root_to_pending(struct extent_buffer *buf,
6766                                struct cache_tree *extent_cache,
6767                                struct cache_tree *pending,
6768                                struct cache_tree *seen,
6769                                struct cache_tree *nodes,
6770                                u64 objectid)
6771 {
6772         struct extent_record tmpl;
6773         int ret;
6774
6775         if (btrfs_header_level(buf) > 0)
6776                 add_pending(nodes, seen, buf->start, buf->len);
6777         else
6778                 add_pending(pending, seen, buf->start, buf->len);
6779
6780         memset(&tmpl, 0, sizeof(tmpl));
6781         tmpl.start = buf->start;
6782         tmpl.nr = buf->len;
6783         tmpl.is_root = 1;
6784         tmpl.refs = 1;
6785         tmpl.metadata = 1;
6786         tmpl.max_size = buf->len;
6787         add_extent_rec(extent_cache, &tmpl);
6788
6789         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6790             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6791                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6792                                 0, 1);
6793         else
6794                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6795                                 1);
6796         return ret;
6797 }
6798
6799 /* as we fix the tree, we might be deleting blocks that
6800  * we're tracking for repair.  This hook makes sure we
6801  * remove any backrefs for blocks as we are fixing them.
6802  */
6803 static int free_extent_hook(struct btrfs_trans_handle *trans,
6804                             struct btrfs_root *root,
6805                             u64 bytenr, u64 num_bytes, u64 parent,
6806                             u64 root_objectid, u64 owner, u64 offset,
6807                             int refs_to_drop)
6808 {
6809         struct extent_record *rec;
6810         struct cache_extent *cache;
6811         int is_data;
6812         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6813
6814         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6815         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6816         if (!cache)
6817                 return 0;
6818
6819         rec = container_of(cache, struct extent_record, cache);
6820         if (is_data) {
6821                 struct data_backref *back;
6822                 back = find_data_backref(rec, parent, root_objectid, owner,
6823                                          offset, 1, bytenr, num_bytes);
6824                 if (!back)
6825                         goto out;
6826                 if (back->node.found_ref) {
6827                         back->found_ref -= refs_to_drop;
6828                         if (rec->refs)
6829                                 rec->refs -= refs_to_drop;
6830                 }
6831                 if (back->node.found_extent_tree) {
6832                         back->num_refs -= refs_to_drop;
6833                         if (rec->extent_item_refs)
6834                                 rec->extent_item_refs -= refs_to_drop;
6835                 }
6836                 if (back->found_ref == 0)
6837                         back->node.found_ref = 0;
6838                 if (back->num_refs == 0)
6839                         back->node.found_extent_tree = 0;
6840
6841                 if (!back->node.found_extent_tree && back->node.found_ref) {
6842                         list_del(&back->node.list);
6843                         free(back);
6844                 }
6845         } else {
6846                 struct tree_backref *back;
6847                 back = find_tree_backref(rec, parent, root_objectid);
6848                 if (!back)
6849                         goto out;
6850                 if (back->node.found_ref) {
6851                         if (rec->refs)
6852                                 rec->refs--;
6853                         back->node.found_ref = 0;
6854                 }
6855                 if (back->node.found_extent_tree) {
6856                         if (rec->extent_item_refs)
6857                                 rec->extent_item_refs--;
6858                         back->node.found_extent_tree = 0;
6859                 }
6860                 if (!back->node.found_extent_tree && back->node.found_ref) {
6861                         list_del(&back->node.list);
6862                         free(back);
6863                 }
6864         }
6865         maybe_free_extent_rec(extent_cache, rec);
6866 out:
6867         return 0;
6868 }
6869
6870 static int delete_extent_records(struct btrfs_trans_handle *trans,
6871                                  struct btrfs_root *root,
6872                                  struct btrfs_path *path,
6873                                  u64 bytenr, u64 new_len)
6874 {
6875         struct btrfs_key key;
6876         struct btrfs_key found_key;
6877         struct extent_buffer *leaf;
6878         int ret;
6879         int slot;
6880
6881
6882         key.objectid = bytenr;
6883         key.type = (u8)-1;
6884         key.offset = (u64)-1;
6885
6886         while(1) {
6887                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6888                                         &key, path, 0, 1);
6889                 if (ret < 0)
6890                         break;
6891
6892                 if (ret > 0) {
6893                         ret = 0;
6894                         if (path->slots[0] == 0)
6895                                 break;
6896                         path->slots[0]--;
6897                 }
6898                 ret = 0;
6899
6900                 leaf = path->nodes[0];
6901                 slot = path->slots[0];
6902
6903                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6904                 if (found_key.objectid != bytenr)
6905                         break;
6906
6907                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6908                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6909                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6910                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6911                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6912                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6913                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6914                         btrfs_release_path(path);
6915                         if (found_key.type == 0) {
6916                                 if (found_key.offset == 0)
6917                                         break;
6918                                 key.offset = found_key.offset - 1;
6919                                 key.type = found_key.type;
6920                         }
6921                         key.type = found_key.type - 1;
6922                         key.offset = (u64)-1;
6923                         continue;
6924                 }
6925
6926                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6927                         found_key.objectid, found_key.type, found_key.offset);
6928
6929                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6930                 if (ret)
6931                         break;
6932                 btrfs_release_path(path);
6933
6934                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6935                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6936                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6937                                 found_key.offset : root->nodesize;
6938
6939                         ret = btrfs_update_block_group(trans, root, bytenr,
6940                                                        bytes, 0, 0);
6941                         if (ret)
6942                                 break;
6943                 }
6944         }
6945
6946         btrfs_release_path(path);
6947         return ret;
6948 }
6949
6950 /*
6951  * for a single backref, this will allocate a new extent
6952  * and add the backref to it.
6953  */
6954 static int record_extent(struct btrfs_trans_handle *trans,
6955                          struct btrfs_fs_info *info,
6956                          struct btrfs_path *path,
6957                          struct extent_record *rec,
6958                          struct extent_backref *back,
6959                          int allocated, u64 flags)
6960 {
6961         int ret;
6962         struct btrfs_root *extent_root = info->extent_root;
6963         struct extent_buffer *leaf;
6964         struct btrfs_key ins_key;
6965         struct btrfs_extent_item *ei;
6966         struct data_backref *dback;
6967         struct btrfs_tree_block_info *bi;
6968
6969         if (!back->is_data)
6970                 rec->max_size = max_t(u64, rec->max_size,
6971                                     info->extent_root->nodesize);
6972
6973         if (!allocated) {
6974                 u32 item_size = sizeof(*ei);
6975
6976                 if (!back->is_data)
6977                         item_size += sizeof(*bi);
6978
6979                 ins_key.objectid = rec->start;
6980                 ins_key.offset = rec->max_size;
6981                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6982
6983                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6984                                         &ins_key, item_size);
6985                 if (ret)
6986                         goto fail;
6987
6988                 leaf = path->nodes[0];
6989                 ei = btrfs_item_ptr(leaf, path->slots[0],
6990                                     struct btrfs_extent_item);
6991
6992                 btrfs_set_extent_refs(leaf, ei, 0);
6993                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6994
6995                 if (back->is_data) {
6996                         btrfs_set_extent_flags(leaf, ei,
6997                                                BTRFS_EXTENT_FLAG_DATA);
6998                 } else {
6999                         struct btrfs_disk_key copy_key;;
7000
7001                         bi = (struct btrfs_tree_block_info *)(ei + 1);
7002                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
7003                                              sizeof(*bi));
7004
7005                         btrfs_set_disk_key_objectid(&copy_key,
7006                                                     rec->info_objectid);
7007                         btrfs_set_disk_key_type(&copy_key, 0);
7008                         btrfs_set_disk_key_offset(&copy_key, 0);
7009
7010                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7011                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
7012
7013                         btrfs_set_extent_flags(leaf, ei,
7014                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7015                 }
7016
7017                 btrfs_mark_buffer_dirty(leaf);
7018                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7019                                                rec->max_size, 1, 0);
7020                 if (ret)
7021                         goto fail;
7022                 btrfs_release_path(path);
7023         }
7024
7025         if (back->is_data) {
7026                 u64 parent;
7027                 int i;
7028
7029                 dback = to_data_backref(back);
7030                 if (back->full_backref)
7031                         parent = dback->parent;
7032                 else
7033                         parent = 0;
7034
7035                 for (i = 0; i < dback->found_ref; i++) {
7036                         /* if parent != 0, we're doing a full backref
7037                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7038                          * just makes the backref allocator create a data
7039                          * backref
7040                          */
7041                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
7042                                                    rec->start, rec->max_size,
7043                                                    parent,
7044                                                    dback->root,
7045                                                    parent ?
7046                                                    BTRFS_FIRST_FREE_OBJECTID :
7047                                                    dback->owner,
7048                                                    dback->offset);
7049                         if (ret)
7050                                 break;
7051                 }
7052                 fprintf(stderr, "adding new data backref"
7053                                 " on %llu %s %llu owner %llu"
7054                                 " offset %llu found %d\n",
7055                                 (unsigned long long)rec->start,
7056                                 back->full_backref ?
7057                                 "parent" : "root",
7058                                 back->full_backref ?
7059                                 (unsigned long long)parent :
7060                                 (unsigned long long)dback->root,
7061                                 (unsigned long long)dback->owner,
7062                                 (unsigned long long)dback->offset,
7063                                 dback->found_ref);
7064         } else {
7065                 u64 parent;
7066                 struct tree_backref *tback;
7067
7068                 tback = to_tree_backref(back);
7069                 if (back->full_backref)
7070                         parent = tback->parent;
7071                 else
7072                         parent = 0;
7073
7074                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7075                                            rec->start, rec->max_size,
7076                                            parent, tback->root, 0, 0);
7077                 fprintf(stderr, "adding new tree backref on "
7078                         "start %llu len %llu parent %llu root %llu\n",
7079                         rec->start, rec->max_size, parent, tback->root);
7080         }
7081 fail:
7082         btrfs_release_path(path);
7083         return ret;
7084 }
7085
7086 static struct extent_entry *find_entry(struct list_head *entries,
7087                                        u64 bytenr, u64 bytes)
7088 {
7089         struct extent_entry *entry = NULL;
7090
7091         list_for_each_entry(entry, entries, list) {
7092                 if (entry->bytenr == bytenr && entry->bytes == bytes)
7093                         return entry;
7094         }
7095
7096         return NULL;
7097 }
7098
7099 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7100 {
7101         struct extent_entry *entry, *best = NULL, *prev = NULL;
7102
7103         list_for_each_entry(entry, entries, list) {
7104                 /*
7105                  * If there are as many broken entries as entries then we know
7106                  * not to trust this particular entry.
7107                  */
7108                 if (entry->broken == entry->count)
7109                         continue;
7110
7111                 /*
7112                  * Special case, when there are only two entries and 'best' is
7113                  * the first one
7114                  */
7115                 if (!prev) {
7116                         best = entry;
7117                         prev = entry;
7118                         continue;
7119                 }
7120
7121                 /*
7122                  * If our current entry == best then we can't be sure our best
7123                  * is really the best, so we need to keep searching.
7124                  */
7125                 if (best && best->count == entry->count) {
7126                         prev = entry;
7127                         best = NULL;
7128                         continue;
7129                 }
7130
7131                 /* Prev == entry, not good enough, have to keep searching */
7132                 if (!prev->broken && prev->count == entry->count)
7133                         continue;
7134
7135                 if (!best)
7136                         best = (prev->count > entry->count) ? prev : entry;
7137                 else if (best->count < entry->count)
7138                         best = entry;
7139                 prev = entry;
7140         }
7141
7142         return best;
7143 }
7144
7145 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7146                       struct data_backref *dback, struct extent_entry *entry)
7147 {
7148         struct btrfs_trans_handle *trans;
7149         struct btrfs_root *root;
7150         struct btrfs_file_extent_item *fi;
7151         struct extent_buffer *leaf;
7152         struct btrfs_key key;
7153         u64 bytenr, bytes;
7154         int ret, err;
7155
7156         key.objectid = dback->root;
7157         key.type = BTRFS_ROOT_ITEM_KEY;
7158         key.offset = (u64)-1;
7159         root = btrfs_read_fs_root(info, &key);
7160         if (IS_ERR(root)) {
7161                 fprintf(stderr, "Couldn't find root for our ref\n");
7162                 return -EINVAL;
7163         }
7164
7165         /*
7166          * The backref points to the original offset of the extent if it was
7167          * split, so we need to search down to the offset we have and then walk
7168          * forward until we find the backref we're looking for.
7169          */
7170         key.objectid = dback->owner;
7171         key.type = BTRFS_EXTENT_DATA_KEY;
7172         key.offset = dback->offset;
7173         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7174         if (ret < 0) {
7175                 fprintf(stderr, "Error looking up ref %d\n", ret);
7176                 return ret;
7177         }
7178
7179         while (1) {
7180                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7181                         ret = btrfs_next_leaf(root, path);
7182                         if (ret) {
7183                                 fprintf(stderr, "Couldn't find our ref, next\n");
7184                                 return -EINVAL;
7185                         }
7186                 }
7187                 leaf = path->nodes[0];
7188                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7189                 if (key.objectid != dback->owner ||
7190                     key.type != BTRFS_EXTENT_DATA_KEY) {
7191                         fprintf(stderr, "Couldn't find our ref, search\n");
7192                         return -EINVAL;
7193                 }
7194                 fi = btrfs_item_ptr(leaf, path->slots[0],
7195                                     struct btrfs_file_extent_item);
7196                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7197                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7198
7199                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7200                         break;
7201                 path->slots[0]++;
7202         }
7203
7204         btrfs_release_path(path);
7205
7206         trans = btrfs_start_transaction(root, 1);
7207         if (IS_ERR(trans))
7208                 return PTR_ERR(trans);
7209
7210         /*
7211          * Ok we have the key of the file extent we want to fix, now we can cow
7212          * down to the thing and fix it.
7213          */
7214         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7215         if (ret < 0) {
7216                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7217                         key.objectid, key.type, key.offset, ret);
7218                 goto out;
7219         }
7220         if (ret > 0) {
7221                 fprintf(stderr, "Well that's odd, we just found this key "
7222                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7223                         key.offset);
7224                 ret = -EINVAL;
7225                 goto out;
7226         }
7227         leaf = path->nodes[0];
7228         fi = btrfs_item_ptr(leaf, path->slots[0],
7229                             struct btrfs_file_extent_item);
7230
7231         if (btrfs_file_extent_compression(leaf, fi) &&
7232             dback->disk_bytenr != entry->bytenr) {
7233                 fprintf(stderr, "Ref doesn't match the record start and is "
7234                         "compressed, please take a btrfs-image of this file "
7235                         "system and send it to a btrfs developer so they can "
7236                         "complete this functionality for bytenr %Lu\n",
7237                         dback->disk_bytenr);
7238                 ret = -EINVAL;
7239                 goto out;
7240         }
7241
7242         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7243                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7244         } else if (dback->disk_bytenr > entry->bytenr) {
7245                 u64 off_diff, offset;
7246
7247                 off_diff = dback->disk_bytenr - entry->bytenr;
7248                 offset = btrfs_file_extent_offset(leaf, fi);
7249                 if (dback->disk_bytenr + offset +
7250                     btrfs_file_extent_num_bytes(leaf, fi) >
7251                     entry->bytenr + entry->bytes) {
7252                         fprintf(stderr, "Ref is past the entry end, please "
7253                                 "take a btrfs-image of this file system and "
7254                                 "send it to a btrfs developer, ref %Lu\n",
7255                                 dback->disk_bytenr);
7256                         ret = -EINVAL;
7257                         goto out;
7258                 }
7259                 offset += off_diff;
7260                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7261                 btrfs_set_file_extent_offset(leaf, fi, offset);
7262         } else if (dback->disk_bytenr < entry->bytenr) {
7263                 u64 offset;
7264
7265                 offset = btrfs_file_extent_offset(leaf, fi);
7266                 if (dback->disk_bytenr + offset < entry->bytenr) {
7267                         fprintf(stderr, "Ref is before the entry start, please"
7268                                 " take a btrfs-image of this file system and "
7269                                 "send it to a btrfs developer, ref %Lu\n",
7270                                 dback->disk_bytenr);
7271                         ret = -EINVAL;
7272                         goto out;
7273                 }
7274
7275                 offset += dback->disk_bytenr;
7276                 offset -= entry->bytenr;
7277                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7278                 btrfs_set_file_extent_offset(leaf, fi, offset);
7279         }
7280
7281         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7282
7283         /*
7284          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7285          * only do this if we aren't using compression, otherwise it's a
7286          * trickier case.
7287          */
7288         if (!btrfs_file_extent_compression(leaf, fi))
7289                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7290         else
7291                 printf("ram bytes may be wrong?\n");
7292         btrfs_mark_buffer_dirty(leaf);
7293 out:
7294         err = btrfs_commit_transaction(trans, root);
7295         btrfs_release_path(path);
7296         return ret ? ret : err;
7297 }
7298
7299 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7300                            struct extent_record *rec)
7301 {
7302         struct extent_backref *back;
7303         struct data_backref *dback;
7304         struct extent_entry *entry, *best = NULL;
7305         LIST_HEAD(entries);
7306         int nr_entries = 0;
7307         int broken_entries = 0;
7308         int ret = 0;
7309         short mismatch = 0;
7310
7311         /*
7312          * Metadata is easy and the backrefs should always agree on bytenr and
7313          * size, if not we've got bigger issues.
7314          */
7315         if (rec->metadata)
7316                 return 0;
7317
7318         list_for_each_entry(back, &rec->backrefs, list) {
7319                 if (back->full_backref || !back->is_data)
7320                         continue;
7321
7322                 dback = to_data_backref(back);
7323
7324                 /*
7325                  * We only pay attention to backrefs that we found a real
7326                  * backref for.
7327                  */
7328                 if (dback->found_ref == 0)
7329                         continue;
7330
7331                 /*
7332                  * For now we only catch when the bytes don't match, not the
7333                  * bytenr.  We can easily do this at the same time, but I want
7334                  * to have a fs image to test on before we just add repair
7335                  * functionality willy-nilly so we know we won't screw up the
7336                  * repair.
7337                  */
7338
7339                 entry = find_entry(&entries, dback->disk_bytenr,
7340                                    dback->bytes);
7341                 if (!entry) {
7342                         entry = malloc(sizeof(struct extent_entry));
7343                         if (!entry) {
7344                                 ret = -ENOMEM;
7345                                 goto out;
7346                         }
7347                         memset(entry, 0, sizeof(*entry));
7348                         entry->bytenr = dback->disk_bytenr;
7349                         entry->bytes = dback->bytes;
7350                         list_add_tail(&entry->list, &entries);
7351                         nr_entries++;
7352                 }
7353
7354                 /*
7355                  * If we only have on entry we may think the entries agree when
7356                  * in reality they don't so we have to do some extra checking.
7357                  */
7358                 if (dback->disk_bytenr != rec->start ||
7359                     dback->bytes != rec->nr || back->broken)
7360                         mismatch = 1;
7361
7362                 if (back->broken) {
7363                         entry->broken++;
7364                         broken_entries++;
7365                 }
7366
7367                 entry->count++;
7368         }
7369
7370         /* Yay all the backrefs agree, carry on good sir */
7371         if (nr_entries <= 1 && !mismatch)
7372                 goto out;
7373
7374         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7375                 "%Lu\n", rec->start);
7376
7377         /*
7378          * First we want to see if the backrefs can agree amongst themselves who
7379          * is right, so figure out which one of the entries has the highest
7380          * count.
7381          */
7382         best = find_most_right_entry(&entries);
7383
7384         /*
7385          * Ok so we may have an even split between what the backrefs think, so
7386          * this is where we use the extent ref to see what it thinks.
7387          */
7388         if (!best) {
7389                 entry = find_entry(&entries, rec->start, rec->nr);
7390                 if (!entry && (!broken_entries || !rec->found_rec)) {
7391                         fprintf(stderr, "Backrefs don't agree with each other "
7392                                 "and extent record doesn't agree with anybody,"
7393                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7394                                 rec->start, rec->nr);
7395                         ret = -EINVAL;
7396                         goto out;
7397                 } else if (!entry) {
7398                         /*
7399                          * Ok our backrefs were broken, we'll assume this is the
7400                          * correct value and add an entry for this range.
7401                          */
7402                         entry = malloc(sizeof(struct extent_entry));
7403                         if (!entry) {
7404                                 ret = -ENOMEM;
7405                                 goto out;
7406                         }
7407                         memset(entry, 0, sizeof(*entry));
7408                         entry->bytenr = rec->start;
7409                         entry->bytes = rec->nr;
7410                         list_add_tail(&entry->list, &entries);
7411                         nr_entries++;
7412                 }
7413                 entry->count++;
7414                 best = find_most_right_entry(&entries);
7415                 if (!best) {
7416                         fprintf(stderr, "Backrefs and extent record evenly "
7417                                 "split on who is right, this is going to "
7418                                 "require user input to fix bytenr %Lu bytes "
7419                                 "%Lu\n", rec->start, rec->nr);
7420                         ret = -EINVAL;
7421                         goto out;
7422                 }
7423         }
7424
7425         /*
7426          * I don't think this can happen currently as we'll abort() if we catch
7427          * this case higher up, but in case somebody removes that we still can't
7428          * deal with it properly here yet, so just bail out of that's the case.
7429          */
7430         if (best->bytenr != rec->start) {
7431                 fprintf(stderr, "Extent start and backref starts don't match, "
7432                         "please use btrfs-image on this file system and send "
7433                         "it to a btrfs developer so they can make fsck fix "
7434                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7435                         rec->start, rec->nr);
7436                 ret = -EINVAL;
7437                 goto out;
7438         }
7439
7440         /*
7441          * Ok great we all agreed on an extent record, let's go find the real
7442          * references and fix up the ones that don't match.
7443          */
7444         list_for_each_entry(back, &rec->backrefs, list) {
7445                 if (back->full_backref || !back->is_data)
7446                         continue;
7447
7448                 dback = to_data_backref(back);
7449
7450                 /*
7451                  * Still ignoring backrefs that don't have a real ref attached
7452                  * to them.
7453                  */
7454                 if (dback->found_ref == 0)
7455                         continue;
7456
7457                 if (dback->bytes == best->bytes &&
7458                     dback->disk_bytenr == best->bytenr)
7459                         continue;
7460
7461                 ret = repair_ref(info, path, dback, best);
7462                 if (ret)
7463                         goto out;
7464         }
7465
7466         /*
7467          * Ok we messed with the actual refs, which means we need to drop our
7468          * entire cache and go back and rescan.  I know this is a huge pain and
7469          * adds a lot of extra work, but it's the only way to be safe.  Once all
7470          * the backrefs agree we may not need to do anything to the extent
7471          * record itself.
7472          */
7473         ret = -EAGAIN;
7474 out:
7475         while (!list_empty(&entries)) {
7476                 entry = list_entry(entries.next, struct extent_entry, list);
7477                 list_del_init(&entry->list);
7478                 free(entry);
7479         }
7480         return ret;
7481 }
7482
7483 static int process_duplicates(struct btrfs_root *root,
7484                               struct cache_tree *extent_cache,
7485                               struct extent_record *rec)
7486 {
7487         struct extent_record *good, *tmp;
7488         struct cache_extent *cache;
7489         int ret;
7490
7491         /*
7492          * If we found a extent record for this extent then return, or if we
7493          * have more than one duplicate we are likely going to need to delete
7494          * something.
7495          */
7496         if (rec->found_rec || rec->num_duplicates > 1)
7497                 return 0;
7498
7499         /* Shouldn't happen but just in case */
7500         BUG_ON(!rec->num_duplicates);
7501
7502         /*
7503          * So this happens if we end up with a backref that doesn't match the
7504          * actual extent entry.  So either the backref is bad or the extent
7505          * entry is bad.  Either way we want to have the extent_record actually
7506          * reflect what we found in the extent_tree, so we need to take the
7507          * duplicate out and use that as the extent_record since the only way we
7508          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7509          */
7510         remove_cache_extent(extent_cache, &rec->cache);
7511
7512         good = to_extent_record(rec->dups.next);
7513         list_del_init(&good->list);
7514         INIT_LIST_HEAD(&good->backrefs);
7515         INIT_LIST_HEAD(&good->dups);
7516         good->cache.start = good->start;
7517         good->cache.size = good->nr;
7518         good->content_checked = 0;
7519         good->owner_ref_checked = 0;
7520         good->num_duplicates = 0;
7521         good->refs = rec->refs;
7522         list_splice_init(&rec->backrefs, &good->backrefs);
7523         while (1) {
7524                 cache = lookup_cache_extent(extent_cache, good->start,
7525                                             good->nr);
7526                 if (!cache)
7527                         break;
7528                 tmp = container_of(cache, struct extent_record, cache);
7529
7530                 /*
7531                  * If we find another overlapping extent and it's found_rec is
7532                  * set then it's a duplicate and we need to try and delete
7533                  * something.
7534                  */
7535                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7536                         if (list_empty(&good->list))
7537                                 list_add_tail(&good->list,
7538                                               &duplicate_extents);
7539                         good->num_duplicates += tmp->num_duplicates + 1;
7540                         list_splice_init(&tmp->dups, &good->dups);
7541                         list_del_init(&tmp->list);
7542                         list_add_tail(&tmp->list, &good->dups);
7543                         remove_cache_extent(extent_cache, &tmp->cache);
7544                         continue;
7545                 }
7546
7547                 /*
7548                  * Ok we have another non extent item backed extent rec, so lets
7549                  * just add it to this extent and carry on like we did above.
7550                  */
7551                 good->refs += tmp->refs;
7552                 list_splice_init(&tmp->backrefs, &good->backrefs);
7553                 remove_cache_extent(extent_cache, &tmp->cache);
7554                 free(tmp);
7555         }
7556         ret = insert_cache_extent(extent_cache, &good->cache);
7557         BUG_ON(ret);
7558         free(rec);
7559         return good->num_duplicates ? 0 : 1;
7560 }
7561
7562 static int delete_duplicate_records(struct btrfs_root *root,
7563                                     struct extent_record *rec)
7564 {
7565         struct btrfs_trans_handle *trans;
7566         LIST_HEAD(delete_list);
7567         struct btrfs_path path;
7568         struct extent_record *tmp, *good, *n;
7569         int nr_del = 0;
7570         int ret = 0, err;
7571         struct btrfs_key key;
7572
7573         btrfs_init_path(&path);
7574
7575         good = rec;
7576         /* Find the record that covers all of the duplicates. */
7577         list_for_each_entry(tmp, &rec->dups, list) {
7578                 if (good->start < tmp->start)
7579                         continue;
7580                 if (good->nr > tmp->nr)
7581                         continue;
7582
7583                 if (tmp->start + tmp->nr < good->start + good->nr) {
7584                         fprintf(stderr, "Ok we have overlapping extents that "
7585                                 "aren't completely covered by each other, this "
7586                                 "is going to require more careful thought.  "
7587                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7588                                 tmp->start, tmp->nr, good->start, good->nr);
7589                         abort();
7590                 }
7591                 good = tmp;
7592         }
7593
7594         if (good != rec)
7595                 list_add_tail(&rec->list, &delete_list);
7596
7597         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7598                 if (tmp == good)
7599                         continue;
7600                 list_move_tail(&tmp->list, &delete_list);
7601         }
7602
7603         root = root->fs_info->extent_root;
7604         trans = btrfs_start_transaction(root, 1);
7605         if (IS_ERR(trans)) {
7606                 ret = PTR_ERR(trans);
7607                 goto out;
7608         }
7609
7610         list_for_each_entry(tmp, &delete_list, list) {
7611                 if (tmp->found_rec == 0)
7612                         continue;
7613                 key.objectid = tmp->start;
7614                 key.type = BTRFS_EXTENT_ITEM_KEY;
7615                 key.offset = tmp->nr;
7616
7617                 /* Shouldn't happen but just in case */
7618                 if (tmp->metadata) {
7619                         fprintf(stderr, "Well this shouldn't happen, extent "
7620                                 "record overlaps but is metadata? "
7621                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7622                         abort();
7623                 }
7624
7625                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7626                 if (ret) {
7627                         if (ret > 0)
7628                                 ret = -EINVAL;
7629                         break;
7630                 }
7631                 ret = btrfs_del_item(trans, root, &path);
7632                 if (ret)
7633                         break;
7634                 btrfs_release_path(&path);
7635                 nr_del++;
7636         }
7637         err = btrfs_commit_transaction(trans, root);
7638         if (err && !ret)
7639                 ret = err;
7640 out:
7641         while (!list_empty(&delete_list)) {
7642                 tmp = to_extent_record(delete_list.next);
7643                 list_del_init(&tmp->list);
7644                 if (tmp == rec)
7645                         continue;
7646                 free(tmp);
7647         }
7648
7649         while (!list_empty(&rec->dups)) {
7650                 tmp = to_extent_record(rec->dups.next);
7651                 list_del_init(&tmp->list);
7652                 free(tmp);
7653         }
7654
7655         btrfs_release_path(&path);
7656
7657         if (!ret && !nr_del)
7658                 rec->num_duplicates = 0;
7659
7660         return ret ? ret : nr_del;
7661 }
7662
7663 static int find_possible_backrefs(struct btrfs_fs_info *info,
7664                                   struct btrfs_path *path,
7665                                   struct cache_tree *extent_cache,
7666                                   struct extent_record *rec)
7667 {
7668         struct btrfs_root *root;
7669         struct extent_backref *back;
7670         struct data_backref *dback;
7671         struct cache_extent *cache;
7672         struct btrfs_file_extent_item *fi;
7673         struct btrfs_key key;
7674         u64 bytenr, bytes;
7675         int ret;
7676
7677         list_for_each_entry(back, &rec->backrefs, list) {
7678                 /* Don't care about full backrefs (poor unloved backrefs) */
7679                 if (back->full_backref || !back->is_data)
7680                         continue;
7681
7682                 dback = to_data_backref(back);
7683
7684                 /* We found this one, we don't need to do a lookup */
7685                 if (dback->found_ref)
7686                         continue;
7687
7688                 key.objectid = dback->root;
7689                 key.type = BTRFS_ROOT_ITEM_KEY;
7690                 key.offset = (u64)-1;
7691
7692                 root = btrfs_read_fs_root(info, &key);
7693
7694                 /* No root, definitely a bad ref, skip */
7695                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7696                         continue;
7697                 /* Other err, exit */
7698                 if (IS_ERR(root))
7699                         return PTR_ERR(root);
7700
7701                 key.objectid = dback->owner;
7702                 key.type = BTRFS_EXTENT_DATA_KEY;
7703                 key.offset = dback->offset;
7704                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7705                 if (ret) {
7706                         btrfs_release_path(path);
7707                         if (ret < 0)
7708                                 return ret;
7709                         /* Didn't find it, we can carry on */
7710                         ret = 0;
7711                         continue;
7712                 }
7713
7714                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7715                                     struct btrfs_file_extent_item);
7716                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7717                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7718                 btrfs_release_path(path);
7719                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7720                 if (cache) {
7721                         struct extent_record *tmp;
7722                         tmp = container_of(cache, struct extent_record, cache);
7723
7724                         /*
7725                          * If we found an extent record for the bytenr for this
7726                          * particular backref then we can't add it to our
7727                          * current extent record.  We only want to add backrefs
7728                          * that don't have a corresponding extent item in the
7729                          * extent tree since they likely belong to this record
7730                          * and we need to fix it if it doesn't match bytenrs.
7731                          */
7732                         if  (tmp->found_rec)
7733                                 continue;
7734                 }
7735
7736                 dback->found_ref += 1;
7737                 dback->disk_bytenr = bytenr;
7738                 dback->bytes = bytes;
7739
7740                 /*
7741                  * Set this so the verify backref code knows not to trust the
7742                  * values in this backref.
7743                  */
7744                 back->broken = 1;
7745         }
7746
7747         return 0;
7748 }
7749
7750 /*
7751  * Record orphan data ref into corresponding root.
7752  *
7753  * Return 0 if the extent item contains data ref and recorded.
7754  * Return 1 if the extent item contains no useful data ref
7755  *   On that case, it may contains only shared_dataref or metadata backref
7756  *   or the file extent exists(this should be handled by the extent bytenr
7757  *   recovery routine)
7758  * Return <0 if something goes wrong.
7759  */
7760 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7761                                       struct extent_record *rec)
7762 {
7763         struct btrfs_key key;
7764         struct btrfs_root *dest_root;
7765         struct extent_backref *back;
7766         struct data_backref *dback;
7767         struct orphan_data_extent *orphan;
7768         struct btrfs_path path;
7769         int recorded_data_ref = 0;
7770         int ret = 0;
7771
7772         if (rec->metadata)
7773                 return 1;
7774         btrfs_init_path(&path);
7775         list_for_each_entry(back, &rec->backrefs, list) {
7776                 if (back->full_backref || !back->is_data ||
7777                     !back->found_extent_tree)
7778                         continue;
7779                 dback = to_data_backref(back);
7780                 if (dback->found_ref)
7781                         continue;
7782                 key.objectid = dback->root;
7783                 key.type = BTRFS_ROOT_ITEM_KEY;
7784                 key.offset = (u64)-1;
7785
7786                 dest_root = btrfs_read_fs_root(fs_info, &key);
7787
7788                 /* For non-exist root we just skip it */
7789                 if (IS_ERR(dest_root) || !dest_root)
7790                         continue;
7791
7792                 key.objectid = dback->owner;
7793                 key.type = BTRFS_EXTENT_DATA_KEY;
7794                 key.offset = dback->offset;
7795
7796                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7797                 btrfs_release_path(&path);
7798                 /*
7799                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7800                  * we need to record it for inode/file extent rebuild.
7801                  * For ret > 0, we record it only for file extent rebuild.
7802                  * For ret == 0, the file extent exists but only bytenr
7803                  * mismatch, let the original bytenr fix routine to handle,
7804                  * don't record it.
7805                  */
7806                 if (ret == 0)
7807                         continue;
7808                 ret = 0;
7809                 orphan = malloc(sizeof(*orphan));
7810                 if (!orphan) {
7811                         ret = -ENOMEM;
7812                         goto out;
7813                 }
7814                 INIT_LIST_HEAD(&orphan->list);
7815                 orphan->root = dback->root;
7816                 orphan->objectid = dback->owner;
7817                 orphan->offset = dback->offset;
7818                 orphan->disk_bytenr = rec->cache.start;
7819                 orphan->disk_len = rec->cache.size;
7820                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7821                 recorded_data_ref = 1;
7822         }
7823 out:
7824         btrfs_release_path(&path);
7825         if (!ret)
7826                 return !recorded_data_ref;
7827         else
7828                 return ret;
7829 }
7830
7831 /*
7832  * when an incorrect extent item is found, this will delete
7833  * all of the existing entries for it and recreate them
7834  * based on what the tree scan found.
7835  */
7836 static int fixup_extent_refs(struct btrfs_fs_info *info,
7837                              struct cache_tree *extent_cache,
7838                              struct extent_record *rec)
7839 {
7840         struct btrfs_trans_handle *trans = NULL;
7841         int ret;
7842         struct btrfs_path path;
7843         struct list_head *cur = rec->backrefs.next;
7844         struct cache_extent *cache;
7845         struct extent_backref *back;
7846         int allocated = 0;
7847         u64 flags = 0;
7848
7849         if (rec->flag_block_full_backref)
7850                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7851
7852         btrfs_init_path(&path);
7853         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7854                 /*
7855                  * Sometimes the backrefs themselves are so broken they don't
7856                  * get attached to any meaningful rec, so first go back and
7857                  * check any of our backrefs that we couldn't find and throw
7858                  * them into the list if we find the backref so that
7859                  * verify_backrefs can figure out what to do.
7860                  */
7861                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7862                 if (ret < 0)
7863                         goto out;
7864         }
7865
7866         /* step one, make sure all of the backrefs agree */
7867         ret = verify_backrefs(info, &path, rec);
7868         if (ret < 0)
7869                 goto out;
7870
7871         trans = btrfs_start_transaction(info->extent_root, 1);
7872         if (IS_ERR(trans)) {
7873                 ret = PTR_ERR(trans);
7874                 goto out;
7875         }
7876
7877         /* step two, delete all the existing records */
7878         ret = delete_extent_records(trans, info->extent_root, &path,
7879                                     rec->start, rec->max_size);
7880
7881         if (ret < 0)
7882                 goto out;
7883
7884         /* was this block corrupt?  If so, don't add references to it */
7885         cache = lookup_cache_extent(info->corrupt_blocks,
7886                                     rec->start, rec->max_size);
7887         if (cache) {
7888                 ret = 0;
7889                 goto out;
7890         }
7891
7892         /* step three, recreate all the refs we did find */
7893         while(cur != &rec->backrefs) {
7894                 back = to_extent_backref(cur);
7895                 cur = cur->next;
7896
7897                 /*
7898                  * if we didn't find any references, don't create a
7899                  * new extent record
7900                  */
7901                 if (!back->found_ref)
7902                         continue;
7903
7904                 rec->bad_full_backref = 0;
7905                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
7906                 allocated = 1;
7907
7908                 if (ret)
7909                         goto out;
7910         }
7911 out:
7912         if (trans) {
7913                 int err = btrfs_commit_transaction(trans, info->extent_root);
7914                 if (!ret)
7915                         ret = err;
7916         }
7917
7918         btrfs_release_path(&path);
7919         return ret;
7920 }
7921
7922 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7923                               struct extent_record *rec)
7924 {
7925         struct btrfs_trans_handle *trans;
7926         struct btrfs_root *root = fs_info->extent_root;
7927         struct btrfs_path path;
7928         struct btrfs_extent_item *ei;
7929         struct btrfs_key key;
7930         u64 flags;
7931         int ret = 0;
7932
7933         key.objectid = rec->start;
7934         if (rec->metadata) {
7935                 key.type = BTRFS_METADATA_ITEM_KEY;
7936                 key.offset = rec->info_level;
7937         } else {
7938                 key.type = BTRFS_EXTENT_ITEM_KEY;
7939                 key.offset = rec->max_size;
7940         }
7941
7942         trans = btrfs_start_transaction(root, 0);
7943         if (IS_ERR(trans))
7944                 return PTR_ERR(trans);
7945
7946         btrfs_init_path(&path);
7947         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7948         if (ret < 0) {
7949                 btrfs_release_path(&path);
7950                 btrfs_commit_transaction(trans, root);
7951                 return ret;
7952         } else if (ret) {
7953                 fprintf(stderr, "Didn't find extent for %llu\n",
7954                         (unsigned long long)rec->start);
7955                 btrfs_release_path(&path);
7956                 btrfs_commit_transaction(trans, root);
7957                 return -ENOENT;
7958         }
7959
7960         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7961                             struct btrfs_extent_item);
7962         flags = btrfs_extent_flags(path.nodes[0], ei);
7963         if (rec->flag_block_full_backref) {
7964                 fprintf(stderr, "setting full backref on %llu\n",
7965                         (unsigned long long)key.objectid);
7966                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7967         } else {
7968                 fprintf(stderr, "clearing full backref on %llu\n",
7969                         (unsigned long long)key.objectid);
7970                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7971         }
7972         btrfs_set_extent_flags(path.nodes[0], ei, flags);
7973         btrfs_mark_buffer_dirty(path.nodes[0]);
7974         btrfs_release_path(&path);
7975         return btrfs_commit_transaction(trans, root);
7976 }
7977
7978 /* right now we only prune from the extent allocation tree */
7979 static int prune_one_block(struct btrfs_trans_handle *trans,
7980                            struct btrfs_fs_info *info,
7981                            struct btrfs_corrupt_block *corrupt)
7982 {
7983         int ret;
7984         struct btrfs_path path;
7985         struct extent_buffer *eb;
7986         u64 found;
7987         int slot;
7988         int nritems;
7989         int level = corrupt->level + 1;
7990
7991         btrfs_init_path(&path);
7992 again:
7993         /* we want to stop at the parent to our busted block */
7994         path.lowest_level = level;
7995
7996         ret = btrfs_search_slot(trans, info->extent_root,
7997                                 &corrupt->key, &path, -1, 1);
7998
7999         if (ret < 0)
8000                 goto out;
8001
8002         eb = path.nodes[level];
8003         if (!eb) {
8004                 ret = -ENOENT;
8005                 goto out;
8006         }
8007
8008         /*
8009          * hopefully the search gave us the block we want to prune,
8010          * lets try that first
8011          */
8012         slot = path.slots[level];
8013         found =  btrfs_node_blockptr(eb, slot);
8014         if (found == corrupt->cache.start)
8015                 goto del_ptr;
8016
8017         nritems = btrfs_header_nritems(eb);
8018
8019         /* the search failed, lets scan this node and hope we find it */
8020         for (slot = 0; slot < nritems; slot++) {
8021                 found =  btrfs_node_blockptr(eb, slot);
8022                 if (found == corrupt->cache.start)
8023                         goto del_ptr;
8024         }
8025         /*
8026          * we couldn't find the bad block.  TODO, search all the nodes for pointers
8027          * to this block
8028          */
8029         if (eb == info->extent_root->node) {
8030                 ret = -ENOENT;
8031                 goto out;
8032         } else {
8033                 level++;
8034                 btrfs_release_path(&path);
8035                 goto again;
8036         }
8037
8038 del_ptr:
8039         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8040         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8041
8042 out:
8043         btrfs_release_path(&path);
8044         return ret;
8045 }
8046
8047 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8048 {
8049         struct btrfs_trans_handle *trans = NULL;
8050         struct cache_extent *cache;
8051         struct btrfs_corrupt_block *corrupt;
8052
8053         while (1) {
8054                 cache = search_cache_extent(info->corrupt_blocks, 0);
8055                 if (!cache)
8056                         break;
8057                 if (!trans) {
8058                         trans = btrfs_start_transaction(info->extent_root, 1);
8059                         if (IS_ERR(trans))
8060                                 return PTR_ERR(trans);
8061                 }
8062                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8063                 prune_one_block(trans, info, corrupt);
8064                 remove_cache_extent(info->corrupt_blocks, cache);
8065         }
8066         if (trans)
8067                 return btrfs_commit_transaction(trans, info->extent_root);
8068         return 0;
8069 }
8070
8071 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8072 {
8073         struct btrfs_block_group_cache *cache;
8074         u64 start, end;
8075         int ret;
8076
8077         while (1) {
8078                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8079                                             &start, &end, EXTENT_DIRTY);
8080                 if (ret)
8081                         break;
8082                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8083                                    GFP_NOFS);
8084         }
8085
8086         start = 0;
8087         while (1) {
8088                 cache = btrfs_lookup_first_block_group(fs_info, start);
8089                 if (!cache)
8090                         break;
8091                 if (cache->cached)
8092                         cache->cached = 0;
8093                 start = cache->key.objectid + cache->key.offset;
8094         }
8095 }
8096
8097 static int check_extent_refs(struct btrfs_root *root,
8098                              struct cache_tree *extent_cache)
8099 {
8100         struct extent_record *rec;
8101         struct cache_extent *cache;
8102         int err = 0;
8103         int ret = 0;
8104         int fixed = 0;
8105         int had_dups = 0;
8106         int recorded = 0;
8107
8108         if (repair) {
8109                 /*
8110                  * if we're doing a repair, we have to make sure
8111                  * we don't allocate from the problem extents.
8112                  * In the worst case, this will be all the
8113                  * extents in the FS
8114                  */
8115                 cache = search_cache_extent(extent_cache, 0);
8116                 while(cache) {
8117                         rec = container_of(cache, struct extent_record, cache);
8118                         set_extent_dirty(root->fs_info->excluded_extents,
8119                                          rec->start,
8120                                          rec->start + rec->max_size - 1,
8121                                          GFP_NOFS);
8122                         cache = next_cache_extent(cache);
8123                 }
8124
8125                 /* pin down all the corrupted blocks too */
8126                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8127                 while(cache) {
8128                         set_extent_dirty(root->fs_info->excluded_extents,
8129                                          cache->start,
8130                                          cache->start + cache->size - 1,
8131                                          GFP_NOFS);
8132                         cache = next_cache_extent(cache);
8133                 }
8134                 prune_corrupt_blocks(root->fs_info);
8135                 reset_cached_block_groups(root->fs_info);
8136         }
8137
8138         reset_cached_block_groups(root->fs_info);
8139
8140         /*
8141          * We need to delete any duplicate entries we find first otherwise we
8142          * could mess up the extent tree when we have backrefs that actually
8143          * belong to a different extent item and not the weird duplicate one.
8144          */
8145         while (repair && !list_empty(&duplicate_extents)) {
8146                 rec = to_extent_record(duplicate_extents.next);
8147                 list_del_init(&rec->list);
8148
8149                 /* Sometimes we can find a backref before we find an actual
8150                  * extent, so we need to process it a little bit to see if there
8151                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8152                  * if this is a backref screwup.  If we need to delete stuff
8153                  * process_duplicates() will return 0, otherwise it will return
8154                  * 1 and we
8155                  */
8156                 if (process_duplicates(root, extent_cache, rec))
8157                         continue;
8158                 ret = delete_duplicate_records(root, rec);
8159                 if (ret < 0)
8160                         return ret;
8161                 /*
8162                  * delete_duplicate_records will return the number of entries
8163                  * deleted, so if it's greater than 0 then we know we actually
8164                  * did something and we need to remove.
8165                  */
8166                 if (ret)
8167                         had_dups = 1;
8168         }
8169
8170         if (had_dups)
8171                 return -EAGAIN;
8172
8173         while(1) {
8174                 int cur_err = 0;
8175
8176                 fixed = 0;
8177                 recorded = 0;
8178                 cache = search_cache_extent(extent_cache, 0);
8179                 if (!cache)
8180                         break;
8181                 rec = container_of(cache, struct extent_record, cache);
8182                 if (rec->num_duplicates) {
8183                         fprintf(stderr, "extent item %llu has multiple extent "
8184                                 "items\n", (unsigned long long)rec->start);
8185                         err = 1;
8186                         cur_err = 1;
8187                 }
8188
8189                 if (rec->refs != rec->extent_item_refs) {
8190                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
8191                                 (unsigned long long)rec->start,
8192                                 (unsigned long long)rec->nr);
8193                         fprintf(stderr, "extent item %llu, found %llu\n",
8194                                 (unsigned long long)rec->extent_item_refs,
8195                                 (unsigned long long)rec->refs);
8196                         ret = record_orphan_data_extents(root->fs_info, rec);
8197                         if (ret < 0)
8198                                 goto repair_abort;
8199                         if (ret == 0) {
8200                                 recorded = 1;
8201                         } else {
8202                                 /*
8203                                  * we can't use the extent to repair file
8204                                  * extent, let the fallback method handle it.
8205                                  */
8206                                 if (!fixed && repair) {
8207                                         ret = fixup_extent_refs(
8208                                                         root->fs_info,
8209                                                         extent_cache, rec);
8210                                         if (ret)
8211                                                 goto repair_abort;
8212                                         fixed = 1;
8213                                 }
8214                         }
8215                         err = 1;
8216                         cur_err = 1;
8217                 }
8218                 if (all_backpointers_checked(rec, 1)) {
8219                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8220                                 (unsigned long long)rec->start,
8221                                 (unsigned long long)rec->nr);
8222
8223                         if (!fixed && !recorded && repair) {
8224                                 ret = fixup_extent_refs(root->fs_info,
8225                                                         extent_cache, rec);
8226                                 if (ret)
8227                                         goto repair_abort;
8228                                 fixed = 1;
8229                         }
8230                         cur_err = 1;
8231                         err = 1;
8232                 }
8233                 if (!rec->owner_ref_checked) {
8234                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8235                                 (unsigned long long)rec->start,
8236                                 (unsigned long long)rec->nr);
8237                         if (!fixed && !recorded && repair) {
8238                                 ret = fixup_extent_refs(root->fs_info,
8239                                                         extent_cache, rec);
8240                                 if (ret)
8241                                         goto repair_abort;
8242                                 fixed = 1;
8243                         }
8244                         err = 1;
8245                         cur_err = 1;
8246                 }
8247                 if (rec->bad_full_backref) {
8248                         fprintf(stderr, "bad full backref, on [%llu]\n",
8249                                 (unsigned long long)rec->start);
8250                         if (repair) {
8251                                 ret = fixup_extent_flags(root->fs_info, rec);
8252                                 if (ret)
8253                                         goto repair_abort;
8254                                 fixed = 1;
8255                         }
8256                         err = 1;
8257                         cur_err = 1;
8258                 }
8259                 /*
8260                  * Although it's not a extent ref's problem, we reuse this
8261                  * routine for error reporting.
8262                  * No repair function yet.
8263                  */
8264                 if (rec->crossing_stripes) {
8265                         fprintf(stderr,
8266                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8267                                 rec->start, rec->start + rec->max_size);
8268                         err = 1;
8269                         cur_err = 1;
8270                 }
8271
8272                 if (rec->wrong_chunk_type) {
8273                         fprintf(stderr,
8274                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8275                                 rec->start, rec->start + rec->max_size);
8276                         err = 1;
8277                         cur_err = 1;
8278                 }
8279
8280                 remove_cache_extent(extent_cache, cache);
8281                 free_all_extent_backrefs(rec);
8282                 if (!init_extent_tree && repair && (!cur_err || fixed))
8283                         clear_extent_dirty(root->fs_info->excluded_extents,
8284                                            rec->start,
8285                                            rec->start + rec->max_size - 1,
8286                                            GFP_NOFS);
8287                 free(rec);
8288         }
8289 repair_abort:
8290         if (repair) {
8291                 if (ret && ret != -EAGAIN) {
8292                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8293                         exit(1);
8294                 } else if (!ret) {
8295                         struct btrfs_trans_handle *trans;
8296
8297                         root = root->fs_info->extent_root;
8298                         trans = btrfs_start_transaction(root, 1);
8299                         if (IS_ERR(trans)) {
8300                                 ret = PTR_ERR(trans);
8301                                 goto repair_abort;
8302                         }
8303
8304                         btrfs_fix_block_accounting(trans, root);
8305                         ret = btrfs_commit_transaction(trans, root);
8306                         if (ret)
8307                                 goto repair_abort;
8308                 }
8309                 if (err)
8310                         fprintf(stderr, "repaired damaged extent references\n");
8311                 return ret;
8312         }
8313         return err;
8314 }
8315
8316 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8317 {
8318         u64 stripe_size;
8319
8320         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8321                 stripe_size = length;
8322                 stripe_size /= num_stripes;
8323         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8324                 stripe_size = length * 2;
8325                 stripe_size /= num_stripes;
8326         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8327                 stripe_size = length;
8328                 stripe_size /= (num_stripes - 1);
8329         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8330                 stripe_size = length;
8331                 stripe_size /= (num_stripes - 2);
8332         } else {
8333                 stripe_size = length;
8334         }
8335         return stripe_size;
8336 }
8337
8338 /*
8339  * Check the chunk with its block group/dev list ref:
8340  * Return 0 if all refs seems valid.
8341  * Return 1 if part of refs seems valid, need later check for rebuild ref
8342  * like missing block group and needs to search extent tree to rebuild them.
8343  * Return -1 if essential refs are missing and unable to rebuild.
8344  */
8345 static int check_chunk_refs(struct chunk_record *chunk_rec,
8346                             struct block_group_tree *block_group_cache,
8347                             struct device_extent_tree *dev_extent_cache,
8348                             int silent)
8349 {
8350         struct cache_extent *block_group_item;
8351         struct block_group_record *block_group_rec;
8352         struct cache_extent *dev_extent_item;
8353         struct device_extent_record *dev_extent_rec;
8354         u64 devid;
8355         u64 offset;
8356         u64 length;
8357         int metadump_v2 = 0;
8358         int i;
8359         int ret = 0;
8360
8361         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8362                                                chunk_rec->offset,
8363                                                chunk_rec->length);
8364         if (block_group_item) {
8365                 block_group_rec = container_of(block_group_item,
8366                                                struct block_group_record,
8367                                                cache);
8368                 if (chunk_rec->length != block_group_rec->offset ||
8369                     chunk_rec->offset != block_group_rec->objectid ||
8370                     (!metadump_v2 &&
8371                      chunk_rec->type_flags != block_group_rec->flags)) {
8372                         if (!silent)
8373                                 fprintf(stderr,
8374                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8375                                         chunk_rec->objectid,
8376                                         chunk_rec->type,
8377                                         chunk_rec->offset,
8378                                         chunk_rec->length,
8379                                         chunk_rec->offset,
8380                                         chunk_rec->type_flags,
8381                                         block_group_rec->objectid,
8382                                         block_group_rec->type,
8383                                         block_group_rec->offset,
8384                                         block_group_rec->offset,
8385                                         block_group_rec->objectid,
8386                                         block_group_rec->flags);
8387                         ret = -1;
8388                 } else {
8389                         list_del_init(&block_group_rec->list);
8390                         chunk_rec->bg_rec = block_group_rec;
8391                 }
8392         } else {
8393                 if (!silent)
8394                         fprintf(stderr,
8395                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8396                                 chunk_rec->objectid,
8397                                 chunk_rec->type,
8398                                 chunk_rec->offset,
8399                                 chunk_rec->length,
8400                                 chunk_rec->offset,
8401                                 chunk_rec->type_flags);
8402                 ret = 1;
8403         }
8404
8405         if (metadump_v2)
8406                 return ret;
8407
8408         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8409                                     chunk_rec->num_stripes);
8410         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8411                 devid = chunk_rec->stripes[i].devid;
8412                 offset = chunk_rec->stripes[i].offset;
8413                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8414                                                        devid, offset, length);
8415                 if (dev_extent_item) {
8416                         dev_extent_rec = container_of(dev_extent_item,
8417                                                 struct device_extent_record,
8418                                                 cache);
8419                         if (dev_extent_rec->objectid != devid ||
8420                             dev_extent_rec->offset != offset ||
8421                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8422                             dev_extent_rec->length != length) {
8423                                 if (!silent)
8424                                         fprintf(stderr,
8425                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8426                                                 chunk_rec->objectid,
8427                                                 chunk_rec->type,
8428                                                 chunk_rec->offset,
8429                                                 chunk_rec->stripes[i].devid,
8430                                                 chunk_rec->stripes[i].offset,
8431                                                 dev_extent_rec->objectid,
8432                                                 dev_extent_rec->offset,
8433                                                 dev_extent_rec->length);
8434                                 ret = -1;
8435                         } else {
8436                                 list_move(&dev_extent_rec->chunk_list,
8437                                           &chunk_rec->dextents);
8438                         }
8439                 } else {
8440                         if (!silent)
8441                                 fprintf(stderr,
8442                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8443                                         chunk_rec->objectid,
8444                                         chunk_rec->type,
8445                                         chunk_rec->offset,
8446                                         chunk_rec->stripes[i].devid,
8447                                         chunk_rec->stripes[i].offset);
8448                         ret = -1;
8449                 }
8450         }
8451         return ret;
8452 }
8453
8454 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8455 int check_chunks(struct cache_tree *chunk_cache,
8456                  struct block_group_tree *block_group_cache,
8457                  struct device_extent_tree *dev_extent_cache,
8458                  struct list_head *good, struct list_head *bad,
8459                  struct list_head *rebuild, int silent)
8460 {
8461         struct cache_extent *chunk_item;
8462         struct chunk_record *chunk_rec;
8463         struct block_group_record *bg_rec;
8464         struct device_extent_record *dext_rec;
8465         int err;
8466         int ret = 0;
8467
8468         chunk_item = first_cache_extent(chunk_cache);
8469         while (chunk_item) {
8470                 chunk_rec = container_of(chunk_item, struct chunk_record,
8471                                          cache);
8472                 err = check_chunk_refs(chunk_rec, block_group_cache,
8473                                        dev_extent_cache, silent);
8474                 if (err < 0)
8475                         ret = err;
8476                 if (err == 0 && good)
8477                         list_add_tail(&chunk_rec->list, good);
8478                 if (err > 0 && rebuild)
8479                         list_add_tail(&chunk_rec->list, rebuild);
8480                 if (err < 0 && bad)
8481                         list_add_tail(&chunk_rec->list, bad);
8482                 chunk_item = next_cache_extent(chunk_item);
8483         }
8484
8485         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8486                 if (!silent)
8487                         fprintf(stderr,
8488                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8489                                 bg_rec->objectid,
8490                                 bg_rec->offset,
8491                                 bg_rec->flags);
8492                 if (!ret)
8493                         ret = 1;
8494         }
8495
8496         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8497                             chunk_list) {
8498                 if (!silent)
8499                         fprintf(stderr,
8500                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8501                                 dext_rec->objectid,
8502                                 dext_rec->offset,
8503                                 dext_rec->length);
8504                 if (!ret)
8505                         ret = 1;
8506         }
8507         return ret;
8508 }
8509
8510
8511 static int check_device_used(struct device_record *dev_rec,
8512                              struct device_extent_tree *dext_cache)
8513 {
8514         struct cache_extent *cache;
8515         struct device_extent_record *dev_extent_rec;
8516         u64 total_byte = 0;
8517
8518         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8519         while (cache) {
8520                 dev_extent_rec = container_of(cache,
8521                                               struct device_extent_record,
8522                                               cache);
8523                 if (dev_extent_rec->objectid != dev_rec->devid)
8524                         break;
8525
8526                 list_del_init(&dev_extent_rec->device_list);
8527                 total_byte += dev_extent_rec->length;
8528                 cache = next_cache_extent(cache);
8529         }
8530
8531         if (total_byte != dev_rec->byte_used) {
8532                 fprintf(stderr,
8533                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8534                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8535                         dev_rec->type, dev_rec->offset);
8536                 return -1;
8537         } else {
8538                 return 0;
8539         }
8540 }
8541
8542 /* check btrfs_dev_item -> btrfs_dev_extent */
8543 static int check_devices(struct rb_root *dev_cache,
8544                          struct device_extent_tree *dev_extent_cache)
8545 {
8546         struct rb_node *dev_node;
8547         struct device_record *dev_rec;
8548         struct device_extent_record *dext_rec;
8549         int err;
8550         int ret = 0;
8551
8552         dev_node = rb_first(dev_cache);
8553         while (dev_node) {
8554                 dev_rec = container_of(dev_node, struct device_record, node);
8555                 err = check_device_used(dev_rec, dev_extent_cache);
8556                 if (err)
8557                         ret = err;
8558
8559                 dev_node = rb_next(dev_node);
8560         }
8561         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8562                             device_list) {
8563                 fprintf(stderr,
8564                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8565                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8566                 if (!ret)
8567                         ret = 1;
8568         }
8569         return ret;
8570 }
8571
8572 static int add_root_item_to_list(struct list_head *head,
8573                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8574                                   u8 level, u8 drop_level,
8575                                   int level_size, struct btrfs_key *drop_key)
8576 {
8577
8578         struct root_item_record *ri_rec;
8579         ri_rec = malloc(sizeof(*ri_rec));
8580         if (!ri_rec)
8581                 return -ENOMEM;
8582         ri_rec->bytenr = bytenr;
8583         ri_rec->objectid = objectid;
8584         ri_rec->level = level;
8585         ri_rec->level_size = level_size;
8586         ri_rec->drop_level = drop_level;
8587         ri_rec->last_snapshot = last_snapshot;
8588         if (drop_key)
8589                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8590         list_add_tail(&ri_rec->list, head);
8591
8592         return 0;
8593 }
8594
8595 static void free_root_item_list(struct list_head *list)
8596 {
8597         struct root_item_record *ri_rec;
8598
8599         while (!list_empty(list)) {
8600                 ri_rec = list_first_entry(list, struct root_item_record,
8601                                           list);
8602                 list_del_init(&ri_rec->list);
8603                 free(ri_rec);
8604         }
8605 }
8606
8607 static int deal_root_from_list(struct list_head *list,
8608                                struct btrfs_root *root,
8609                                struct block_info *bits,
8610                                int bits_nr,
8611                                struct cache_tree *pending,
8612                                struct cache_tree *seen,
8613                                struct cache_tree *reada,
8614                                struct cache_tree *nodes,
8615                                struct cache_tree *extent_cache,
8616                                struct cache_tree *chunk_cache,
8617                                struct rb_root *dev_cache,
8618                                struct block_group_tree *block_group_cache,
8619                                struct device_extent_tree *dev_extent_cache)
8620 {
8621         int ret = 0;
8622         u64 last;
8623
8624         while (!list_empty(list)) {
8625                 struct root_item_record *rec;
8626                 struct extent_buffer *buf;
8627                 rec = list_entry(list->next,
8628                                  struct root_item_record, list);
8629                 last = 0;
8630                 buf = read_tree_block(root->fs_info->tree_root,
8631                                       rec->bytenr, rec->level_size, 0);
8632                 if (!extent_buffer_uptodate(buf)) {
8633                         free_extent_buffer(buf);
8634                         ret = -EIO;
8635                         break;
8636                 }
8637                 ret = add_root_to_pending(buf, extent_cache, pending,
8638                                     seen, nodes, rec->objectid);
8639                 if (ret < 0)
8640                         break;
8641                 /*
8642                  * To rebuild extent tree, we need deal with snapshot
8643                  * one by one, otherwise we deal with node firstly which
8644                  * can maximize readahead.
8645                  */
8646                 while (1) {
8647                         ret = run_next_block(root, bits, bits_nr, &last,
8648                                              pending, seen, reada, nodes,
8649                                              extent_cache, chunk_cache,
8650                                              dev_cache, block_group_cache,
8651                                              dev_extent_cache, rec);
8652                         if (ret != 0)
8653                                 break;
8654                 }
8655                 free_extent_buffer(buf);
8656                 list_del(&rec->list);
8657                 free(rec);
8658                 if (ret < 0)
8659                         break;
8660         }
8661         while (ret >= 0) {
8662                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8663                                      reada, nodes, extent_cache, chunk_cache,
8664                                      dev_cache, block_group_cache,
8665                                      dev_extent_cache, NULL);
8666                 if (ret != 0) {
8667                         if (ret > 0)
8668                                 ret = 0;
8669                         break;
8670                 }
8671         }
8672         return ret;
8673 }
8674
8675 static int check_chunks_and_extents(struct btrfs_root *root)
8676 {
8677         struct rb_root dev_cache;
8678         struct cache_tree chunk_cache;
8679         struct block_group_tree block_group_cache;
8680         struct device_extent_tree dev_extent_cache;
8681         struct cache_tree extent_cache;
8682         struct cache_tree seen;
8683         struct cache_tree pending;
8684         struct cache_tree reada;
8685         struct cache_tree nodes;
8686         struct extent_io_tree excluded_extents;
8687         struct cache_tree corrupt_blocks;
8688         struct btrfs_path path;
8689         struct btrfs_key key;
8690         struct btrfs_key found_key;
8691         int ret, err = 0;
8692         struct block_info *bits;
8693         int bits_nr;
8694         struct extent_buffer *leaf;
8695         int slot;
8696         struct btrfs_root_item ri;
8697         struct list_head dropping_trees;
8698         struct list_head normal_trees;
8699         struct btrfs_root *root1;
8700         u64 objectid;
8701         u32 level_size;
8702         u8 level;
8703
8704         dev_cache = RB_ROOT;
8705         cache_tree_init(&chunk_cache);
8706         block_group_tree_init(&block_group_cache);
8707         device_extent_tree_init(&dev_extent_cache);
8708
8709         cache_tree_init(&extent_cache);
8710         cache_tree_init(&seen);
8711         cache_tree_init(&pending);
8712         cache_tree_init(&nodes);
8713         cache_tree_init(&reada);
8714         cache_tree_init(&corrupt_blocks);
8715         extent_io_tree_init(&excluded_extents);
8716         INIT_LIST_HEAD(&dropping_trees);
8717         INIT_LIST_HEAD(&normal_trees);
8718
8719         if (repair) {
8720                 root->fs_info->excluded_extents = &excluded_extents;
8721                 root->fs_info->fsck_extent_cache = &extent_cache;
8722                 root->fs_info->free_extent_hook = free_extent_hook;
8723                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8724         }
8725
8726         bits_nr = 1024;
8727         bits = malloc(bits_nr * sizeof(struct block_info));
8728         if (!bits) {
8729                 perror("malloc");
8730                 exit(1);
8731         }
8732
8733         if (ctx.progress_enabled) {
8734                 ctx.tp = TASK_EXTENTS;
8735                 task_start(ctx.info);
8736         }
8737
8738 again:
8739         root1 = root->fs_info->tree_root;
8740         level = btrfs_header_level(root1->node);
8741         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8742                                     root1->node->start, 0, level, 0,
8743                                     root1->nodesize, NULL);
8744         if (ret < 0)
8745                 goto out;
8746         root1 = root->fs_info->chunk_root;
8747         level = btrfs_header_level(root1->node);
8748         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8749                                     root1->node->start, 0, level, 0,
8750                                     root1->nodesize, NULL);
8751         if (ret < 0)
8752                 goto out;
8753         btrfs_init_path(&path);
8754         key.offset = 0;
8755         key.objectid = 0;
8756         key.type = BTRFS_ROOT_ITEM_KEY;
8757         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8758                                         &key, &path, 0, 0);
8759         if (ret < 0)
8760                 goto out;
8761         while(1) {
8762                 leaf = path.nodes[0];
8763                 slot = path.slots[0];
8764                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8765                         ret = btrfs_next_leaf(root, &path);
8766                         if (ret != 0)
8767                                 break;
8768                         leaf = path.nodes[0];
8769                         slot = path.slots[0];
8770                 }
8771                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8772                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8773                         unsigned long offset;
8774                         u64 last_snapshot;
8775
8776                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8777                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8778                         last_snapshot = btrfs_root_last_snapshot(&ri);
8779                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8780                                 level = btrfs_root_level(&ri);
8781                                 level_size = root->nodesize;
8782                                 ret = add_root_item_to_list(&normal_trees,
8783                                                 found_key.objectid,
8784                                                 btrfs_root_bytenr(&ri),
8785                                                 last_snapshot, level,
8786                                                 0, level_size, NULL);
8787                                 if (ret < 0)
8788                                         goto out;
8789                         } else {
8790                                 level = btrfs_root_level(&ri);
8791                                 level_size = root->nodesize;
8792                                 objectid = found_key.objectid;
8793                                 btrfs_disk_key_to_cpu(&found_key,
8794                                                       &ri.drop_progress);
8795                                 ret = add_root_item_to_list(&dropping_trees,
8796                                                 objectid,
8797                                                 btrfs_root_bytenr(&ri),
8798                                                 last_snapshot, level,
8799                                                 ri.drop_level,
8800                                                 level_size, &found_key);
8801                                 if (ret < 0)
8802                                         goto out;
8803                         }
8804                 }
8805                 path.slots[0]++;
8806         }
8807         btrfs_release_path(&path);
8808
8809         /*
8810          * check_block can return -EAGAIN if it fixes something, please keep
8811          * this in mind when dealing with return values from these functions, if
8812          * we get -EAGAIN we want to fall through and restart the loop.
8813          */
8814         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8815                                   &seen, &reada, &nodes, &extent_cache,
8816                                   &chunk_cache, &dev_cache, &block_group_cache,
8817                                   &dev_extent_cache);
8818         if (ret < 0) {
8819                 if (ret == -EAGAIN)
8820                         goto loop;
8821                 goto out;
8822         }
8823         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8824                                   &pending, &seen, &reada, &nodes,
8825                                   &extent_cache, &chunk_cache, &dev_cache,
8826                                   &block_group_cache, &dev_extent_cache);
8827         if (ret < 0) {
8828                 if (ret == -EAGAIN)
8829                         goto loop;
8830                 goto out;
8831         }
8832
8833         ret = check_chunks(&chunk_cache, &block_group_cache,
8834                            &dev_extent_cache, NULL, NULL, NULL, 0);
8835         if (ret) {
8836                 if (ret == -EAGAIN)
8837                         goto loop;
8838                 err = ret;
8839         }
8840
8841         ret = check_extent_refs(root, &extent_cache);
8842         if (ret < 0) {
8843                 if (ret == -EAGAIN)
8844                         goto loop;
8845                 goto out;
8846         }
8847
8848         ret = check_devices(&dev_cache, &dev_extent_cache);
8849         if (ret && err)
8850                 ret = err;
8851
8852 out:
8853         task_stop(ctx.info);
8854         if (repair) {
8855                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8856                 extent_io_tree_cleanup(&excluded_extents);
8857                 root->fs_info->fsck_extent_cache = NULL;
8858                 root->fs_info->free_extent_hook = NULL;
8859                 root->fs_info->corrupt_blocks = NULL;
8860                 root->fs_info->excluded_extents = NULL;
8861         }
8862         free(bits);
8863         free_chunk_cache_tree(&chunk_cache);
8864         free_device_cache_tree(&dev_cache);
8865         free_block_group_tree(&block_group_cache);
8866         free_device_extent_tree(&dev_extent_cache);
8867         free_extent_cache_tree(&seen);
8868         free_extent_cache_tree(&pending);
8869         free_extent_cache_tree(&reada);
8870         free_extent_cache_tree(&nodes);
8871         return ret;
8872 loop:
8873         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8874         free_extent_cache_tree(&seen);
8875         free_extent_cache_tree(&pending);
8876         free_extent_cache_tree(&reada);
8877         free_extent_cache_tree(&nodes);
8878         free_chunk_cache_tree(&chunk_cache);
8879         free_block_group_tree(&block_group_cache);
8880         free_device_cache_tree(&dev_cache);
8881         free_device_extent_tree(&dev_extent_cache);
8882         free_extent_record_cache(root->fs_info, &extent_cache);
8883         free_root_item_list(&normal_trees);
8884         free_root_item_list(&dropping_trees);
8885         extent_io_tree_cleanup(&excluded_extents);
8886         goto again;
8887 }
8888
8889 /*
8890  * Check backrefs of a tree block given by @bytenr or @eb.
8891  *
8892  * @root:       the root containing the @bytenr or @eb
8893  * @eb:         tree block extent buffer, can be NULL
8894  * @bytenr:     bytenr of the tree block to search
8895  * @level:      tree level of the tree block
8896  * @owner:      owner of the tree block
8897  *
8898  * Return >0 for any error found and output error message
8899  * Return 0 for no error found
8900  */
8901 static int check_tree_block_ref(struct btrfs_root *root,
8902                                 struct extent_buffer *eb, u64 bytenr,
8903                                 int level, u64 owner)
8904 {
8905         struct btrfs_key key;
8906         struct btrfs_root *extent_root = root->fs_info->extent_root;
8907         struct btrfs_path path;
8908         struct btrfs_extent_item *ei;
8909         struct btrfs_extent_inline_ref *iref;
8910         struct extent_buffer *leaf;
8911         unsigned long end;
8912         unsigned long ptr;
8913         int slot;
8914         int skinny_level;
8915         int type;
8916         u32 nodesize = root->nodesize;
8917         u32 item_size;
8918         u64 offset;
8919         int found_ref = 0;
8920         int err = 0;
8921         int ret;
8922
8923         btrfs_init_path(&path);
8924         key.objectid = bytenr;
8925         if (btrfs_fs_incompat(root->fs_info,
8926                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8927                 key.type = BTRFS_METADATA_ITEM_KEY;
8928         else
8929                 key.type = BTRFS_EXTENT_ITEM_KEY;
8930         key.offset = (u64)-1;
8931
8932         /* Search for the backref in extent tree */
8933         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8934         if (ret < 0) {
8935                 err |= BACKREF_MISSING;
8936                 goto out;
8937         }
8938         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8939         if (ret) {
8940                 err |= BACKREF_MISSING;
8941                 goto out;
8942         }
8943
8944         leaf = path.nodes[0];
8945         slot = path.slots[0];
8946         btrfs_item_key_to_cpu(leaf, &key, slot);
8947
8948         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8949
8950         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8951                 skinny_level = (int)key.offset;
8952                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8953         } else {
8954                 struct btrfs_tree_block_info *info;
8955
8956                 info = (struct btrfs_tree_block_info *)(ei + 1);
8957                 skinny_level = btrfs_tree_block_level(leaf, info);
8958                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8959         }
8960
8961         if (eb) {
8962                 u64 header_gen;
8963                 u64 extent_gen;
8964
8965                 if (!(btrfs_extent_flags(leaf, ei) &
8966                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8967                         error(
8968                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8969                                 key.objectid, nodesize,
8970                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8971                         err = BACKREF_MISMATCH;
8972                 }
8973                 header_gen = btrfs_header_generation(eb);
8974                 extent_gen = btrfs_extent_generation(leaf, ei);
8975                 if (header_gen != extent_gen) {
8976                         error(
8977         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8978                                 key.objectid, nodesize, header_gen,
8979                                 extent_gen);
8980                         err = BACKREF_MISMATCH;
8981                 }
8982                 if (level != skinny_level) {
8983                         error(
8984                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8985                                 key.objectid, nodesize, level, skinny_level);
8986                         err = BACKREF_MISMATCH;
8987                 }
8988                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8989                         error(
8990                         "extent[%llu %u] is referred by other roots than %llu",
8991                                 key.objectid, nodesize, root->objectid);
8992                         err = BACKREF_MISMATCH;
8993                 }
8994         }
8995
8996         /*
8997          * Iterate the extent/metadata item to find the exact backref
8998          */
8999         item_size = btrfs_item_size_nr(leaf, slot);
9000         ptr = (unsigned long)iref;
9001         end = (unsigned long)ei + item_size;
9002         while (ptr < end) {
9003                 iref = (struct btrfs_extent_inline_ref *)ptr;
9004                 type = btrfs_extent_inline_ref_type(leaf, iref);
9005                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9006
9007                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9008                         (offset == root->objectid || offset == owner)) {
9009                         found_ref = 1;
9010                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9011                         /* Check if the backref points to valid referencer */
9012                         found_ref = !check_tree_block_ref(root, NULL, offset,
9013                                                           level + 1, owner);
9014                 }
9015
9016                 if (found_ref)
9017                         break;
9018                 ptr += btrfs_extent_inline_ref_size(type);
9019         }
9020
9021         /*
9022          * Inlined extent item doesn't have what we need, check
9023          * TREE_BLOCK_REF_KEY
9024          */
9025         if (!found_ref) {
9026                 btrfs_release_path(&path);
9027                 key.objectid = bytenr;
9028                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9029                 key.offset = root->objectid;
9030
9031                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9032                 if (!ret)
9033                         found_ref = 1;
9034         }
9035         if (!found_ref)
9036                 err |= BACKREF_MISSING;
9037 out:
9038         btrfs_release_path(&path);
9039         if (eb && (err & BACKREF_MISSING))
9040                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9041                         bytenr, nodesize, owner, level);
9042         return err;
9043 }
9044
9045 /*
9046  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9047  *
9048  * Return >0 any error found and output error message
9049  * Return 0 for no error found
9050  */
9051 static int check_extent_data_item(struct btrfs_root *root,
9052                                   struct extent_buffer *eb, int slot)
9053 {
9054         struct btrfs_file_extent_item *fi;
9055         struct btrfs_path path;
9056         struct btrfs_root *extent_root = root->fs_info->extent_root;
9057         struct btrfs_key fi_key;
9058         struct btrfs_key dbref_key;
9059         struct extent_buffer *leaf;
9060         struct btrfs_extent_item *ei;
9061         struct btrfs_extent_inline_ref *iref;
9062         struct btrfs_extent_data_ref *dref;
9063         u64 owner;
9064         u64 file_extent_gen;
9065         u64 disk_bytenr;
9066         u64 disk_num_bytes;
9067         u64 extent_num_bytes;
9068         u64 extent_flags;
9069         u64 extent_gen;
9070         u32 item_size;
9071         unsigned long end;
9072         unsigned long ptr;
9073         int type;
9074         u64 ref_root;
9075         int found_dbackref = 0;
9076         int err = 0;
9077         int ret;
9078
9079         btrfs_item_key_to_cpu(eb, &fi_key, slot);
9080         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9081         file_extent_gen = btrfs_file_extent_generation(eb, fi);
9082
9083         /* Nothing to check for hole and inline data extents */
9084         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9085             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9086                 return 0;
9087
9088         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9089         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9090         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9091
9092         /* Check unaligned disk_num_bytes and num_bytes */
9093         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9094                 error(
9095 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9096                         fi_key.objectid, fi_key.offset, disk_num_bytes,
9097                         root->sectorsize);
9098                 err |= BYTES_UNALIGNED;
9099         } else {
9100                 data_bytes_allocated += disk_num_bytes;
9101         }
9102         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9103                 error(
9104 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9105                         fi_key.objectid, fi_key.offset, extent_num_bytes,
9106                         root->sectorsize);
9107                 err |= BYTES_UNALIGNED;
9108         } else {
9109                 data_bytes_referenced += extent_num_bytes;
9110         }
9111         owner = btrfs_header_owner(eb);
9112
9113         /* Check the extent item of the file extent in extent tree */
9114         btrfs_init_path(&path);
9115         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9116         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9117         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9118
9119         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9120         if (ret) {
9121                 err |= BACKREF_MISSING;
9122                 goto error;
9123         }
9124
9125         leaf = path.nodes[0];
9126         slot = path.slots[0];
9127         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9128
9129         extent_flags = btrfs_extent_flags(leaf, ei);
9130         extent_gen = btrfs_extent_generation(leaf, ei);
9131
9132         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9133                 error(
9134                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9135                     disk_bytenr, disk_num_bytes,
9136                     BTRFS_EXTENT_FLAG_DATA);
9137                 err |= BACKREF_MISMATCH;
9138         }
9139
9140         if (file_extent_gen < extent_gen) {
9141                 error(
9142 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9143                         disk_bytenr, disk_num_bytes, file_extent_gen,
9144                         extent_gen);
9145                 err |= BACKREF_MISMATCH;
9146         }
9147
9148         /* Check data backref inside that extent item */
9149         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9150         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9151         ptr = (unsigned long)iref;
9152         end = (unsigned long)ei + item_size;
9153         while (ptr < end) {
9154                 iref = (struct btrfs_extent_inline_ref *)ptr;
9155                 type = btrfs_extent_inline_ref_type(leaf, iref);
9156                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9157
9158                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9159                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
9160                         if (ref_root == owner || ref_root == root->objectid)
9161                                 found_dbackref = 1;
9162                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9163                         found_dbackref = !check_tree_block_ref(root, NULL,
9164                                 btrfs_extent_inline_ref_offset(leaf, iref),
9165                                 0, owner);
9166                 }
9167
9168                 if (found_dbackref)
9169                         break;
9170                 ptr += btrfs_extent_inline_ref_size(type);
9171         }
9172
9173         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9174         if (!found_dbackref) {
9175                 btrfs_release_path(&path);
9176
9177                 btrfs_init_path(&path);
9178                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9179                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9180                 dbref_key.offset = hash_extent_data_ref(root->objectid,
9181                                 fi_key.objectid, fi_key.offset);
9182
9183                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9184                                         &dbref_key, &path, 0, 0);
9185                 if (!ret)
9186                         found_dbackref = 1;
9187         }
9188
9189         if (!found_dbackref)
9190                 err |= BACKREF_MISSING;
9191 error:
9192         btrfs_release_path(&path);
9193         if (err & BACKREF_MISSING) {
9194                 error("data extent[%llu %llu] backref lost",
9195                       disk_bytenr, disk_num_bytes);
9196         }
9197         return err;
9198 }
9199
9200 /*
9201  * Get real tree block level for the case like shared block
9202  * Return >= 0 as tree level
9203  * Return <0 for error
9204  */
9205 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9206 {
9207         struct extent_buffer *eb;
9208         struct btrfs_path path;
9209         struct btrfs_key key;
9210         struct btrfs_extent_item *ei;
9211         u64 flags;
9212         u64 transid;
9213         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9214         u8 backref_level;
9215         u8 header_level;
9216         int ret;
9217
9218         /* Search extent tree for extent generation and level */
9219         key.objectid = bytenr;
9220         key.type = BTRFS_METADATA_ITEM_KEY;
9221         key.offset = (u64)-1;
9222
9223         btrfs_init_path(&path);
9224         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9225         if (ret < 0)
9226                 goto release_out;
9227         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9228         if (ret < 0)
9229                 goto release_out;
9230         if (ret > 0) {
9231                 ret = -ENOENT;
9232                 goto release_out;
9233         }
9234
9235         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9236         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9237                             struct btrfs_extent_item);
9238         flags = btrfs_extent_flags(path.nodes[0], ei);
9239         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9240                 ret = -ENOENT;
9241                 goto release_out;
9242         }
9243
9244         /* Get transid for later read_tree_block() check */
9245         transid = btrfs_extent_generation(path.nodes[0], ei);
9246
9247         /* Get backref level as one source */
9248         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9249                 backref_level = key.offset;
9250         } else {
9251                 struct btrfs_tree_block_info *info;
9252
9253                 info = (struct btrfs_tree_block_info *)(ei + 1);
9254                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9255         }
9256         btrfs_release_path(&path);
9257
9258         /* Get level from tree block as an alternative source */
9259         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9260         if (!extent_buffer_uptodate(eb)) {
9261                 free_extent_buffer(eb);
9262                 return -EIO;
9263         }
9264         header_level = btrfs_header_level(eb);
9265         free_extent_buffer(eb);
9266
9267         if (header_level != backref_level)
9268                 return -EIO;
9269         return header_level;
9270
9271 release_out:
9272         btrfs_release_path(&path);
9273         return ret;
9274 }
9275
9276 /*
9277  * Check if a tree block backref is valid (points to a valid tree block)
9278  * if level == -1, level will be resolved
9279  * Return >0 for any error found and print error message
9280  */
9281 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9282                                     u64 bytenr, int level)
9283 {
9284         struct btrfs_root *root;
9285         struct btrfs_key key;
9286         struct btrfs_path path;
9287         struct extent_buffer *eb;
9288         struct extent_buffer *node;
9289         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9290         int err = 0;
9291         int ret;
9292
9293         /* Query level for level == -1 special case */
9294         if (level == -1)
9295                 level = query_tree_block_level(fs_info, bytenr);
9296         if (level < 0) {
9297                 err |= REFERENCER_MISSING;
9298                 goto out;
9299         }
9300
9301         key.objectid = root_id;
9302         key.type = BTRFS_ROOT_ITEM_KEY;
9303         key.offset = (u64)-1;
9304
9305         root = btrfs_read_fs_root(fs_info, &key);
9306         if (IS_ERR(root)) {
9307                 err |= REFERENCER_MISSING;
9308                 goto out;
9309         }
9310
9311         /* Read out the tree block to get item/node key */
9312         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9313         if (!extent_buffer_uptodate(eb)) {
9314                 err |= REFERENCER_MISSING;
9315                 free_extent_buffer(eb);
9316                 goto out;
9317         }
9318
9319         /* Empty tree, no need to check key */
9320         if (!btrfs_header_nritems(eb) && !level) {
9321                 free_extent_buffer(eb);
9322                 goto out;
9323         }
9324
9325         if (level)
9326                 btrfs_node_key_to_cpu(eb, &key, 0);
9327         else
9328                 btrfs_item_key_to_cpu(eb, &key, 0);
9329
9330         free_extent_buffer(eb);
9331
9332         btrfs_init_path(&path);
9333         path.lowest_level = level;
9334         /* Search with the first key, to ensure we can reach it */
9335         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9336         if (ret < 0) {
9337                 err |= REFERENCER_MISSING;
9338                 goto release_out;
9339         }
9340
9341         node = path.nodes[level];
9342         if (btrfs_header_bytenr(node) != bytenr) {
9343                 error(
9344         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9345                         bytenr, nodesize, bytenr,
9346                         btrfs_header_bytenr(node));
9347                 err |= REFERENCER_MISMATCH;
9348         }
9349         if (btrfs_header_level(node) != level) {
9350                 error(
9351         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9352                         bytenr, nodesize, level,
9353                         btrfs_header_level(node));
9354                 err |= REFERENCER_MISMATCH;
9355         }
9356
9357 release_out:
9358         btrfs_release_path(&path);
9359 out:
9360         if (err & REFERENCER_MISSING) {
9361                 if (level < 0)
9362                         error("extent [%llu %d] lost referencer (owner: %llu)",
9363                                 bytenr, nodesize, root_id);
9364                 else
9365                         error(
9366                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9367                                 bytenr, nodesize, root_id, level);
9368         }
9369
9370         return err;
9371 }
9372
9373 /*
9374  * Check referencer for shared block backref
9375  * If level == -1, this function will resolve the level.
9376  */
9377 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9378                                      u64 parent, u64 bytenr, int level)
9379 {
9380         struct extent_buffer *eb;
9381         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9382         u32 nr;
9383         int found_parent = 0;
9384         int i;
9385
9386         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9387         if (!extent_buffer_uptodate(eb))
9388                 goto out;
9389
9390         if (level == -1)
9391                 level = query_tree_block_level(fs_info, bytenr);
9392         if (level < 0)
9393                 goto out;
9394
9395         if (level + 1 != btrfs_header_level(eb))
9396                 goto out;
9397
9398         nr = btrfs_header_nritems(eb);
9399         for (i = 0; i < nr; i++) {
9400                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9401                         found_parent = 1;
9402                         break;
9403                 }
9404         }
9405 out:
9406         free_extent_buffer(eb);
9407         if (!found_parent) {
9408                 error(
9409         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9410                         bytenr, nodesize, parent, level);
9411                 return REFERENCER_MISSING;
9412         }
9413         return 0;
9414 }
9415
9416 /*
9417  * Check referencer for normal (inlined) data ref
9418  * If len == 0, it will be resolved by searching in extent tree
9419  */
9420 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9421                                      u64 root_id, u64 objectid, u64 offset,
9422                                      u64 bytenr, u64 len, u32 count)
9423 {
9424         struct btrfs_root *root;
9425         struct btrfs_root *extent_root = fs_info->extent_root;
9426         struct btrfs_key key;
9427         struct btrfs_path path;
9428         struct extent_buffer *leaf;
9429         struct btrfs_file_extent_item *fi;
9430         u32 found_count = 0;
9431         int slot;
9432         int ret = 0;
9433
9434         if (!len) {
9435                 key.objectid = bytenr;
9436                 key.type = BTRFS_EXTENT_ITEM_KEY;
9437                 key.offset = (u64)-1;
9438
9439                 btrfs_init_path(&path);
9440                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9441                 if (ret < 0)
9442                         goto out;
9443                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9444                 if (ret)
9445                         goto out;
9446                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9447                 if (key.objectid != bytenr ||
9448                     key.type != BTRFS_EXTENT_ITEM_KEY)
9449                         goto out;
9450                 len = key.offset;
9451                 btrfs_release_path(&path);
9452         }
9453         key.objectid = root_id;
9454         key.type = BTRFS_ROOT_ITEM_KEY;
9455         key.offset = (u64)-1;
9456         btrfs_init_path(&path);
9457
9458         root = btrfs_read_fs_root(fs_info, &key);
9459         if (IS_ERR(root))
9460                 goto out;
9461
9462         key.objectid = objectid;
9463         key.type = BTRFS_EXTENT_DATA_KEY;
9464         /*
9465          * It can be nasty as data backref offset is
9466          * file offset - file extent offset, which is smaller or
9467          * equal to original backref offset.  The only special case is
9468          * overflow.  So we need to special check and do further search.
9469          */
9470         key.offset = offset & (1ULL << 63) ? 0 : offset;
9471
9472         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9473         if (ret < 0)
9474                 goto out;
9475
9476         /*
9477          * Search afterwards to get correct one
9478          * NOTE: As we must do a comprehensive check on the data backref to
9479          * make sure the dref count also matches, we must iterate all file
9480          * extents for that inode.
9481          */
9482         while (1) {
9483                 leaf = path.nodes[0];
9484                 slot = path.slots[0];
9485
9486                 btrfs_item_key_to_cpu(leaf, &key, slot);
9487                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9488                         break;
9489                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9490                 /*
9491                  * Except normal disk bytenr and disk num bytes, we still
9492                  * need to do extra check on dbackref offset as
9493                  * dbackref offset = file_offset - file_extent_offset
9494                  */
9495                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9496                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9497                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9498                     offset)
9499                         found_count++;
9500
9501                 ret = btrfs_next_item(root, &path);
9502                 if (ret)
9503                         break;
9504         }
9505 out:
9506         btrfs_release_path(&path);
9507         if (found_count != count) {
9508                 error(
9509 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9510                         bytenr, len, root_id, objectid, offset, count, found_count);
9511                 return REFERENCER_MISSING;
9512         }
9513         return 0;
9514 }
9515
9516 /*
9517  * Check if the referencer of a shared data backref exists
9518  */
9519 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9520                                      u64 parent, u64 bytenr)
9521 {
9522         struct extent_buffer *eb;
9523         struct btrfs_key key;
9524         struct btrfs_file_extent_item *fi;
9525         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9526         u32 nr;
9527         int found_parent = 0;
9528         int i;
9529
9530         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9531         if (!extent_buffer_uptodate(eb))
9532                 goto out;
9533
9534         nr = btrfs_header_nritems(eb);
9535         for (i = 0; i < nr; i++) {
9536                 btrfs_item_key_to_cpu(eb, &key, i);
9537                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9538                         continue;
9539
9540                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9541                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9542                         continue;
9543
9544                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9545                         found_parent = 1;
9546                         break;
9547                 }
9548         }
9549
9550 out:
9551         free_extent_buffer(eb);
9552         if (!found_parent) {
9553                 error("shared extent %llu referencer lost (parent: %llu)",
9554                         bytenr, parent);
9555                 return REFERENCER_MISSING;
9556         }
9557         return 0;
9558 }
9559
9560 /*
9561  * This function will check a given extent item, including its backref and
9562  * itself (like crossing stripe boundary and type)
9563  *
9564  * Since we don't use extent_record anymore, introduce new error bit
9565  */
9566 static int check_extent_item(struct btrfs_fs_info *fs_info,
9567                              struct extent_buffer *eb, int slot)
9568 {
9569         struct btrfs_extent_item *ei;
9570         struct btrfs_extent_inline_ref *iref;
9571         struct btrfs_extent_data_ref *dref;
9572         unsigned long end;
9573         unsigned long ptr;
9574         int type;
9575         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9576         u32 item_size = btrfs_item_size_nr(eb, slot);
9577         u64 flags;
9578         u64 offset;
9579         int metadata = 0;
9580         int level;
9581         struct btrfs_key key;
9582         int ret;
9583         int err = 0;
9584
9585         btrfs_item_key_to_cpu(eb, &key, slot);
9586         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9587                 bytes_used += key.offset;
9588         else
9589                 bytes_used += nodesize;
9590
9591         if (item_size < sizeof(*ei)) {
9592                 /*
9593                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9594                  * old thing when on disk format is still un-determined.
9595                  * No need to care about it anymore
9596                  */
9597                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9598                 return -ENOTTY;
9599         }
9600
9601         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9602         flags = btrfs_extent_flags(eb, ei);
9603
9604         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9605                 metadata = 1;
9606         if (metadata && check_crossing_stripes(global_info, key.objectid,
9607                                                eb->len)) {
9608                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9609                       key.objectid, key.objectid + nodesize);
9610                 err |= CROSSING_STRIPE_BOUNDARY;
9611         }
9612
9613         ptr = (unsigned long)(ei + 1);
9614
9615         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9616                 /* Old EXTENT_ITEM metadata */
9617                 struct btrfs_tree_block_info *info;
9618
9619                 info = (struct btrfs_tree_block_info *)ptr;
9620                 level = btrfs_tree_block_level(eb, info);
9621                 ptr += sizeof(struct btrfs_tree_block_info);
9622         } else {
9623                 /* New METADATA_ITEM */
9624                 level = key.offset;
9625         }
9626         end = (unsigned long)ei + item_size;
9627
9628         if (ptr >= end) {
9629                 err |= ITEM_SIZE_MISMATCH;
9630                 goto out;
9631         }
9632
9633         /* Now check every backref in this extent item */
9634 next:
9635         iref = (struct btrfs_extent_inline_ref *)ptr;
9636         type = btrfs_extent_inline_ref_type(eb, iref);
9637         offset = btrfs_extent_inline_ref_offset(eb, iref);
9638         switch (type) {
9639         case BTRFS_TREE_BLOCK_REF_KEY:
9640                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9641                                                level);
9642                 err |= ret;
9643                 break;
9644         case BTRFS_SHARED_BLOCK_REF_KEY:
9645                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9646                                                  level);
9647                 err |= ret;
9648                 break;
9649         case BTRFS_EXTENT_DATA_REF_KEY:
9650                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9651                 ret = check_extent_data_backref(fs_info,
9652                                 btrfs_extent_data_ref_root(eb, dref),
9653                                 btrfs_extent_data_ref_objectid(eb, dref),
9654                                 btrfs_extent_data_ref_offset(eb, dref),
9655                                 key.objectid, key.offset,
9656                                 btrfs_extent_data_ref_count(eb, dref));
9657                 err |= ret;
9658                 break;
9659         case BTRFS_SHARED_DATA_REF_KEY:
9660                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9661                 err |= ret;
9662                 break;
9663         default:
9664                 error("extent[%llu %d %llu] has unknown ref type: %d",
9665                         key.objectid, key.type, key.offset, type);
9666                 err |= UNKNOWN_TYPE;
9667                 goto out;
9668         }
9669
9670         ptr += btrfs_extent_inline_ref_size(type);
9671         if (ptr < end)
9672                 goto next;
9673
9674 out:
9675         return err;
9676 }
9677
9678 /*
9679  * Check if a dev extent item is referred correctly by its chunk
9680  */
9681 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9682                                  struct extent_buffer *eb, int slot)
9683 {
9684         struct btrfs_root *chunk_root = fs_info->chunk_root;
9685         struct btrfs_dev_extent *ptr;
9686         struct btrfs_path path;
9687         struct btrfs_key chunk_key;
9688         struct btrfs_key devext_key;
9689         struct btrfs_chunk *chunk;
9690         struct extent_buffer *l;
9691         int num_stripes;
9692         u64 length;
9693         int i;
9694         int found_chunk = 0;
9695         int ret;
9696
9697         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9698         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9699         length = btrfs_dev_extent_length(eb, ptr);
9700
9701         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9702         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9703         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9704
9705         btrfs_init_path(&path);
9706         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9707         if (ret)
9708                 goto out;
9709
9710         l = path.nodes[0];
9711         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9712         if (btrfs_chunk_length(l, chunk) != length)
9713                 goto out;
9714
9715         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9716         for (i = 0; i < num_stripes; i++) {
9717                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9718                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9719
9720                 if (devid == devext_key.objectid &&
9721                     offset == devext_key.offset) {
9722                         found_chunk = 1;
9723                         break;
9724                 }
9725         }
9726 out:
9727         btrfs_release_path(&path);
9728         if (!found_chunk) {
9729                 error(
9730                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9731                         devext_key.objectid, devext_key.offset, length);
9732                 return REFERENCER_MISSING;
9733         }
9734         return 0;
9735 }
9736
9737 /*
9738  * Check if the used space is correct with the dev item
9739  */
9740 static int check_dev_item(struct btrfs_fs_info *fs_info,
9741                           struct extent_buffer *eb, int slot)
9742 {
9743         struct btrfs_root *dev_root = fs_info->dev_root;
9744         struct btrfs_dev_item *dev_item;
9745         struct btrfs_path path;
9746         struct btrfs_key key;
9747         struct btrfs_dev_extent *ptr;
9748         u64 dev_id;
9749         u64 used;
9750         u64 total = 0;
9751         int ret;
9752
9753         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9754         dev_id = btrfs_device_id(eb, dev_item);
9755         used = btrfs_device_bytes_used(eb, dev_item);
9756
9757         key.objectid = dev_id;
9758         key.type = BTRFS_DEV_EXTENT_KEY;
9759         key.offset = 0;
9760
9761         btrfs_init_path(&path);
9762         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9763         if (ret < 0) {
9764                 btrfs_item_key_to_cpu(eb, &key, slot);
9765                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9766                         key.objectid, key.type, key.offset);
9767                 btrfs_release_path(&path);
9768                 return REFERENCER_MISSING;
9769         }
9770
9771         /* Iterate dev_extents to calculate the used space of a device */
9772         while (1) {
9773                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9774
9775                 if (key.objectid > dev_id)
9776                         break;
9777                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9778                         goto next;
9779
9780                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9781                                      struct btrfs_dev_extent);
9782                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9783 next:
9784                 ret = btrfs_next_item(dev_root, &path);
9785                 if (ret)
9786                         break;
9787         }
9788         btrfs_release_path(&path);
9789
9790         if (used != total) {
9791                 btrfs_item_key_to_cpu(eb, &key, slot);
9792                 error(
9793 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9794                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9795                         BTRFS_DEV_EXTENT_KEY, dev_id);
9796                 return ACCOUNTING_MISMATCH;
9797         }
9798         return 0;
9799 }
9800
9801 /*
9802  * Check a block group item with its referener (chunk) and its used space
9803  * with extent/metadata item
9804  */
9805 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9806                                   struct extent_buffer *eb, int slot)
9807 {
9808         struct btrfs_root *extent_root = fs_info->extent_root;
9809         struct btrfs_root *chunk_root = fs_info->chunk_root;
9810         struct btrfs_block_group_item *bi;
9811         struct btrfs_block_group_item bg_item;
9812         struct btrfs_path path;
9813         struct btrfs_key bg_key;
9814         struct btrfs_key chunk_key;
9815         struct btrfs_key extent_key;
9816         struct btrfs_chunk *chunk;
9817         struct extent_buffer *leaf;
9818         struct btrfs_extent_item *ei;
9819         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9820         u64 flags;
9821         u64 bg_flags;
9822         u64 used;
9823         u64 total = 0;
9824         int ret;
9825         int err = 0;
9826
9827         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9828         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9829         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9830         used = btrfs_block_group_used(&bg_item);
9831         bg_flags = btrfs_block_group_flags(&bg_item);
9832
9833         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9834         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9835         chunk_key.offset = bg_key.objectid;
9836
9837         btrfs_init_path(&path);
9838         /* Search for the referencer chunk */
9839         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9840         if (ret) {
9841                 error(
9842                 "block group[%llu %llu] did not find the related chunk item",
9843                         bg_key.objectid, bg_key.offset);
9844                 err |= REFERENCER_MISSING;
9845         } else {
9846                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9847                                         struct btrfs_chunk);
9848                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9849                                                 bg_key.offset) {
9850                         error(
9851         "block group[%llu %llu] related chunk item length does not match",
9852                                 bg_key.objectid, bg_key.offset);
9853                         err |= REFERENCER_MISMATCH;
9854                 }
9855         }
9856         btrfs_release_path(&path);
9857
9858         /* Search from the block group bytenr */
9859         extent_key.objectid = bg_key.objectid;
9860         extent_key.type = 0;
9861         extent_key.offset = 0;
9862
9863         btrfs_init_path(&path);
9864         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9865         if (ret < 0)
9866                 goto out;
9867
9868         /* Iterate extent tree to account used space */
9869         while (1) {
9870                 leaf = path.nodes[0];
9871                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9872                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9873                         break;
9874
9875                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9876                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9877                         goto next;
9878                 if (extent_key.objectid < bg_key.objectid)
9879                         goto next;
9880
9881                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9882                         total += nodesize;
9883                 else
9884                         total += extent_key.offset;
9885
9886                 ei = btrfs_item_ptr(leaf, path.slots[0],
9887                                     struct btrfs_extent_item);
9888                 flags = btrfs_extent_flags(leaf, ei);
9889                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9890                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9891                                 error(
9892                         "bad extent[%llu, %llu) type mismatch with chunk",
9893                                         extent_key.objectid,
9894                                         extent_key.objectid + extent_key.offset);
9895                                 err |= CHUNK_TYPE_MISMATCH;
9896                         }
9897                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9898                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9899                                     BTRFS_BLOCK_GROUP_METADATA))) {
9900                                 error(
9901                         "bad extent[%llu, %llu) type mismatch with chunk",
9902                                         extent_key.objectid,
9903                                         extent_key.objectid + nodesize);
9904                                 err |= CHUNK_TYPE_MISMATCH;
9905                         }
9906                 }
9907 next:
9908                 ret = btrfs_next_item(extent_root, &path);
9909                 if (ret)
9910                         break;
9911         }
9912
9913 out:
9914         btrfs_release_path(&path);
9915
9916         if (total != used) {
9917                 error(
9918                 "block group[%llu %llu] used %llu but extent items used %llu",
9919                         bg_key.objectid, bg_key.offset, used, total);
9920                 err |= ACCOUNTING_MISMATCH;
9921         }
9922         return err;
9923 }
9924
9925 /*
9926  * Check a chunk item.
9927  * Including checking all referred dev_extents and block group
9928  */
9929 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9930                             struct extent_buffer *eb, int slot)
9931 {
9932         struct btrfs_root *extent_root = fs_info->extent_root;
9933         struct btrfs_root *dev_root = fs_info->dev_root;
9934         struct btrfs_path path;
9935         struct btrfs_key chunk_key;
9936         struct btrfs_key bg_key;
9937         struct btrfs_key devext_key;
9938         struct btrfs_chunk *chunk;
9939         struct extent_buffer *leaf;
9940         struct btrfs_block_group_item *bi;
9941         struct btrfs_block_group_item bg_item;
9942         struct btrfs_dev_extent *ptr;
9943         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9944         u64 length;
9945         u64 chunk_end;
9946         u64 type;
9947         u64 profile;
9948         int num_stripes;
9949         u64 offset;
9950         u64 objectid;
9951         int i;
9952         int ret;
9953         int err = 0;
9954
9955         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9956         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9957         length = btrfs_chunk_length(eb, chunk);
9958         chunk_end = chunk_key.offset + length;
9959         if (!IS_ALIGNED(length, sectorsize)) {
9960                 error("chunk[%llu %llu) not aligned to %u",
9961                         chunk_key.offset, chunk_end, sectorsize);
9962                 err |= BYTES_UNALIGNED;
9963                 goto out;
9964         }
9965
9966         type = btrfs_chunk_type(eb, chunk);
9967         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9968         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9969                 error("chunk[%llu %llu) has no chunk type",
9970                         chunk_key.offset, chunk_end);
9971                 err |= UNKNOWN_TYPE;
9972         }
9973         if (profile && (profile & (profile - 1))) {
9974                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9975                         chunk_key.offset, chunk_end, profile);
9976                 err |= UNKNOWN_TYPE;
9977         }
9978
9979         bg_key.objectid = chunk_key.offset;
9980         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9981         bg_key.offset = length;
9982
9983         btrfs_init_path(&path);
9984         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9985         if (ret) {
9986                 error(
9987                 "chunk[%llu %llu) did not find the related block group item",
9988                         chunk_key.offset, chunk_end);
9989                 err |= REFERENCER_MISSING;
9990         } else{
9991                 leaf = path.nodes[0];
9992                 bi = btrfs_item_ptr(leaf, path.slots[0],
9993                                     struct btrfs_block_group_item);
9994                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9995                                    sizeof(bg_item));
9996                 if (btrfs_block_group_flags(&bg_item) != type) {
9997                         error(
9998 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9999                                 chunk_key.offset, chunk_end, type,
10000                                 btrfs_block_group_flags(&bg_item));
10001                         err |= REFERENCER_MISSING;
10002                 }
10003         }
10004
10005         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
10006         for (i = 0; i < num_stripes; i++) {
10007                 btrfs_release_path(&path);
10008                 btrfs_init_path(&path);
10009                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10010                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10011                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10012
10013                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10014                                         0, 0);
10015                 if (ret)
10016                         goto not_match_dev;
10017
10018                 leaf = path.nodes[0];
10019                 ptr = btrfs_item_ptr(leaf, path.slots[0],
10020                                      struct btrfs_dev_extent);
10021                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10022                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
10023                 if (objectid != chunk_key.objectid ||
10024                     offset != chunk_key.offset ||
10025                     btrfs_dev_extent_length(leaf, ptr) != length)
10026                         goto not_match_dev;
10027                 continue;
10028 not_match_dev:
10029                 err |= BACKREF_MISSING;
10030                 error(
10031                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10032                         chunk_key.objectid, chunk_end, i);
10033                 continue;
10034         }
10035         btrfs_release_path(&path);
10036 out:
10037         return err;
10038 }
10039
10040 /*
10041  * Main entry function to check known items and update related accounting info
10042  */
10043 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10044 {
10045         struct btrfs_fs_info *fs_info = root->fs_info;
10046         struct btrfs_key key;
10047         int slot = 0;
10048         int type;
10049         struct btrfs_extent_data_ref *dref;
10050         int ret;
10051         int err = 0;
10052
10053 next:
10054         btrfs_item_key_to_cpu(eb, &key, slot);
10055         type = key.type;
10056
10057         switch (type) {
10058         case BTRFS_EXTENT_DATA_KEY:
10059                 ret = check_extent_data_item(root, eb, slot);
10060                 err |= ret;
10061                 break;
10062         case BTRFS_BLOCK_GROUP_ITEM_KEY:
10063                 ret = check_block_group_item(fs_info, eb, slot);
10064                 err |= ret;
10065                 break;
10066         case BTRFS_DEV_ITEM_KEY:
10067                 ret = check_dev_item(fs_info, eb, slot);
10068                 err |= ret;
10069                 break;
10070         case BTRFS_CHUNK_ITEM_KEY:
10071                 ret = check_chunk_item(fs_info, eb, slot);
10072                 err |= ret;
10073                 break;
10074         case BTRFS_DEV_EXTENT_KEY:
10075                 ret = check_dev_extent_item(fs_info, eb, slot);
10076                 err |= ret;
10077                 break;
10078         case BTRFS_EXTENT_ITEM_KEY:
10079         case BTRFS_METADATA_ITEM_KEY:
10080                 ret = check_extent_item(fs_info, eb, slot);
10081                 err |= ret;
10082                 break;
10083         case BTRFS_EXTENT_CSUM_KEY:
10084                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
10085                 break;
10086         case BTRFS_TREE_BLOCK_REF_KEY:
10087                 ret = check_tree_block_backref(fs_info, key.offset,
10088                                                key.objectid, -1);
10089                 err |= ret;
10090                 break;
10091         case BTRFS_EXTENT_DATA_REF_KEY:
10092                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10093                 ret = check_extent_data_backref(fs_info,
10094                                 btrfs_extent_data_ref_root(eb, dref),
10095                                 btrfs_extent_data_ref_objectid(eb, dref),
10096                                 btrfs_extent_data_ref_offset(eb, dref),
10097                                 key.objectid, 0,
10098                                 btrfs_extent_data_ref_count(eb, dref));
10099                 err |= ret;
10100                 break;
10101         case BTRFS_SHARED_BLOCK_REF_KEY:
10102                 ret = check_shared_block_backref(fs_info, key.offset,
10103                                                  key.objectid, -1);
10104                 err |= ret;
10105                 break;
10106         case BTRFS_SHARED_DATA_REF_KEY:
10107                 ret = check_shared_data_backref(fs_info, key.offset,
10108                                                 key.objectid);
10109                 err |= ret;
10110                 break;
10111         default:
10112                 break;
10113         }
10114
10115         if (++slot < btrfs_header_nritems(eb))
10116                 goto next;
10117
10118         return err;
10119 }
10120
10121 /*
10122  * Helper function for later fs/subvol tree check.  To determine if a tree
10123  * block should be checked.
10124  * This function will ensure only the direct referencer with lowest rootid to
10125  * check a fs/subvolume tree block.
10126  *
10127  * Backref check at extent tree would detect errors like missing subvolume
10128  * tree, so we can do aggressive check to reduce duplicated checks.
10129  */
10130 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10131 {
10132         struct btrfs_root *extent_root = root->fs_info->extent_root;
10133         struct btrfs_key key;
10134         struct btrfs_path path;
10135         struct extent_buffer *leaf;
10136         int slot;
10137         struct btrfs_extent_item *ei;
10138         unsigned long ptr;
10139         unsigned long end;
10140         int type;
10141         u32 item_size;
10142         u64 offset;
10143         struct btrfs_extent_inline_ref *iref;
10144         int ret;
10145
10146         btrfs_init_path(&path);
10147         key.objectid = btrfs_header_bytenr(eb);
10148         key.type = BTRFS_METADATA_ITEM_KEY;
10149         key.offset = (u64)-1;
10150
10151         /*
10152          * Any failure in backref resolving means we can't determine
10153          * whom the tree block belongs to.
10154          * So in that case, we need to check that tree block
10155          */
10156         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10157         if (ret < 0)
10158                 goto need_check;
10159
10160         ret = btrfs_previous_extent_item(extent_root, &path,
10161                                          btrfs_header_bytenr(eb));
10162         if (ret)
10163                 goto need_check;
10164
10165         leaf = path.nodes[0];
10166         slot = path.slots[0];
10167         btrfs_item_key_to_cpu(leaf, &key, slot);
10168         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10169
10170         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10171                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10172         } else {
10173                 struct btrfs_tree_block_info *info;
10174
10175                 info = (struct btrfs_tree_block_info *)(ei + 1);
10176                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10177         }
10178
10179         item_size = btrfs_item_size_nr(leaf, slot);
10180         ptr = (unsigned long)iref;
10181         end = (unsigned long)ei + item_size;
10182         while (ptr < end) {
10183                 iref = (struct btrfs_extent_inline_ref *)ptr;
10184                 type = btrfs_extent_inline_ref_type(leaf, iref);
10185                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10186
10187                 /*
10188                  * We only check the tree block if current root is
10189                  * the lowest referencer of it.
10190                  */
10191                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10192                     offset < root->objectid) {
10193                         btrfs_release_path(&path);
10194                         return 0;
10195                 }
10196
10197                 ptr += btrfs_extent_inline_ref_size(type);
10198         }
10199         /*
10200          * Normally we should also check keyed tree block ref, but that may be
10201          * very time consuming.  Inlined ref should already make us skip a lot
10202          * of refs now.  So skip search keyed tree block ref.
10203          */
10204
10205 need_check:
10206         btrfs_release_path(&path);
10207         return 1;
10208 }
10209
10210 /*
10211  * Traversal function for tree block. We will do:
10212  * 1) Skip shared fs/subvolume tree blocks
10213  * 2) Update related bytes accounting
10214  * 3) Pre-order traversal
10215  */
10216 static int traverse_tree_block(struct btrfs_root *root,
10217                                 struct extent_buffer *node)
10218 {
10219         struct extent_buffer *eb;
10220         struct btrfs_key key;
10221         struct btrfs_key drop_key;
10222         int level;
10223         u64 nr;
10224         int i;
10225         int err = 0;
10226         int ret;
10227
10228         /*
10229          * Skip shared fs/subvolume tree block, in that case they will
10230          * be checked by referencer with lowest rootid
10231          */
10232         if (is_fstree(root->objectid) && !should_check(root, node))
10233                 return 0;
10234
10235         /* Update bytes accounting */
10236         total_btree_bytes += node->len;
10237         if (fs_root_objectid(btrfs_header_owner(node)))
10238                 total_fs_tree_bytes += node->len;
10239         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10240                 total_extent_tree_bytes += node->len;
10241         if (!found_old_backref &&
10242             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10243             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10244             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10245                 found_old_backref = 1;
10246
10247         /* pre-order tranversal, check itself first */
10248         level = btrfs_header_level(node);
10249         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10250                                    btrfs_header_level(node),
10251                                    btrfs_header_owner(node));
10252         err |= ret;
10253         if (err)
10254                 error(
10255         "check %s failed root %llu bytenr %llu level %d, force continue check",
10256                         level ? "node":"leaf", root->objectid,
10257                         btrfs_header_bytenr(node), btrfs_header_level(node));
10258
10259         if (!level) {
10260                 btree_space_waste += btrfs_leaf_free_space(root, node);
10261                 ret = check_leaf_items(root, node);
10262                 err |= ret;
10263                 return err;
10264         }
10265
10266         nr = btrfs_header_nritems(node);
10267         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10268         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10269                 sizeof(struct btrfs_key_ptr);
10270
10271         /* Then check all its children */
10272         for (i = 0; i < nr; i++) {
10273                 u64 blocknr = btrfs_node_blockptr(node, i);
10274
10275                 btrfs_node_key_to_cpu(node, &key, i);
10276                 if (level == root->root_item.drop_level &&
10277                     is_dropped_key(&key, &drop_key))
10278                         continue;
10279
10280                 /*
10281                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10282                  * to call the function itself.
10283                  */
10284                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10285                 if (extent_buffer_uptodate(eb)) {
10286                         ret = traverse_tree_block(root, eb);
10287                         err |= ret;
10288                 }
10289                 free_extent_buffer(eb);
10290         }
10291
10292         return err;
10293 }
10294
10295 /*
10296  * Low memory usage version check_chunks_and_extents.
10297  */
10298 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10299 {
10300         struct btrfs_path path;
10301         struct btrfs_key key;
10302         struct btrfs_root *root1;
10303         struct btrfs_root *cur_root;
10304         int err = 0;
10305         int ret;
10306
10307         root1 = root->fs_info->chunk_root;
10308         ret = traverse_tree_block(root1, root1->node);
10309         err |= ret;
10310
10311         root1 = root->fs_info->tree_root;
10312         ret = traverse_tree_block(root1, root1->node);
10313         err |= ret;
10314
10315         btrfs_init_path(&path);
10316         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10317         key.offset = 0;
10318         key.type = BTRFS_ROOT_ITEM_KEY;
10319
10320         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10321         if (ret) {
10322                 error("cannot find extent treet in tree_root");
10323                 goto out;
10324         }
10325
10326         while (1) {
10327                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10328                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10329                         goto next;
10330                 key.offset = (u64)-1;
10331
10332                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10333                 if (IS_ERR(cur_root) || !cur_root) {
10334                         error("failed to read tree: %lld", key.objectid);
10335                         goto next;
10336                 }
10337
10338                 ret = traverse_tree_block(cur_root, cur_root->node);
10339                 err |= ret;
10340
10341 next:
10342                 ret = btrfs_next_item(root1, &path);
10343                 if (ret)
10344                         goto out;
10345         }
10346
10347 out:
10348         btrfs_release_path(&path);
10349         return err;
10350 }
10351
10352 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10353                            struct btrfs_root *root, int overwrite)
10354 {
10355         struct extent_buffer *c;
10356         struct extent_buffer *old = root->node;
10357         int level;
10358         int ret;
10359         struct btrfs_disk_key disk_key = {0,0,0};
10360
10361         level = 0;
10362
10363         if (overwrite) {
10364                 c = old;
10365                 extent_buffer_get(c);
10366                 goto init;
10367         }
10368         c = btrfs_alloc_free_block(trans, root,
10369                                    root->nodesize,
10370                                    root->root_key.objectid,
10371                                    &disk_key, level, 0, 0);
10372         if (IS_ERR(c)) {
10373                 c = old;
10374                 extent_buffer_get(c);
10375                 overwrite = 1;
10376         }
10377 init:
10378         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10379         btrfs_set_header_level(c, level);
10380         btrfs_set_header_bytenr(c, c->start);
10381         btrfs_set_header_generation(c, trans->transid);
10382         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10383         btrfs_set_header_owner(c, root->root_key.objectid);
10384
10385         write_extent_buffer(c, root->fs_info->fsid,
10386                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10387
10388         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10389                             btrfs_header_chunk_tree_uuid(c),
10390                             BTRFS_UUID_SIZE);
10391
10392         btrfs_mark_buffer_dirty(c);
10393         /*
10394          * this case can happen in the following case:
10395          *
10396          * 1.overwrite previous root.
10397          *
10398          * 2.reinit reloc data root, this is because we skip pin
10399          * down reloc data tree before which means we can allocate
10400          * same block bytenr here.
10401          */
10402         if (old->start == c->start) {
10403                 btrfs_set_root_generation(&root->root_item,
10404                                           trans->transid);
10405                 root->root_item.level = btrfs_header_level(root->node);
10406                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10407                                         &root->root_key, &root->root_item);
10408                 if (ret) {
10409                         free_extent_buffer(c);
10410                         return ret;
10411                 }
10412         }
10413         free_extent_buffer(old);
10414         root->node = c;
10415         add_root_to_dirty_list(root);
10416         return 0;
10417 }
10418
10419 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10420                                 struct extent_buffer *eb, int tree_root)
10421 {
10422         struct extent_buffer *tmp;
10423         struct btrfs_root_item *ri;
10424         struct btrfs_key key;
10425         u64 bytenr;
10426         u32 nodesize;
10427         int level = btrfs_header_level(eb);
10428         int nritems;
10429         int ret;
10430         int i;
10431
10432         /*
10433          * If we have pinned this block before, don't pin it again.
10434          * This can not only avoid forever loop with broken filesystem
10435          * but also give us some speedups.
10436          */
10437         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10438                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10439                 return 0;
10440
10441         btrfs_pin_extent(fs_info, eb->start, eb->len);
10442
10443         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10444         nritems = btrfs_header_nritems(eb);
10445         for (i = 0; i < nritems; i++) {
10446                 if (level == 0) {
10447                         btrfs_item_key_to_cpu(eb, &key, i);
10448                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10449                                 continue;
10450                         /* Skip the extent root and reloc roots */
10451                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10452                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10453                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10454                                 continue;
10455                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10456                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10457
10458                         /*
10459                          * If at any point we start needing the real root we
10460                          * will have to build a stump root for the root we are
10461                          * in, but for now this doesn't actually use the root so
10462                          * just pass in extent_root.
10463                          */
10464                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10465                                               nodesize, 0);
10466                         if (!extent_buffer_uptodate(tmp)) {
10467                                 fprintf(stderr, "Error reading root block\n");
10468                                 return -EIO;
10469                         }
10470                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10471                         free_extent_buffer(tmp);
10472                         if (ret)
10473                                 return ret;
10474                 } else {
10475                         bytenr = btrfs_node_blockptr(eb, i);
10476
10477                         /* If we aren't the tree root don't read the block */
10478                         if (level == 1 && !tree_root) {
10479                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10480                                 continue;
10481                         }
10482
10483                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10484                                               nodesize, 0);
10485                         if (!extent_buffer_uptodate(tmp)) {
10486                                 fprintf(stderr, "Error reading tree block\n");
10487                                 return -EIO;
10488                         }
10489                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10490                         free_extent_buffer(tmp);
10491                         if (ret)
10492                                 return ret;
10493                 }
10494         }
10495
10496         return 0;
10497 }
10498
10499 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10500 {
10501         int ret;
10502
10503         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10504         if (ret)
10505                 return ret;
10506
10507         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10508 }
10509
10510 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10511 {
10512         struct btrfs_block_group_cache *cache;
10513         struct btrfs_path path;
10514         struct extent_buffer *leaf;
10515         struct btrfs_chunk *chunk;
10516         struct btrfs_key key;
10517         int ret;
10518         u64 start;
10519
10520         btrfs_init_path(&path);
10521         key.objectid = 0;
10522         key.type = BTRFS_CHUNK_ITEM_KEY;
10523         key.offset = 0;
10524         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
10525         if (ret < 0) {
10526                 btrfs_release_path(&path);
10527                 return ret;
10528         }
10529
10530         /*
10531          * We do this in case the block groups were screwed up and had alloc
10532          * bits that aren't actually set on the chunks.  This happens with
10533          * restored images every time and could happen in real life I guess.
10534          */
10535         fs_info->avail_data_alloc_bits = 0;
10536         fs_info->avail_metadata_alloc_bits = 0;
10537         fs_info->avail_system_alloc_bits = 0;
10538
10539         /* First we need to create the in-memory block groups */
10540         while (1) {
10541                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10542                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
10543                         if (ret < 0) {
10544                                 btrfs_release_path(&path);
10545                                 return ret;
10546                         }
10547                         if (ret) {
10548                                 ret = 0;
10549                                 break;
10550                         }
10551                 }
10552                 leaf = path.nodes[0];
10553                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10554                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10555                         path.slots[0]++;
10556                         continue;
10557                 }
10558
10559                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
10560                 btrfs_add_block_group(fs_info, 0,
10561                                       btrfs_chunk_type(leaf, chunk),
10562                                       key.objectid, key.offset,
10563                                       btrfs_chunk_length(leaf, chunk));
10564                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10565                                  key.offset + btrfs_chunk_length(leaf, chunk),
10566                                  GFP_NOFS);
10567                 path.slots[0]++;
10568         }
10569         start = 0;
10570         while (1) {
10571                 cache = btrfs_lookup_first_block_group(fs_info, start);
10572                 if (!cache)
10573                         break;
10574                 cache->cached = 1;
10575                 start = cache->key.objectid + cache->key.offset;
10576         }
10577
10578         btrfs_release_path(&path);
10579         return 0;
10580 }
10581
10582 static int reset_balance(struct btrfs_trans_handle *trans,
10583                          struct btrfs_fs_info *fs_info)
10584 {
10585         struct btrfs_root *root = fs_info->tree_root;
10586         struct btrfs_path path;
10587         struct extent_buffer *leaf;
10588         struct btrfs_key key;
10589         int del_slot, del_nr = 0;
10590         int ret;
10591         int found = 0;
10592
10593         btrfs_init_path(&path);
10594         key.objectid = BTRFS_BALANCE_OBJECTID;
10595         key.type = BTRFS_BALANCE_ITEM_KEY;
10596         key.offset = 0;
10597         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10598         if (ret) {
10599                 if (ret > 0)
10600                         ret = 0;
10601                 if (!ret)
10602                         goto reinit_data_reloc;
10603                 else
10604                         goto out;
10605         }
10606
10607         ret = btrfs_del_item(trans, root, &path);
10608         if (ret)
10609                 goto out;
10610         btrfs_release_path(&path);
10611
10612         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10613         key.type = BTRFS_ROOT_ITEM_KEY;
10614         key.offset = 0;
10615         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10616         if (ret < 0)
10617                 goto out;
10618         while (1) {
10619                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10620                         if (!found)
10621                                 break;
10622
10623                         if (del_nr) {
10624                                 ret = btrfs_del_items(trans, root, &path,
10625                                                       del_slot, del_nr);
10626                                 del_nr = 0;
10627                                 if (ret)
10628                                         goto out;
10629                         }
10630                         key.offset++;
10631                         btrfs_release_path(&path);
10632
10633                         found = 0;
10634                         ret = btrfs_search_slot(trans, root, &key, &path,
10635                                                 -1, 1);
10636                         if (ret < 0)
10637                                 goto out;
10638                         continue;
10639                 }
10640                 found = 1;
10641                 leaf = path.nodes[0];
10642                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10643                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10644                         break;
10645                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10646                         path.slots[0]++;
10647                         continue;
10648                 }
10649                 if (!del_nr) {
10650                         del_slot = path.slots[0];
10651                         del_nr = 1;
10652                 } else {
10653                         del_nr++;
10654                 }
10655                 path.slots[0]++;
10656         }
10657
10658         if (del_nr) {
10659                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
10660                 if (ret)
10661                         goto out;
10662         }
10663         btrfs_release_path(&path);
10664
10665 reinit_data_reloc:
10666         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10667         key.type = BTRFS_ROOT_ITEM_KEY;
10668         key.offset = (u64)-1;
10669         root = btrfs_read_fs_root(fs_info, &key);
10670         if (IS_ERR(root)) {
10671                 fprintf(stderr, "Error reading data reloc tree\n");
10672                 ret = PTR_ERR(root);
10673                 goto out;
10674         }
10675         record_root_in_trans(trans, root);
10676         ret = btrfs_fsck_reinit_root(trans, root, 0);
10677         if (ret)
10678                 goto out;
10679         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10680 out:
10681         btrfs_release_path(&path);
10682         return ret;
10683 }
10684
10685 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10686                               struct btrfs_fs_info *fs_info)
10687 {
10688         u64 start = 0;
10689         int ret;
10690
10691         /*
10692          * The only reason we don't do this is because right now we're just
10693          * walking the trees we find and pinning down their bytes, we don't look
10694          * at any of the leaves.  In order to do mixed groups we'd have to check
10695          * the leaves of any fs roots and pin down the bytes for any file
10696          * extents we find.  Not hard but why do it if we don't have to?
10697          */
10698         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10699                 fprintf(stderr, "We don't support re-initing the extent tree "
10700                         "for mixed block groups yet, please notify a btrfs "
10701                         "developer you want to do this so they can add this "
10702                         "functionality.\n");
10703                 return -EINVAL;
10704         }
10705
10706         /*
10707          * first we need to walk all of the trees except the extent tree and pin
10708          * down the bytes that are in use so we don't overwrite any existing
10709          * metadata.
10710          */
10711         ret = pin_metadata_blocks(fs_info);
10712         if (ret) {
10713                 fprintf(stderr, "error pinning down used bytes\n");
10714                 return ret;
10715         }
10716
10717         /*
10718          * Need to drop all the block groups since we're going to recreate all
10719          * of them again.
10720          */
10721         btrfs_free_block_groups(fs_info);
10722         ret = reset_block_groups(fs_info);
10723         if (ret) {
10724                 fprintf(stderr, "error resetting the block groups\n");
10725                 return ret;
10726         }
10727
10728         /* Ok we can allocate now, reinit the extent root */
10729         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10730         if (ret) {
10731                 fprintf(stderr, "extent root initialization failed\n");
10732                 /*
10733                  * When the transaction code is updated we should end the
10734                  * transaction, but for now progs only knows about commit so
10735                  * just return an error.
10736                  */
10737                 return ret;
10738         }
10739
10740         /*
10741          * Now we have all the in-memory block groups setup so we can make
10742          * allocations properly, and the metadata we care about is safe since we
10743          * pinned all of it above.
10744          */
10745         while (1) {
10746                 struct btrfs_block_group_cache *cache;
10747
10748                 cache = btrfs_lookup_first_block_group(fs_info, start);
10749                 if (!cache)
10750                         break;
10751                 start = cache->key.objectid + cache->key.offset;
10752                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10753                                         &cache->key, &cache->item,
10754                                         sizeof(cache->item));
10755                 if (ret) {
10756                         fprintf(stderr, "Error adding block group\n");
10757                         return ret;
10758                 }
10759                 btrfs_extent_post_op(trans, fs_info->extent_root);
10760         }
10761
10762         ret = reset_balance(trans, fs_info);
10763         if (ret)
10764                 fprintf(stderr, "error resetting the pending balance\n");
10765
10766         return ret;
10767 }
10768
10769 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10770 {
10771         struct btrfs_path path;
10772         struct btrfs_trans_handle *trans;
10773         struct btrfs_key key;
10774         int ret;
10775
10776         printf("Recowing metadata block %llu\n", eb->start);
10777         key.objectid = btrfs_header_owner(eb);
10778         key.type = BTRFS_ROOT_ITEM_KEY;
10779         key.offset = (u64)-1;
10780
10781         root = btrfs_read_fs_root(root->fs_info, &key);
10782         if (IS_ERR(root)) {
10783                 fprintf(stderr, "Couldn't find owner root %llu\n",
10784                         key.objectid);
10785                 return PTR_ERR(root);
10786         }
10787
10788         trans = btrfs_start_transaction(root, 1);
10789         if (IS_ERR(trans))
10790                 return PTR_ERR(trans);
10791
10792         btrfs_init_path(&path);
10793         path.lowest_level = btrfs_header_level(eb);
10794         if (path.lowest_level)
10795                 btrfs_node_key_to_cpu(eb, &key, 0);
10796         else
10797                 btrfs_item_key_to_cpu(eb, &key, 0);
10798
10799         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10800         btrfs_commit_transaction(trans, root);
10801         btrfs_release_path(&path);
10802         return ret;
10803 }
10804
10805 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10806 {
10807         struct btrfs_path path;
10808         struct btrfs_trans_handle *trans;
10809         struct btrfs_key key;
10810         int ret;
10811
10812         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10813                bad->key.type, bad->key.offset);
10814         key.objectid = bad->root_id;
10815         key.type = BTRFS_ROOT_ITEM_KEY;
10816         key.offset = (u64)-1;
10817
10818         root = btrfs_read_fs_root(root->fs_info, &key);
10819         if (IS_ERR(root)) {
10820                 fprintf(stderr, "Couldn't find owner root %llu\n",
10821                         key.objectid);
10822                 return PTR_ERR(root);
10823         }
10824
10825         trans = btrfs_start_transaction(root, 1);
10826         if (IS_ERR(trans))
10827                 return PTR_ERR(trans);
10828
10829         btrfs_init_path(&path);
10830         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
10831         if (ret) {
10832                 if (ret > 0)
10833                         ret = 0;
10834                 goto out;
10835         }
10836         ret = btrfs_del_item(trans, root, &path);
10837 out:
10838         btrfs_commit_transaction(trans, root);
10839         btrfs_release_path(&path);
10840         return ret;
10841 }
10842
10843 static int zero_log_tree(struct btrfs_root *root)
10844 {
10845         struct btrfs_trans_handle *trans;
10846         int ret;
10847
10848         trans = btrfs_start_transaction(root, 1);
10849         if (IS_ERR(trans)) {
10850                 ret = PTR_ERR(trans);
10851                 return ret;
10852         }
10853         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10854         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10855         ret = btrfs_commit_transaction(trans, root);
10856         return ret;
10857 }
10858
10859 static int populate_csum(struct btrfs_trans_handle *trans,
10860                          struct btrfs_root *csum_root, char *buf, u64 start,
10861                          u64 len)
10862 {
10863         u64 offset = 0;
10864         u64 sectorsize;
10865         int ret = 0;
10866
10867         while (offset < len) {
10868                 sectorsize = csum_root->sectorsize;
10869                 ret = read_extent_data(csum_root, buf, start + offset,
10870                                        &sectorsize, 0);
10871                 if (ret)
10872                         break;
10873                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10874                                             start + offset, buf, sectorsize);
10875                 if (ret)
10876                         break;
10877                 offset += sectorsize;
10878         }
10879         return ret;
10880 }
10881
10882 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10883                                       struct btrfs_root *csum_root,
10884                                       struct btrfs_root *cur_root)
10885 {
10886         struct btrfs_path path;
10887         struct btrfs_key key;
10888         struct extent_buffer *node;
10889         struct btrfs_file_extent_item *fi;
10890         char *buf = NULL;
10891         u64 start = 0;
10892         u64 len = 0;
10893         int slot = 0;
10894         int ret = 0;
10895
10896         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10897         if (!buf)
10898                 return -ENOMEM;
10899
10900         btrfs_init_path(&path);
10901         key.objectid = 0;
10902         key.offset = 0;
10903         key.type = 0;
10904         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
10905         if (ret < 0)
10906                 goto out;
10907         /* Iterate all regular file extents and fill its csum */
10908         while (1) {
10909                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10910
10911                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10912                         goto next;
10913                 node = path.nodes[0];
10914                 slot = path.slots[0];
10915                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10916                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10917                         goto next;
10918                 start = btrfs_file_extent_disk_bytenr(node, fi);
10919                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10920
10921                 ret = populate_csum(trans, csum_root, buf, start, len);
10922                 if (ret == -EEXIST)
10923                         ret = 0;
10924                 if (ret < 0)
10925                         goto out;
10926 next:
10927                 /*
10928                  * TODO: if next leaf is corrupted, jump to nearest next valid
10929                  * leaf.
10930                  */
10931                 ret = btrfs_next_item(cur_root, &path);
10932                 if (ret < 0)
10933                         goto out;
10934                 if (ret > 0) {
10935                         ret = 0;
10936                         goto out;
10937                 }
10938         }
10939
10940 out:
10941         btrfs_release_path(&path);
10942         free(buf);
10943         return ret;
10944 }
10945
10946 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10947                                   struct btrfs_root *csum_root)
10948 {
10949         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10950         struct btrfs_path path;
10951         struct btrfs_root *tree_root = fs_info->tree_root;
10952         struct btrfs_root *cur_root;
10953         struct extent_buffer *node;
10954         struct btrfs_key key;
10955         int slot = 0;
10956         int ret = 0;
10957
10958         btrfs_init_path(&path);
10959         key.objectid = BTRFS_FS_TREE_OBJECTID;
10960         key.offset = 0;
10961         key.type = BTRFS_ROOT_ITEM_KEY;
10962         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
10963         if (ret < 0)
10964                 goto out;
10965         if (ret > 0) {
10966                 ret = -ENOENT;
10967                 goto out;
10968         }
10969
10970         while (1) {
10971                 node = path.nodes[0];
10972                 slot = path.slots[0];
10973                 btrfs_item_key_to_cpu(node, &key, slot);
10974                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10975                         goto out;
10976                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10977                         goto next;
10978                 if (!is_fstree(key.objectid))
10979                         goto next;
10980                 key.offset = (u64)-1;
10981
10982                 cur_root = btrfs_read_fs_root(fs_info, &key);
10983                 if (IS_ERR(cur_root) || !cur_root) {
10984                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10985                                 key.objectid);
10986                         goto out;
10987                 }
10988                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10989                                 cur_root);
10990                 if (ret < 0)
10991                         goto out;
10992 next:
10993                 ret = btrfs_next_item(tree_root, &path);
10994                 if (ret > 0) {
10995                         ret = 0;
10996                         goto out;
10997                 }
10998                 if (ret < 0)
10999                         goto out;
11000         }
11001
11002 out:
11003         btrfs_release_path(&path);
11004         return ret;
11005 }
11006
11007 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11008                                       struct btrfs_root *csum_root)
11009 {
11010         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11011         struct btrfs_path path;
11012         struct btrfs_extent_item *ei;
11013         struct extent_buffer *leaf;
11014         char *buf;
11015         struct btrfs_key key;
11016         int ret;
11017
11018         btrfs_init_path(&path);
11019         key.objectid = 0;
11020         key.type = BTRFS_EXTENT_ITEM_KEY;
11021         key.offset = 0;
11022         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11023         if (ret < 0) {
11024                 btrfs_release_path(&path);
11025                 return ret;
11026         }
11027
11028         buf = malloc(csum_root->sectorsize);
11029         if (!buf) {
11030                 btrfs_release_path(&path);
11031                 return -ENOMEM;
11032         }
11033
11034         while (1) {
11035                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11036                         ret = btrfs_next_leaf(extent_root, &path);
11037                         if (ret < 0)
11038                                 break;
11039                         if (ret) {
11040                                 ret = 0;
11041                                 break;
11042                         }
11043                 }
11044                 leaf = path.nodes[0];
11045
11046                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11047                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11048                         path.slots[0]++;
11049                         continue;
11050                 }
11051
11052                 ei = btrfs_item_ptr(leaf, path.slots[0],
11053                                     struct btrfs_extent_item);
11054                 if (!(btrfs_extent_flags(leaf, ei) &
11055                       BTRFS_EXTENT_FLAG_DATA)) {
11056                         path.slots[0]++;
11057                         continue;
11058                 }
11059
11060                 ret = populate_csum(trans, csum_root, buf, key.objectid,
11061                                     key.offset);
11062                 if (ret)
11063                         break;
11064                 path.slots[0]++;
11065         }
11066
11067         btrfs_release_path(&path);
11068         free(buf);
11069         return ret;
11070 }
11071
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * After an extent tree init all extent info has been wiped, so the extent
 * tree cannot be used as the source.  When search_fs_tree is non-zero the
 * fs/subvolume trees are walked instead; otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
                              : fill_csum_tree_from_extent(trans, csum_root);
}
11088
11089 static void free_roots_info_cache(void)
11090 {
11091         if (!roots_info_cache)
11092                 return;
11093
11094         while (!cache_tree_empty(roots_info_cache)) {
11095                 struct cache_extent *entry;
11096                 struct root_item_info *rii;
11097
11098                 entry = first_cache_extent(roots_info_cache);
11099                 if (!entry)
11100                         break;
11101                 remove_cache_extent(roots_info_cache, entry);
11102                 rii = container_of(entry, struct root_item_info, cache_extent);
11103                 free(rii);
11104         }
11105
11106         free(roots_info_cache);
11107         roots_info_cache = NULL;
11108 }
11109
11110 static int build_roots_info_cache(struct btrfs_fs_info *info)
11111 {
11112         int ret = 0;
11113         struct btrfs_key key;
11114         struct extent_buffer *leaf;
11115         struct btrfs_path path;
11116
11117         if (!roots_info_cache) {
11118                 roots_info_cache = malloc(sizeof(*roots_info_cache));
11119                 if (!roots_info_cache)
11120                         return -ENOMEM;
11121                 cache_tree_init(roots_info_cache);
11122         }
11123
11124         btrfs_init_path(&path);
11125         key.objectid = 0;
11126         key.type = BTRFS_EXTENT_ITEM_KEY;
11127         key.offset = 0;
11128         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11129         if (ret < 0)
11130                 goto out;
11131         leaf = path.nodes[0];
11132
11133         while (1) {
11134                 struct btrfs_key found_key;
11135                 struct btrfs_extent_item *ei;
11136                 struct btrfs_extent_inline_ref *iref;
11137                 int slot = path.slots[0];
11138                 int type;
11139                 u64 flags;
11140                 u64 root_id;
11141                 u8 level;
11142                 struct cache_extent *entry;
11143                 struct root_item_info *rii;
11144
11145                 if (slot >= btrfs_header_nritems(leaf)) {
11146                         ret = btrfs_next_leaf(info->extent_root, &path);
11147                         if (ret < 0) {
11148                                 break;
11149                         } else if (ret) {
11150                                 ret = 0;
11151                                 break;
11152                         }
11153                         leaf = path.nodes[0];
11154                         slot = path.slots[0];
11155                 }
11156
11157                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11158
11159                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11160                     found_key.type != BTRFS_METADATA_ITEM_KEY)
11161                         goto next;
11162
11163                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11164                 flags = btrfs_extent_flags(leaf, ei);
11165
11166                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11167                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11168                         goto next;
11169
11170                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11171                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11172                         level = found_key.offset;
11173                 } else {
11174                         struct btrfs_tree_block_info *binfo;
11175
11176                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
11177                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11178                         level = btrfs_tree_block_level(leaf, binfo);
11179                 }
11180
11181                 /*
11182                  * For a root extent, it must be of the following type and the
11183                  * first (and only one) iref in the item.
11184                  */
11185                 type = btrfs_extent_inline_ref_type(leaf, iref);
11186                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11187                         goto next;
11188
11189                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11190                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11191                 if (!entry) {
11192                         rii = malloc(sizeof(struct root_item_info));
11193                         if (!rii) {
11194                                 ret = -ENOMEM;
11195                                 goto out;
11196                         }
11197                         rii->cache_extent.start = root_id;
11198                         rii->cache_extent.size = 1;
11199                         rii->level = (u8)-1;
11200                         entry = &rii->cache_extent;
11201                         ret = insert_cache_extent(roots_info_cache, entry);
11202                         ASSERT(ret == 0);
11203                 } else {
11204                         rii = container_of(entry, struct root_item_info,
11205                                            cache_extent);
11206                 }
11207
11208                 ASSERT(rii->cache_extent.start == root_id);
11209                 ASSERT(rii->cache_extent.size == 1);
11210
11211                 if (level > rii->level || rii->level == (u8)-1) {
11212                         rii->level = level;
11213                         rii->bytenr = found_key.objectid;
11214                         rii->gen = btrfs_extent_generation(leaf, ei);
11215                         rii->node_count = 1;
11216                 } else if (level == rii->level) {
11217                         rii->node_count++;
11218                 }
11219 next:
11220                 path.slots[0]++;
11221         }
11222
11223 out:
11224         btrfs_release_path(&path);
11225
11226         return ret;
11227 }
11228
11229 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11230                                   struct btrfs_path *path,
11231                                   const struct btrfs_key *root_key,
11232                                   const int read_only_mode)
11233 {
11234         const u64 root_id = root_key->objectid;
11235         struct cache_extent *entry;
11236         struct root_item_info *rii;
11237         struct btrfs_root_item ri;
11238         unsigned long offset;
11239
11240         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11241         if (!entry) {
11242                 fprintf(stderr,
11243                         "Error: could not find extent items for root %llu\n",
11244                         root_key->objectid);
11245                 return -ENOENT;
11246         }
11247
11248         rii = container_of(entry, struct root_item_info, cache_extent);
11249         ASSERT(rii->cache_extent.start == root_id);
11250         ASSERT(rii->cache_extent.size == 1);
11251
11252         if (rii->node_count != 1) {
11253                 fprintf(stderr,
11254                         "Error: could not find btree root extent for root %llu\n",
11255                         root_id);
11256                 return -ENOENT;
11257         }
11258
11259         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11260         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11261
11262         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11263             btrfs_root_level(&ri) != rii->level ||
11264             btrfs_root_generation(&ri) != rii->gen) {
11265
11266                 /*
11267                  * If we're in repair mode but our caller told us to not update
11268                  * the root item, i.e. just check if it needs to be updated, don't
11269                  * print this message, since the caller will call us again shortly
11270                  * for the same root item without read only mode (the caller will
11271                  * open a transaction first).
11272                  */
11273                 if (!(read_only_mode && repair))
11274                         fprintf(stderr,
11275                                 "%sroot item for root %llu,"
11276                                 " current bytenr %llu, current gen %llu, current level %u,"
11277                                 " new bytenr %llu, new gen %llu, new level %u\n",
11278                                 (read_only_mode ? "" : "fixing "),
11279                                 root_id,
11280                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11281                                 btrfs_root_level(&ri),
11282                                 rii->bytenr, rii->gen, rii->level);
11283
11284                 if (btrfs_root_generation(&ri) > rii->gen) {
11285                         fprintf(stderr,
11286                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11287                                 root_id, btrfs_root_generation(&ri), rii->gen);
11288                         return -EINVAL;
11289                 }
11290
11291                 if (!read_only_mode) {
11292                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11293                         btrfs_set_root_level(&ri, rii->level);
11294                         btrfs_set_root_generation(&ri, rii->gen);
11295                         write_extent_buffer(path->nodes[0], &ri,
11296                                             offset, sizeof(ri));
11297                 }
11298
11299                 return 1;
11300         }
11301
11302         return 0;
11303 }
11304
11305 /*
11306  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11307  * caused read-only snapshots to be corrupted if they were created at a moment
11308  * when the source subvolume/snapshot had orphan items. The issue was that the
11309  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11310  * node instead of the post orphan cleanup root node.
11311  * So this function, and its callees, just detects and fixes those cases. Even
11312  * though the regression was for read-only snapshots, this function applies to
11313  * any snapshot/subvolume root.
11314  * This must be run before any other repair code - not doing it so, makes other
11315  * repair code delete or modify backrefs in the extent tree for example, which
11316  * will result in an inconsistent fs after repairing the root items.
11317  */
11318 static int repair_root_items(struct btrfs_fs_info *info)
11319 {
11320         struct btrfs_path path;
11321         struct btrfs_key key;
11322         struct extent_buffer *leaf;
11323         struct btrfs_trans_handle *trans = NULL;
11324         int ret = 0;
11325         int bad_roots = 0;
11326         int need_trans = 0;
11327
11328         btrfs_init_path(&path);
11329
11330         ret = build_roots_info_cache(info);
11331         if (ret)
11332                 goto out;
11333
11334         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11335         key.type = BTRFS_ROOT_ITEM_KEY;
11336         key.offset = 0;
11337
11338 again:
11339         /*
11340          * Avoid opening and committing transactions if a leaf doesn't have
11341          * any root items that need to be fixed, so that we avoid rotating
11342          * backup roots unnecessarily.
11343          */
11344         if (need_trans) {
11345                 trans = btrfs_start_transaction(info->tree_root, 1);
11346                 if (IS_ERR(trans)) {
11347                         ret = PTR_ERR(trans);
11348                         goto out;
11349                 }
11350         }
11351
11352         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11353                                 0, trans ? 1 : 0);
11354         if (ret < 0)
11355                 goto out;
11356         leaf = path.nodes[0];
11357
11358         while (1) {
11359                 struct btrfs_key found_key;
11360
11361                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11362                         int no_more_keys = find_next_key(&path, &key);
11363
11364                         btrfs_release_path(&path);
11365                         if (trans) {
11366                                 ret = btrfs_commit_transaction(trans,
11367                                                                info->tree_root);
11368                                 trans = NULL;
11369                                 if (ret < 0)
11370                                         goto out;
11371                         }
11372                         need_trans = 0;
11373                         if (no_more_keys)
11374                                 break;
11375                         goto again;
11376                 }
11377
11378                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11379
11380                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11381                         goto next;
11382                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11383                         goto next;
11384
11385                 ret = maybe_repair_root_item(info, &path, &found_key,
11386                                              trans ? 0 : 1);
11387                 if (ret < 0)
11388                         goto out;
11389                 if (ret) {
11390                         if (!trans && repair) {
11391                                 need_trans = 1;
11392                                 key = found_key;
11393                                 btrfs_release_path(&path);
11394                                 goto again;
11395                         }
11396                         bad_roots++;
11397                 }
11398 next:
11399                 path.slots[0]++;
11400         }
11401         ret = 0;
11402 out:
11403         free_roots_info_cache();
11404         btrfs_release_path(&path);
11405         if (trans)
11406                 btrfs_commit_transaction(trans, info->tree_root);
11407         if (ret < 0)
11408                 return ret;
11409
11410         return bad_roots;
11411 }
11412
11413 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11414 {
11415         struct btrfs_trans_handle *trans;
11416         struct btrfs_block_group_cache *bg_cache;
11417         u64 current = 0;
11418         int ret = 0;
11419
11420         /* Clear all free space cache inodes and its extent data */
11421         while (1) {
11422                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11423                 if (!bg_cache)
11424                         break;
11425                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11426                 if (ret < 0)
11427                         return ret;
11428                 current = bg_cache->key.objectid + bg_cache->key.offset;
11429         }
11430
11431         /* Don't forget to set cache_generation to -1 */
11432         trans = btrfs_start_transaction(fs_info->tree_root, 0);
11433         if (IS_ERR(trans)) {
11434                 error("failed to update super block cache generation");
11435                 return PTR_ERR(trans);
11436         }
11437         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11438         btrfs_commit_transaction(trans, fs_info->tree_root);
11439
11440         return ret;
11441 }
11442
/*
 * Help text of "btrfs check": a NULL terminated array of strings that
 * cmd_check() hands to usage(). The first entries are the synopsis and the
 * description, an empty string follows, and the remaining entries describe
 * the individual options.
 */
11443 const char * const cmd_check_usage[] = {
11444         "btrfs check [options] <device>",
11445         "Check structural integrity of a filesystem (unmounted).",
11446         "Check structural integrity of an unmounted filesystem. Verify internal",
11447         "trees' consistency and item connectivity. In the repair mode try to",
11448         "fix the problems found. ",
11449         "WARNING: the repair mode is considered dangerous",
11450         "",
11451         "-s|--super <superblock>     use this superblock copy",
11452         "-b|--backup                 use the first valid backup root copy",
11453         "--repair                    try to repair the filesystem",
11454         "--readonly                  run in read-only mode (default)",
11455         "--init-csum-tree            create a new CRC tree",
11456         "--init-extent-tree          create a new extent tree",
11457         "--mode <MODE>               allows choice of memory/IO trade-offs",
11458         "                            where MODE is one of:",
11459         "                            original - read inodes and extents to memory (requires",
11460         "                                       more memory, does less IO)",
11461         "                            lowmem   - try to use less memory but read blocks again",
11462         "                                       when needed",
11463         "--check-data-csum           verify checksums of data blocks",
11464         "-Q|--qgroup-report          print a report on qgroup consistency",
11465         "-E|--subvol-extents <subvolid>",
11466         "                            print subvolume extents and sharing state",
11467         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
11468         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
11469         "-p|--progress               indicate progress",
11470         "--clear-space-cache v1|v2   clear space cache for v1 or v2",
11471         NULL
11472 };
11473
11474 int cmd_check(int argc, char **argv)
11475 {
11476         struct cache_tree root_cache;
11477         struct btrfs_root *root;
11478         struct btrfs_fs_info *info;
11479         u64 bytenr = 0;
11480         u64 subvolid = 0;
11481         u64 tree_root_bytenr = 0;
11482         u64 chunk_root_bytenr = 0;
11483         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11484         int ret;
11485         u64 num;
11486         int init_csum_tree = 0;
11487         int readonly = 0;
11488         int clear_space_cache = 0;
11489         int qgroup_report = 0;
11490         int qgroups_repaired = 0;
11491         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11492
11493         while(1) {
11494                 int c;
11495                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11496                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11497                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11498                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11499                 static const struct option long_options[] = {
11500                         { "super", required_argument, NULL, 's' },
11501                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11502                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11503                         { "init-csum-tree", no_argument, NULL,
11504                                 GETOPT_VAL_INIT_CSUM },
11505                         { "init-extent-tree", no_argument, NULL,
11506                                 GETOPT_VAL_INIT_EXTENT },
11507                         { "check-data-csum", no_argument, NULL,
11508                                 GETOPT_VAL_CHECK_CSUM },
11509                         { "backup", no_argument, NULL, 'b' },
11510                         { "subvol-extents", required_argument, NULL, 'E' },
11511                         { "qgroup-report", no_argument, NULL, 'Q' },
11512                         { "tree-root", required_argument, NULL, 'r' },
11513                         { "chunk-root", required_argument, NULL,
11514                                 GETOPT_VAL_CHUNK_TREE },
11515                         { "progress", no_argument, NULL, 'p' },
11516                         { "mode", required_argument, NULL,
11517                                 GETOPT_VAL_MODE },
11518                         { "clear-space-cache", required_argument, NULL,
11519                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
11520                         { NULL, 0, NULL, 0}
11521                 };
11522
11523                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11524                 if (c < 0)
11525                         break;
11526                 switch(c) {
11527                         case 'a': /* ignored */ break;
11528                         case 'b':
11529                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11530                                 break;
11531                         case 's':
11532                                 num = arg_strtou64(optarg);
11533                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11534                                         error(
11535                                         "super mirror should be less than %d",
11536                                                 BTRFS_SUPER_MIRROR_MAX);
11537                                         exit(1);
11538                                 }
11539                                 bytenr = btrfs_sb_offset(((int)num));
11540                                 printf("using SB copy %llu, bytenr %llu\n", num,
11541                                        (unsigned long long)bytenr);
11542                                 break;
11543                         case 'Q':
11544                                 qgroup_report = 1;
11545                                 break;
11546                         case 'E':
11547                                 subvolid = arg_strtou64(optarg);
11548                                 break;
11549                         case 'r':
11550                                 tree_root_bytenr = arg_strtou64(optarg);
11551                                 break;
11552                         case GETOPT_VAL_CHUNK_TREE:
11553                                 chunk_root_bytenr = arg_strtou64(optarg);
11554                                 break;
11555                         case 'p':
11556                                 ctx.progress_enabled = true;
11557                                 break;
11558                         case '?':
11559                         case 'h':
11560                                 usage(cmd_check_usage);
11561                         case GETOPT_VAL_REPAIR:
11562                                 printf("enabling repair mode\n");
11563                                 repair = 1;
11564                                 ctree_flags |= OPEN_CTREE_WRITES;
11565                                 break;
11566                         case GETOPT_VAL_READONLY:
11567                                 readonly = 1;
11568                                 break;
11569                         case GETOPT_VAL_INIT_CSUM:
11570                                 printf("Creating a new CRC tree\n");
11571                                 init_csum_tree = 1;
11572                                 repair = 1;
11573                                 ctree_flags |= OPEN_CTREE_WRITES;
11574                                 break;
11575                         case GETOPT_VAL_INIT_EXTENT:
11576                                 init_extent_tree = 1;
11577                                 ctree_flags |= (OPEN_CTREE_WRITES |
11578                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11579                                 repair = 1;
11580                                 break;
11581                         case GETOPT_VAL_CHECK_CSUM:
11582                                 check_data_csum = 1;
11583                                 break;
11584                         case GETOPT_VAL_MODE:
11585                                 check_mode = parse_check_mode(optarg);
11586                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11587                                         error("unknown mode: %s", optarg);
11588                                         exit(1);
11589                                 }
11590                                 break;
11591                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
11592                                 if (strcmp(optarg, "v1") == 0) {
11593                                         clear_space_cache = 1;
11594                                 } else if (strcmp(optarg, "v2") == 0) {
11595                                         clear_space_cache = 2;
11596                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
11597                                 } else {
11598                                         error(
11599                 "invalid argument to --clear-space-cache, must be v1 or v2");
11600                                         exit(1);
11601                                 }
11602                                 ctree_flags |= OPEN_CTREE_WRITES;
11603                                 break;
11604                 }
11605         }
11606
11607         if (check_argc_exact(argc - optind, 1))
11608                 usage(cmd_check_usage);
11609
11610         if (ctx.progress_enabled) {
11611                 ctx.tp = TASK_NOTHING;
11612                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11613         }
11614
11615         /* This check is the only reason for --readonly to exist */
11616         if (readonly && repair) {
11617                 error("repair options are not compatible with --readonly");
11618                 exit(1);
11619         }
11620
11621         /*
11622          * Not supported yet
11623          */
11624         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11625                 error("low memory mode doesn't support repair yet");
11626                 exit(1);
11627         }
11628
11629         radix_tree_init();
11630         cache_tree_init(&root_cache);
11631
11632         if((ret = check_mounted(argv[optind])) < 0) {
11633                 error("could not check mount status: %s", strerror(-ret));
11634                 goto err_out;
11635         } else if(ret) {
11636                 error("%s is currently mounted, aborting", argv[optind]);
11637                 ret = -EBUSY;
11638                 goto err_out;
11639         }
11640
11641         /* only allow partial opening under repair mode */
11642         if (repair)
11643                 ctree_flags |= OPEN_CTREE_PARTIAL;
11644
11645         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11646                                   chunk_root_bytenr, ctree_flags);
11647         if (!info) {
11648                 error("cannot open file system");
11649                 ret = -EIO;
11650                 goto err_out;
11651         }
11652
11653         global_info = info;
11654         root = info->fs_root;
11655         if (clear_space_cache == 1) {
11656                 if (btrfs_fs_compat_ro(info,
11657                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11658                         error(
11659                 "free space cache v2 detected, use --clear-space-cache v2");
11660                         ret = 1;
11661                         goto close_out;
11662                 }
11663                 printf("Clearing free space cache\n");
11664                 ret = clear_free_space_cache(info);
11665                 if (ret) {
11666                         error("failed to clear free space cache");
11667                         ret = 1;
11668                 } else {
11669                         printf("Free space cache cleared\n");
11670                 }
11671                 goto close_out;
11672         } else if (clear_space_cache == 2) {
11673                 if (!btrfs_fs_compat_ro(info,
11674                                         BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11675                         printf("no free space cache v2 to clear\n");
11676                         ret = 0;
11677                         goto close_out;
11678                 }
11679                 printf("Clear free space cache v2\n");
11680                 ret = btrfs_clear_free_space_tree(info);
11681                 if (ret) {
11682                         error("failed to clear free space cache v2: %d", ret);
11683                         ret = 1;
11684                 } else {
11685                         printf("free space cache v2 cleared\n");
11686                 }
11687                 goto close_out;
11688         }
11689
11690         /*
11691          * repair mode will force us to commit transaction which
11692          * will make us fail to load log tree when mounting.
11693          */
11694         if (repair && btrfs_super_log_root(info->super_copy)) {
11695                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11696                 if (!ret) {
11697                         ret = 1;
11698                         goto close_out;
11699                 }
11700                 ret = zero_log_tree(root);
11701                 if (ret) {
11702                         error("failed to zero log tree: %d", ret);
11703                         goto close_out;
11704                 }
11705         }
11706
11707         uuid_unparse(info->super_copy->fsid, uuidbuf);
11708         if (qgroup_report) {
11709                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11710                        uuidbuf);
11711                 ret = qgroup_verify_all(info);
11712                 if (ret == 0)
11713                         report_qgroups(1);
11714                 goto close_out;
11715         }
11716         if (subvolid) {
11717                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11718                        subvolid, argv[optind], uuidbuf);
11719                 ret = print_extent_state(info, subvolid);
11720                 goto close_out;
11721         }
11722         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11723
11724         if (!extent_buffer_uptodate(info->tree_root->node) ||
11725             !extent_buffer_uptodate(info->dev_root->node) ||
11726             !extent_buffer_uptodate(info->chunk_root->node)) {
11727                 error("critical roots corrupted, unable to check the filesystem");
11728                 ret = -EIO;
11729                 goto close_out;
11730         }
11731
11732         if (init_extent_tree || init_csum_tree) {
11733                 struct btrfs_trans_handle *trans;
11734
11735                 trans = btrfs_start_transaction(info->extent_root, 0);
11736                 if (IS_ERR(trans)) {
11737                         error("error starting transaction");
11738                         ret = PTR_ERR(trans);
11739                         goto close_out;
11740                 }
11741
11742                 if (init_extent_tree) {
11743                         printf("Creating a new extent tree\n");
11744                         ret = reinit_extent_tree(trans, info);
11745                         if (ret)
11746                                 goto close_out;
11747                 }
11748
11749                 if (init_csum_tree) {
11750                         printf("Reinitialize checksum tree\n");
11751                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11752                         if (ret) {
11753                                 error("checksum tree initialization failed: %d",
11754                                                 ret);
11755                                 ret = -EIO;
11756                                 goto close_out;
11757                         }
11758
11759                         ret = fill_csum_tree(trans, info->csum_root,
11760                                              init_extent_tree);
11761                         if (ret) {
11762                                 error("checksum tree refilling failed: %d", ret);
11763                                 return -EIO;
11764                         }
11765                 }
11766                 /*
11767                  * Ok now we commit and run the normal fsck, which will add
11768                  * extent entries for all of the items it finds.
11769                  */
11770                 ret = btrfs_commit_transaction(trans, info->extent_root);
11771                 if (ret)
11772                         goto close_out;
11773         }
11774         if (!extent_buffer_uptodate(info->extent_root->node)) {
11775                 error("critical: extent_root, unable to check the filesystem");
11776                 ret = -EIO;
11777                 goto close_out;
11778         }
11779         if (!extent_buffer_uptodate(info->csum_root->node)) {
11780                 error("critical: csum_root, unable to check the filesystem");
11781                 ret = -EIO;
11782                 goto close_out;
11783         }
11784
11785         if (!ctx.progress_enabled)
11786                 fprintf(stderr, "checking extents\n");
11787         if (check_mode == CHECK_MODE_LOWMEM)
11788                 ret = check_chunks_and_extents_v2(root);
11789         else
11790                 ret = check_chunks_and_extents(root);
11791         if (ret)
11792                 error(
11793                 "errors found in extent allocation tree or chunk allocation");
11794
11795         ret = repair_root_items(info);
11796         if (ret < 0)
11797                 goto close_out;
11798         if (repair) {
11799                 fprintf(stderr, "Fixed %d roots.\n", ret);
11800                 ret = 0;
11801         } else if (ret > 0) {
11802                 fprintf(stderr,
11803                        "Found %d roots with an outdated root item.\n",
11804                        ret);
11805                 fprintf(stderr,
11806                         "Please run a filesystem check with the option --repair to fix them.\n");
11807                 ret = 1;
11808                 goto close_out;
11809         }
11810
11811         if (!ctx.progress_enabled) {
11812                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11813                         fprintf(stderr, "checking free space tree\n");
11814                 else
11815                         fprintf(stderr, "checking free space cache\n");
11816         }
11817         ret = check_space_cache(root);
11818         if (ret)
11819                 goto out;
11820
11821         /*
11822          * We used to have to have these hole extents in between our real
11823          * extents so if we don't have this flag set we need to make sure there
11824          * are no gaps in the file extents for inodes, otherwise we can just
11825          * ignore it when this happens.
11826          */
11827         no_holes = btrfs_fs_incompat(root->fs_info,
11828                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11829         if (!ctx.progress_enabled)
11830                 fprintf(stderr, "checking fs roots\n");
11831         ret = check_fs_roots(root, &root_cache);
11832         if (ret)
11833                 goto out;
11834
11835         fprintf(stderr, "checking csums\n");
11836         ret = check_csums(root);
11837         if (ret)
11838                 goto out;
11839
11840         fprintf(stderr, "checking root refs\n");
11841         ret = check_root_refs(root, &root_cache);
11842         if (ret)
11843                 goto out;
11844
11845         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11846                 struct extent_buffer *eb;
11847
11848                 eb = list_first_entry(&root->fs_info->recow_ebs,
11849                                       struct extent_buffer, recow);
11850                 list_del_init(&eb->recow);
11851                 ret = recow_extent_buffer(root, eb);
11852                 if (ret)
11853                         break;
11854         }
11855
11856         while (!list_empty(&delete_items)) {
11857                 struct bad_item *bad;
11858
11859                 bad = list_first_entry(&delete_items, struct bad_item, list);
11860                 list_del_init(&bad->list);
11861                 if (repair)
11862                         ret = delete_bad_item(root, bad);
11863                 free(bad);
11864         }
11865
11866         if (info->quota_enabled) {
11867                 int err;
11868                 fprintf(stderr, "checking quota groups\n");
11869                 err = qgroup_verify_all(info);
11870                 if (err)
11871                         goto out;
11872                 report_qgroups(0);
11873                 err = repair_qgroups(info, &qgroups_repaired);
11874                 if (err)
11875                         goto out;
11876         }
11877
11878         if (!list_empty(&root->fs_info->recow_ebs)) {
11879                 error("transid errors in file system");
11880                 ret = 1;
11881         }
11882 out:
11883         /* Don't override original ret */
11884         if (!ret && qgroups_repaired)
11885                 ret = qgroups_repaired;
11886
11887         if (found_old_backref) { /*
11888                  * there was a disk format change when mixed
11889                  * backref was in testing tree. The old format
11890                  * existed about one week.
11891                  */
11892                 printf("\n * Found old mixed backref format. "
11893                        "The old format is not supported! *"
11894                        "\n * Please mount the FS in readonly mode, "
11895                        "backup data and re-format the FS. *\n\n");
11896                 ret = 1;
11897         }
11898         printf("found %llu bytes used err is %d\n",
11899                (unsigned long long)bytes_used, ret);
11900         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11901         printf("total tree bytes: %llu\n",
11902                (unsigned long long)total_btree_bytes);
11903         printf("total fs tree bytes: %llu\n",
11904                (unsigned long long)total_fs_tree_bytes);
11905         printf("total extent tree bytes: %llu\n",
11906                (unsigned long long)total_extent_tree_bytes);
11907         printf("btree space waste bytes: %llu\n",
11908                (unsigned long long)btree_space_waste);
11909         printf("file data blocks allocated: %llu\n referenced %llu\n",
11910                 (unsigned long long)data_bytes_allocated,
11911                 (unsigned long long)data_bytes_referenced);
11912
11913         free_qgroup_counts();
11914         free_root_recs_tree(&root_cache);
11915 close_out:
11916         close_ctree(root);
11917 err_out:
11918         if (ctx.progress_enabled)
11919                 task_deinit(ctx.info);
11920
11921         return ret;
11922 }