btrfs-progs: check: introduce function to check dir_item
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Phase shown by the progress printer.
 *
 * The first three values are used by print_status_check() as indexes into
 * its message table.  TASK_NOTHING has no message: the printer returns
 * without printing when it sees it, which is why it must stay last.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,	/* must remain the last element */
};
52
53 struct task_ctx {
54         int progress_enabled;
55         enum task_position tp;
56
57         struct task_info *info;
58 };
59
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Checker implementation selected by the user.
 *
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for a string it
 * does not recognise.  CHECK_MODE_DEFAULT is an alias, not a fourth mode.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct list_head list;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 {
118         return container_of(back, struct data_backref, node);
119 }
120
121 /*
122  * Much like data_backref, just removed the undetermined members
123  * and change it to use list_head.
124  * During extent scan, it is stored in root->orphan_data_extent.
125  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126  */
127 struct orphan_data_extent {
128         struct list_head list;
129         u64 root;
130         u64 objectid;
131         u64 offset;
132         u64 disk_bytenr;
133         u64 disk_len;
134 };
135
136 struct tree_backref {
137         struct extent_backref node;
138         union {
139                 u64 parent;
140                 u64 root;
141         };
142 };
143
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 {
146         return container_of(back, struct tree_backref, node);
147 }
148
/*
 * Explicit "not determined yet" value for
 * extent_record::flag_block_full_backref, which is two bits wide so that
 * it can hold this value next to 0 and 1.
 */
enum {
	FLAG_UNSET = 2
};
151
152 struct extent_record {
153         struct list_head backrefs;
154         struct list_head dups;
155         struct list_head list;
156         struct cache_extent cache;
157         struct btrfs_disk_key parent_key;
158         u64 start;
159         u64 max_size;
160         u64 nr;
161         u64 refs;
162         u64 extent_item_refs;
163         u64 generation;
164         u64 parent_generation;
165         u64 info_objectid;
166         u32 num_duplicates;
167         u8 info_level;
168         unsigned int flag_block_full_backref:2;
169         unsigned int found_rec:1;
170         unsigned int content_checked:1;
171         unsigned int owner_ref_checked:1;
172         unsigned int is_root:1;
173         unsigned int metadata:1;
174         unsigned int bad_full_backref:1;
175         unsigned int crossing_stripes:1;
176         unsigned int wrong_chunk_type:1;
177 };
178
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 {
181         return container_of(entry, struct extent_record, list);
182 }
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         u8 filetype;
190         u8 ref_type;
191         int errors;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 {
200         return list_entry(entry, struct inode_backref, list);
201 }
202
203 struct root_item_record {
204         struct list_head list;
205         u64 objectid;
206         u64 bytenr;
207         u64 last_snapshot;
208         u8 level;
209         u8 drop_level;
210         int level_size;
211         struct btrfs_key drop_key;
212 };
213
/*
 * Error bits for a back reference.  print_ref_error() turns each bit into
 * a human readable message.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
227
228 struct file_extent_hole {
229         struct rb_node node;
230         u64 start;
231         u64 len;
232 };
233
234 struct inode_record {
235         struct list_head backrefs;
236         unsigned int checked:1;
237         unsigned int merging:1;
238         unsigned int found_inode_item:1;
239         unsigned int found_dir_item:1;
240         unsigned int found_file_extent:1;
241         unsigned int found_csum_item:1;
242         unsigned int some_csum_missing:1;
243         unsigned int nodatasum:1;
244         int errors;
245
246         u64 ino;
247         u32 nlink;
248         u32 imode;
249         u64 isize;
250         u64 nbytes;
251
252         u32 found_link;
253         u64 found_size;
254         u64 extent_start;
255         u64 extent_end;
256         struct rb_root holes;
257         struct list_head orphan_extents;
258
259         u32 refs;
260 };
261
/*
 * Error bits for inode_record::errors.  print_inode_error() turns each bit
 * into a human readable message.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
277
278 struct root_backref {
279         struct list_head list;
280         unsigned int found_dir_item:1;
281         unsigned int found_dir_index:1;
282         unsigned int found_back_ref:1;
283         unsigned int found_forward_ref:1;
284         unsigned int reachable:1;
285         int errors;
286         u64 ref_root;
287         u64 dir;
288         u64 index;
289         u16 namelen;
290         char name[0];
291 };
292
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 {
295         return list_entry(entry, struct root_backref, list);
296 }
297
298 struct root_record {
299         struct list_head backrefs;
300         struct cache_extent cache;
301         unsigned int found_root_item:1;
302         u64 objectid;
303         u32 found_ref;
304 };
305
306 struct ptr_node {
307         struct cache_extent cache;
308         void *data;
309 };
310
311 struct shared_node {
312         struct cache_extent cache;
313         struct cache_tree root_cache;
314         struct cache_tree inode_cache;
315         struct inode_record *current;
316         u32 refs;
317 };
318
319 struct block_info {
320         u64 start;
321         u32 size;
322 };
323
324 struct walk_control {
325         struct cache_tree shared;
326         struct shared_node *nodes[BTRFS_MAX_LEVEL];
327         int active_node;
328         int root_level;
329 };
330
331 struct bad_item {
332         struct btrfs_key key;
333         u64 root_id;
334         struct list_head list;
335 };
336
337 struct extent_entry {
338         u64 bytenr;
339         u64 bytes;
340         int count;
341         int broken;
342         struct list_head list;
343 };
344
345 struct root_item_info {
346         /* level of the root */
347         u8 level;
348         /* number of nodes at this level, must be 1 for a root */
349         int node_count;
350         u64 bytenr;
351         u64 gen;
352         struct cache_extent cache_extent;
353 };
354
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Each flag must own its bit: the flags are OR-ed into one error word, so
 * two flags sharing a bit cannot be told apart afterwards.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * This flag used to be (1 << 4) as well, which made it indistinguishable
 * from REFERENCER_MISMATCH.  It now sits on a high bit, away from the
 * contiguous low range, so that it cannot clash with bits appended to
 * that range.  The value still fits a positive int.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 30) /* For kernel scrub workaround */
371
372 static void *print_status_check(void *p)
373 {
374         struct task_ctx *priv = p;
375         const char work_indicator[] = { '.', 'o', 'O', 'o' };
376         uint32_t count = 0;
377         static char *task_position_string[] = {
378                 "checking extents",
379                 "checking free space cache",
380                 "checking fs roots",
381         };
382
383         task_period_start(priv->info, 1000 /* 1s */);
384
385         if (priv->tp == TASK_NOTHING)
386                 return NULL;
387
388         while (1) {
389                 printf("%s [%c]\r", task_position_string[priv->tp],
390                                 work_indicator[count % 4]);
391                 count++;
392                 fflush(stdout);
393                 task_period_wait(priv->info);
394         }
395         return NULL;
396 }
397
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p: unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
405
406 static enum btrfs_check_mode parse_check_mode(const char *str)
407 {
408         if (strcmp(str, "lowmem") == 0)
409                 return CHECK_MODE_LOWMEM;
410         if (strcmp(str, "orig") == 0)
411                 return CHECK_MODE_ORIGINAL;
412         if (strcmp(str, "original") == 0)
413                 return CHECK_MODE_ORIGINAL;
414
415         return CHECK_MODE_UNKNOWN;
416 }
417
418 /* Compatible function to allow reuse of old codes */
419 static u64 first_extent_gap(struct rb_root *holes)
420 {
421         struct file_extent_hole *hole;
422
423         if (RB_EMPTY_ROOT(holes))
424                 return (u64)-1;
425
426         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
427         return hole->start;
428 }
429
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 {
432         struct file_extent_hole *hole1;
433         struct file_extent_hole *hole2;
434
435         hole1 = rb_entry(node1, struct file_extent_hole, node);
436         hole2 = rb_entry(node2, struct file_extent_hole, node);
437
438         if (hole1->start > hole2->start)
439                 return -1;
440         if (hole1->start < hole2->start)
441                 return 1;
442         /* Now hole1->start == hole2->start */
443         if (hole1->len >= hole2->len)
444                 /*
445                  * Hole 1 will be merge center
446                  * Same hole will be merged later
447                  */
448                 return -1;
449         /* Hole 2 will be merge center */
450         return 1;
451 }
452
453 /*
454  * Add a hole to the record
455  *
456  * This will do hole merge for copy_file_extent_holes(),
457  * which will ensure there won't be continuous holes.
458  */
459 static int add_file_extent_hole(struct rb_root *holes,
460                                 u64 start, u64 len)
461 {
462         struct file_extent_hole *hole;
463         struct file_extent_hole *prev = NULL;
464         struct file_extent_hole *next = NULL;
465
466         hole = malloc(sizeof(*hole));
467         if (!hole)
468                 return -ENOMEM;
469         hole->start = start;
470         hole->len = len;
471         /* Since compare will not return 0, no -EEXIST will happen */
472         rb_insert(holes, &hole->node, compare_hole);
473
474         /* simple merge with previous hole */
475         if (rb_prev(&hole->node))
476                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477                                 node);
478         if (prev && prev->start + prev->len >= hole->start) {
479                 hole->len = hole->start + hole->len - prev->start;
480                 hole->start = prev->start;
481                 rb_erase(&prev->node, holes);
482                 free(prev);
483                 prev = NULL;
484         }
485
486         /* iterate merge with next holes */
487         while (1) {
488                 if (!rb_next(&hole->node))
489                         break;
490                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491                                         node);
492                 if (hole->start + hole->len >= next->start) {
493                         if (hole->start + hole->len <= next->start + next->len)
494                                 hole->len = next->start + next->len -
495                                             hole->start;
496                         rb_erase(&next->node, holes);
497                         free(next);
498                         next = NULL;
499                 } else
500                         break;
501         }
502         return 0;
503 }
504
505 static int compare_hole_range(struct rb_node *node, void *data)
506 {
507         struct file_extent_hole *hole;
508         u64 start;
509
510         hole = (struct file_extent_hole *)data;
511         start = hole->start;
512
513         hole = rb_entry(node, struct file_extent_hole, node);
514         if (start < hole->start)
515                 return -1;
516         if (start >= hole->start && start < hole->start + hole->len)
517                 return 0;
518         return 1;
519 }
520
521 /*
522  * Delete a hole in the record
523  *
524  * This will do the hole split and is much restrict than add.
525  */
526 static int del_file_extent_hole(struct rb_root *holes,
527                                 u64 start, u64 len)
528 {
529         struct file_extent_hole *hole;
530         struct file_extent_hole tmp;
531         u64 prev_start = 0;
532         u64 prev_len = 0;
533         u64 next_start = 0;
534         u64 next_len = 0;
535         struct rb_node *node;
536         int have_prev = 0;
537         int have_next = 0;
538         int ret = 0;
539
540         tmp.start = start;
541         tmp.len = len;
542         node = rb_search(holes, &tmp, compare_hole_range, NULL);
543         if (!node)
544                 return -EEXIST;
545         hole = rb_entry(node, struct file_extent_hole, node);
546         if (start + len > hole->start + hole->len)
547                 return -EEXIST;
548
549         /*
550          * Now there will be no overlap, delete the hole and re-add the
551          * split(s) if they exists.
552          */
553         if (start > hole->start) {
554                 prev_start = hole->start;
555                 prev_len = start - hole->start;
556                 have_prev = 1;
557         }
558         if (hole->start + hole->len > start + len) {
559                 next_start = start + len;
560                 next_len = hole->start + hole->len - start - len;
561                 have_next = 1;
562         }
563         rb_erase(node, holes);
564         free(hole);
565         if (have_prev) {
566                 ret = add_file_extent_hole(holes, prev_start, prev_len);
567                 if (ret < 0)
568                         return ret;
569         }
570         if (have_next) {
571                 ret = add_file_extent_hole(holes, next_start, next_len);
572                 if (ret < 0)
573                         return ret;
574         }
575         return 0;
576 }
577
578 static int copy_file_extent_holes(struct rb_root *dst,
579                                   struct rb_root *src)
580 {
581         struct file_extent_hole *hole;
582         struct rb_node *node;
583         int ret = 0;
584
585         node = rb_first(src);
586         while (node) {
587                 hole = rb_entry(node, struct file_extent_hole, node);
588                 ret = add_file_extent_hole(dst, hole->start, hole->len);
589                 if (ret)
590                         break;
591                 node = rb_next(node);
592         }
593         return ret;
594 }
595
596 static void free_file_extent_holes(struct rb_root *holes)
597 {
598         struct rb_node *node;
599         struct file_extent_hole *hole;
600
601         node = rb_first(holes);
602         while (node) {
603                 hole = rb_entry(node, struct file_extent_hole, node);
604                 rb_erase(node, holes);
605                 free(hole);
606                 node = rb_first(holes);
607         }
608 }
609
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613                                  struct btrfs_root *root)
614 {
615         if (root->last_trans != trans->transid) {
616                 root->track_dirty = 1;
617                 root->last_trans = trans->transid;
618                 root->commit_root = root->node;
619                 extent_buffer_get(root->node);
620         }
621 }
622
623 static u8 imode_to_type(u32 imode)
624 {
625 #define S_SHIFT 12
626         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
628                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
629                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
630                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
631                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
632                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
633                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
634         };
635
636         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
637 #undef S_SHIFT
638 }
639
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 {
642         struct device_record *rec1;
643         struct device_record *rec2;
644
645         rec1 = rb_entry(node1, struct device_record, node);
646         rec2 = rb_entry(node2, struct device_record, node);
647         if (rec1->devid > rec2->devid)
648                 return -1;
649         else if (rec1->devid < rec2->devid)
650                 return 1;
651         else
652                 return 0;
653 }
654
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 {
657         struct inode_record *rec;
658         struct inode_backref *backref;
659         struct inode_backref *orig;
660         struct inode_backref *tmp;
661         struct orphan_data_extent *src_orphan;
662         struct orphan_data_extent *dst_orphan;
663         struct rb_node *rb;
664         size_t size;
665         int ret;
666
667         rec = malloc(sizeof(*rec));
668         if (!rec)
669                 return ERR_PTR(-ENOMEM);
670         memcpy(rec, orig_rec, sizeof(*rec));
671         rec->refs = 1;
672         INIT_LIST_HEAD(&rec->backrefs);
673         INIT_LIST_HEAD(&rec->orphan_extents);
674         rec->holes = RB_ROOT;
675
676         list_for_each_entry(orig, &orig_rec->backrefs, list) {
677                 size = sizeof(*orig) + orig->namelen + 1;
678                 backref = malloc(size);
679                 if (!backref) {
680                         ret = -ENOMEM;
681                         goto cleanup;
682                 }
683                 memcpy(backref, orig, size);
684                 list_add_tail(&backref->list, &rec->backrefs);
685         }
686         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687                 dst_orphan = malloc(sizeof(*dst_orphan));
688                 if (!dst_orphan) {
689                         ret = -ENOMEM;
690                         goto cleanup;
691                 }
692                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694         }
695         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
696         if (ret < 0)
697                 goto cleanup_rb;
698
699         return rec;
700
701 cleanup_rb:
702         rb = rb_first(&rec->holes);
703         while (rb) {
704                 struct file_extent_hole *hole;
705
706                 hole = rb_entry(rb, struct file_extent_hole, node);
707                 rb = rb_next(rb);
708                 free(hole);
709         }
710
711 cleanup:
712         if (!list_empty(&rec->backrefs))
713                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714                         list_del(&orig->list);
715                         free(orig);
716                 }
717
718         if (!list_empty(&rec->orphan_extents))
719                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720                         list_del(&orig->list);
721                         free(orig);
722                 }
723
724         free(rec);
725
726         return ERR_PTR(ret);
727 }
728
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
730                                       u64 objectid)
731 {
732         struct orphan_data_extent *orphan;
733
734         if (list_empty(orphan_extents))
735                 return;
736         printf("The following data extent is lost in tree %llu:\n",
737                objectid);
738         list_for_each_entry(orphan, orphan_extents, list) {
739                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
741                        orphan->disk_len);
742         }
743 }
744
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 {
747         u64 root_objectid = root->root_key.objectid;
748         int errors = rec->errors;
749
750         if (!errors)
751                 return;
752         /* reloc root errors, we print its corresponding fs root objectid*/
753         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754                 root_objectid = root->root_key.offset;
755                 fprintf(stderr, "reloc");
756         }
757         fprintf(stderr, "root %llu inode %llu errors %x",
758                 (unsigned long long) root_objectid,
759                 (unsigned long long) rec->ino, rec->errors);
760
761         if (errors & I_ERR_NO_INODE_ITEM)
762                 fprintf(stderr, ", no inode item");
763         if (errors & I_ERR_NO_ORPHAN_ITEM)
764                 fprintf(stderr, ", no orphan item");
765         if (errors & I_ERR_DUP_INODE_ITEM)
766                 fprintf(stderr, ", dup inode item");
767         if (errors & I_ERR_DUP_DIR_INDEX)
768                 fprintf(stderr, ", dup dir index");
769         if (errors & I_ERR_ODD_DIR_ITEM)
770                 fprintf(stderr, ", odd dir item");
771         if (errors & I_ERR_ODD_FILE_EXTENT)
772                 fprintf(stderr, ", odd file extent");
773         if (errors & I_ERR_BAD_FILE_EXTENT)
774                 fprintf(stderr, ", bad file extent");
775         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776                 fprintf(stderr, ", file extent overlap");
777         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778                 fprintf(stderr, ", file extent discount");
779         if (errors & I_ERR_DIR_ISIZE_WRONG)
780                 fprintf(stderr, ", dir isize wrong");
781         if (errors & I_ERR_FILE_NBYTES_WRONG)
782                 fprintf(stderr, ", nbytes wrong");
783         if (errors & I_ERR_ODD_CSUM_ITEM)
784                 fprintf(stderr, ", odd csum item");
785         if (errors & I_ERR_SOME_CSUM_MISSING)
786                 fprintf(stderr, ", some csum missing");
787         if (errors & I_ERR_LINK_COUNT_WRONG)
788                 fprintf(stderr, ", link count wrong");
789         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790                 fprintf(stderr, ", orphan file extent");
791         fprintf(stderr, "\n");
792         /* Print the orphan extents if needed */
793         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795
796         /* Print the holes if needed */
797         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798                 struct file_extent_hole *hole;
799                 struct rb_node *node;
800                 int found = 0;
801
802                 node = rb_first(&rec->holes);
803                 fprintf(stderr, "Found file extent holes:\n");
804                 while (node) {
805                         found = 1;
806                         hole = rb_entry(node, struct file_extent_hole, node);
807                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
808                                 hole->start, hole->len);
809                         node = rb_next(node);
810                 }
811                 if (!found)
812                         fprintf(stderr, "\tstart: 0, len: %llu\n",
813                                 round_up(rec->isize, root->sectorsize));
814         }
815 }
816
817 static void print_ref_error(int errors)
818 {
819         if (errors & REF_ERR_NO_DIR_ITEM)
820                 fprintf(stderr, ", no dir item");
821         if (errors & REF_ERR_NO_DIR_INDEX)
822                 fprintf(stderr, ", no dir index");
823         if (errors & REF_ERR_NO_INODE_REF)
824                 fprintf(stderr, ", no inode ref");
825         if (errors & REF_ERR_DUP_DIR_ITEM)
826                 fprintf(stderr, ", dup dir item");
827         if (errors & REF_ERR_DUP_DIR_INDEX)
828                 fprintf(stderr, ", dup dir index");
829         if (errors & REF_ERR_DUP_INODE_REF)
830                 fprintf(stderr, ", dup inode ref");
831         if (errors & REF_ERR_INDEX_UNMATCH)
832                 fprintf(stderr, ", index mismatch");
833         if (errors & REF_ERR_FILETYPE_UNMATCH)
834                 fprintf(stderr, ", filetype mismatch");
835         if (errors & REF_ERR_NAME_TOO_LONG)
836                 fprintf(stderr, ", name too long");
837         if (errors & REF_ERR_NO_ROOT_REF)
838                 fprintf(stderr, ", no root ref");
839         if (errors & REF_ERR_NO_ROOT_BACKREF)
840                 fprintf(stderr, ", no root backref");
841         if (errors & REF_ERR_DUP_ROOT_REF)
842                 fprintf(stderr, ", dup root ref");
843         if (errors & REF_ERR_DUP_ROOT_BACKREF)
844                 fprintf(stderr, ", dup root backref");
845         fprintf(stderr, "\n");
846 }
847
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
849                                           u64 ino, int mod)
850 {
851         struct ptr_node *node;
852         struct cache_extent *cache;
853         struct inode_record *rec = NULL;
854         int ret;
855
856         cache = lookup_cache_extent(inode_cache, ino, 1);
857         if (cache) {
858                 node = container_of(cache, struct ptr_node, cache);
859                 rec = node->data;
860                 if (mod && rec->refs > 1) {
861                         node->data = clone_inode_rec(rec);
862                         if (IS_ERR(node->data))
863                                 return node->data;
864                         rec->refs--;
865                         rec = node->data;
866                 }
867         } else if (mod) {
868                 rec = calloc(1, sizeof(*rec));
869                 if (!rec)
870                         return ERR_PTR(-ENOMEM);
871                 rec->ino = ino;
872                 rec->extent_start = (u64)-1;
873                 rec->refs = 1;
874                 INIT_LIST_HEAD(&rec->backrefs);
875                 INIT_LIST_HEAD(&rec->orphan_extents);
876                 rec->holes = RB_ROOT;
877
878                 node = malloc(sizeof(*node));
879                 if (!node) {
880                         free(rec);
881                         return ERR_PTR(-ENOMEM);
882                 }
883                 node->cache.start = ino;
884                 node->cache.size = 1;
885                 node->data = rec;
886
887                 if (ino == BTRFS_FREE_INO_OBJECTID)
888                         rec->found_link = 1;
889
890                 ret = insert_cache_extent(inode_cache, &node->cache);
891                 if (ret)
892                         return ERR_PTR(-EEXIST);
893         }
894         return rec;
895 }
896
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 {
899         struct orphan_data_extent *orphan;
900
901         while (!list_empty(orphan_extents)) {
902                 orphan = list_entry(orphan_extents->next,
903                                     struct orphan_data_extent, list);
904                 list_del(&orphan->list);
905                 free(orphan);
906         }
907 }
908
909 static void free_inode_rec(struct inode_record *rec)
910 {
911         struct inode_backref *backref;
912
913         if (--rec->refs > 0)
914                 return;
915
916         while (!list_empty(&rec->backrefs)) {
917                 backref = to_inode_backref(rec->backrefs.next);
918                 list_del(&backref->list);
919                 free(backref);
920         }
921         free_orphan_data_extents(&rec->orphan_extents);
922         free_file_extent_holes(&rec->holes);
923         free(rec);
924 }
925
926 static int can_free_inode_rec(struct inode_record *rec)
927 {
928         if (!rec->errors && rec->checked && rec->found_inode_item &&
929             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
930                 return 1;
931         return 0;
932 }
933
/*
 * Validate as much of @rec as is currently known and, if nothing is left to
 * track, remove it from @inode_cache and free it.
 *
 * Nothing happens until the inode item has been seen.  Completed backrefs
 * are pruned first; the per-type consistency checks only run once the record
 * is marked checked and is not in the middle of a merge.
 */
static void maybe_free_inode_rec(struct cache_tree *inode_cache,
                                 struct inode_record *rec)
{
        struct cache_extent *cache;
        struct inode_backref *tmp, *backref;
        struct ptr_node *node;
        u8 filetype;

        /* The mode (and thus the expected file type) is unknown until then. */
        if (!rec->found_inode_item)
                return;

        filetype = imode_to_type(rec->imode);
        list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
                /* Only backrefs with both dir item and dir index are judged. */
                if (backref->found_dir_item && backref->found_dir_index) {
                        if (backref->filetype != filetype)
                                backref->errors |= REF_ERR_FILETYPE_UNMATCH;
                        /*
                         * A clean backref that also has its inode ref is
                         * dropped, but only while the link count agrees.
                         */
                        if (!backref->errors && backref->found_inode_ref &&
                            rec->nlink == rec->found_link) {
                                list_del(&backref->list);
                                free(backref);
                        }
                }
        }

        /* Final checks are deferred while items may still arrive. */
        if (!rec->checked || rec->merging)
                return;

        if (S_ISDIR(rec->imode)) {
                if (rec->found_size != rec->isize)
                        rec->errors |= I_ERR_DIR_ISIZE_WRONG;
                if (rec->found_file_extent)
                        rec->errors |= I_ERR_ODD_FILE_EXTENT;
        } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
                if (rec->found_dir_item)
                        rec->errors |= I_ERR_ODD_DIR_ITEM;
                if (rec->found_size != rec->nbytes)
                        rec->errors |= I_ERR_FILE_NBYTES_WRONG;
                /*
                 * Extents must cover the file up to isize without gaps,
                 * unless the inode is unlinked or no_holes is set.
                 */
                if (rec->nlink > 0 && !no_holes &&
                    (rec->extent_end < rec->isize ||
                     first_extent_gap(&rec->holes) < rec->isize))
                        rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
        }

        /* Checksum presence must agree with the NODATASUM flag. */
        if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
                if (rec->found_csum_item && rec->nodatasum)
                        rec->errors |= I_ERR_ODD_CSUM_ITEM;
                if (rec->some_csum_missing && !rec->nodatasum)
                        rec->errors |= I_ERR_SOME_CSUM_MISSING;
        }

        /* The record must not be shared with another cache at this point. */
        BUG_ON(rec->refs != 1);
        if (can_free_inode_rec(rec)) {
                /*
                 * NOTE(review): the lookup result is used without a NULL
                 * check; presumably @rec is always present in @inode_cache
                 * here -- confirm against callers.
                 */
                cache = lookup_cache_extent(inode_cache, rec->ino, 1);
                node = container_of(cache, struct ptr_node, cache);
                BUG_ON(node->data != rec);
                remove_cache_extent(inode_cache, &node->cache);
                free(node);
                free_inode_rec(rec);
        }
}
994
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 {
997         struct btrfs_path path;
998         struct btrfs_key key;
999         int ret;
1000
1001         key.objectid = BTRFS_ORPHAN_OBJECTID;
1002         key.type = BTRFS_ORPHAN_ITEM_KEY;
1003         key.offset = ino;
1004
1005         btrfs_init_path(&path);
1006         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007         btrfs_release_path(&path);
1008         if (ret > 0)
1009                 ret = -ENOENT;
1010         return ret;
1011 }
1012
1013 static int process_inode_item(struct extent_buffer *eb,
1014                               int slot, struct btrfs_key *key,
1015                               struct shared_node *active_node)
1016 {
1017         struct inode_record *rec;
1018         struct btrfs_inode_item *item;
1019
1020         rec = active_node->current;
1021         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022         if (rec->found_inode_item) {
1023                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1024                 return 1;
1025         }
1026         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027         rec->nlink = btrfs_inode_nlink(eb, item);
1028         rec->isize = btrfs_inode_size(eb, item);
1029         rec->nbytes = btrfs_inode_nbytes(eb, item);
1030         rec->imode = btrfs_inode_mode(eb, item);
1031         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032                 rec->nodatasum = 1;
1033         rec->found_inode_item = 1;
1034         if (rec->nlink == 0)
1035                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036         maybe_free_inode_rec(&active_node->inode_cache, rec);
1037         return 0;
1038 }
1039
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041                                                 const char *name,
1042                                                 int namelen, u64 dir)
1043 {
1044         struct inode_backref *backref;
1045
1046         list_for_each_entry(backref, &rec->backrefs, list) {
1047                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048                         break;
1049                 if (backref->dir != dir || backref->namelen != namelen)
1050                         continue;
1051                 if (memcmp(name, backref->name, namelen))
1052                         continue;
1053                 return backref;
1054         }
1055
1056         backref = malloc(sizeof(*backref) + namelen + 1);
1057         if (!backref)
1058                 return NULL;
1059         memset(backref, 0, sizeof(*backref));
1060         backref->dir = dir;
1061         backref->namelen = namelen;
1062         memcpy(backref->name, name, namelen);
1063         backref->name[namelen] = '\0';
1064         list_add_tail(&backref->list, &rec->backrefs);
1065         return backref;
1066 }
1067
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069                              u64 ino, u64 dir, u64 index,
1070                              const char *name, int namelen,
1071                              u8 filetype, u8 itemtype, int errors)
1072 {
1073         struct inode_record *rec;
1074         struct inode_backref *backref;
1075
1076         rec = get_inode_rec(inode_cache, ino, 1);
1077         BUG_ON(IS_ERR(rec));
1078         backref = get_inode_backref(rec, name, namelen, dir);
1079         BUG_ON(!backref);
1080         if (errors)
1081                 backref->errors |= errors;
1082         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083                 if (backref->found_dir_index)
1084                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085                 if (backref->found_inode_ref && backref->index != index)
1086                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1087                 if (backref->found_dir_item && backref->filetype != filetype)
1088                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089
1090                 backref->index = index;
1091                 backref->filetype = filetype;
1092                 backref->found_dir_index = 1;
1093         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094                 rec->found_link++;
1095                 if (backref->found_dir_item)
1096                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097                 if (backref->found_dir_index && backref->filetype != filetype)
1098                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099
1100                 backref->filetype = filetype;
1101                 backref->found_dir_item = 1;
1102         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104                 if (backref->found_inode_ref)
1105                         backref->errors |= REF_ERR_DUP_INODE_REF;
1106                 if (backref->found_dir_index && backref->index != index)
1107                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1108                 else
1109                         backref->index = index;
1110
1111                 backref->ref_type = itemtype;
1112                 backref->found_inode_ref = 1;
1113         } else {
1114                 BUG_ON(1);
1115         }
1116
1117         maybe_free_inode_rec(inode_cache, rec);
1118         return 0;
1119 }
1120
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122                             struct cache_tree *dst_cache)
1123 {
1124         struct inode_backref *backref;
1125         u32 dir_count = 0;
1126         int ret = 0;
1127
1128         dst->merging = 1;
1129         list_for_each_entry(backref, &src->backrefs, list) {
1130                 if (backref->found_dir_index) {
1131                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1132                                         backref->index, backref->name,
1133                                         backref->namelen, backref->filetype,
1134                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1135                 }
1136                 if (backref->found_dir_item) {
1137                         dir_count++;
1138                         add_inode_backref(dst_cache, dst->ino,
1139                                         backref->dir, 0, backref->name,
1140                                         backref->namelen, backref->filetype,
1141                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1142                 }
1143                 if (backref->found_inode_ref) {
1144                         add_inode_backref(dst_cache, dst->ino,
1145                                         backref->dir, backref->index,
1146                                         backref->name, backref->namelen, 0,
1147                                         backref->ref_type, backref->errors);
1148                 }
1149         }
1150
1151         if (src->found_dir_item)
1152                 dst->found_dir_item = 1;
1153         if (src->found_file_extent)
1154                 dst->found_file_extent = 1;
1155         if (src->found_csum_item)
1156                 dst->found_csum_item = 1;
1157         if (src->some_csum_missing)
1158                 dst->some_csum_missing = 1;
1159         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1161                 if (ret < 0)
1162                         return ret;
1163         }
1164
1165         BUG_ON(src->found_link < dir_count);
1166         dst->found_link += src->found_link - dir_count;
1167         dst->found_size += src->found_size;
1168         if (src->extent_start != (u64)-1) {
1169                 if (dst->extent_start == (u64)-1) {
1170                         dst->extent_start = src->extent_start;
1171                         dst->extent_end = src->extent_end;
1172                 } else {
1173                         if (dst->extent_end > src->extent_start)
1174                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175                         else if (dst->extent_end < src->extent_start) {
1176                                 ret = add_file_extent_hole(&dst->holes,
1177                                         dst->extent_end,
1178                                         src->extent_start - dst->extent_end);
1179                         }
1180                         if (dst->extent_end < src->extent_end)
1181                                 dst->extent_end = src->extent_end;
1182                 }
1183         }
1184
1185         dst->errors |= src->errors;
1186         if (src->found_inode_item) {
1187                 if (!dst->found_inode_item) {
1188                         dst->nlink = src->nlink;
1189                         dst->isize = src->isize;
1190                         dst->nbytes = src->nbytes;
1191                         dst->imode = src->imode;
1192                         dst->nodatasum = src->nodatasum;
1193                         dst->found_inode_item = 1;
1194                 } else {
1195                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1196                 }
1197         }
1198         dst->merging = 0;
1199
1200         return 0;
1201 }
1202
/*
 * Move or copy all inode records of @src_node into @dst_node.
 *
 * One reference on @src_node is dropped.  If that was the last one, its
 * ptr_nodes are moved into @dst_node; otherwise new ptr_nodes are allocated
 * that share the same inode records (each record's refs is bumped).  The
 * root_cache is processed first, then the inode_cache.  When @dst_node
 * already holds a record for the same inode, the two are merged and the
 * source side is released.
 *
 * Always returns 0; allocation and insertion failures trip BUG_ON().
 */
static int splice_shared_node(struct shared_node *src_node,
                              struct shared_node *dst_node)
{
        struct cache_extent *cache;
        struct ptr_node *node, *ins;
        struct cache_tree *src, *dst;
        struct inode_record *rec, *conflict;
        u64 current_ino = 0;
        int splice = 0;
        int ret;

        /* Last reference gone: entries can be moved instead of copied. */
        if (--src_node->refs == 0)
                splice = 1;
        if (src_node->current)
                current_ino = src_node->current->ino;

        src = &src_node->root_cache;
        dst = &dst_node->root_cache;
again:
        cache = search_cache_extent(src, 0);
        while (cache) {
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                /* Advance before @node may be unlinked from @src below. */
                cache = next_cache_extent(cache);

                if (splice) {
                        remove_cache_extent(src, &node->cache);
                        ins = node;
                } else {
                        ins = malloc(sizeof(*ins));
                        BUG_ON(!ins);
                        ins->cache.start = node->cache.start;
                        ins->cache.size = node->cache.size;
                        ins->data = rec;
                        rec->refs++;
                }
                ret = insert_cache_extent(dst, &ins->cache);
                if (ret == -EEXIST) {
                        /* @dst already tracks this inode: merge into it. */
                        conflict = get_inode_rec(dst, rec->ino, 1);
                        BUG_ON(IS_ERR(conflict));
                        merge_inode_recs(rec, conflict, dst);
                        if (rec->checked) {
                                conflict->checked = 1;
                                if (dst_node->current == conflict)
                                        dst_node->current = NULL;
                        }
                        maybe_free_inode_rec(dst, conflict);
                        /* Drop the reference @ins held and @ins itself. */
                        free_inode_rec(rec);
                        free(ins);
                } else {
                        BUG_ON(ret);
                }
        }

        /* Second pass: repeat the walk for the inode caches. */
        if (src == &src_node->root_cache) {
                src = &src_node->inode_cache;
                dst = &dst_node->inode_cache;
                goto again;
        }

        /*
         * @dst now refers to dst_node->inode_cache.  If the source was
         * working on a higher-numbered inode than the destination (or the
         * destination has none), the destination's current record is marked
         * checked and the source's inode becomes the current one.
         */
        if (current_ino > 0 && (!dst_node->current ||
            current_ino > dst_node->current->ino)) {
                if (dst_node->current) {
                        dst_node->current->checked = 1;
                        maybe_free_inode_rec(dst, dst_node->current);
                }
                dst_node->current = get_inode_rec(dst, current_ino, 1);
                BUG_ON(IS_ERR(dst_node->current));
        }
        return 0;
}
1275 static void free_inode_ptr(struct cache_extent *cache)
1276 {
1277         struct ptr_node *node;
1278         struct inode_record *rec;
1279
1280         node = container_of(cache, struct ptr_node, cache);
1281         rec = node->data;
1282         free_inode_rec(rec);
1283         free(node);
1284 }
1285
/*
 * Instantiate the tree teardown helper for inode-record caches, with
 * free_inode_ptr() as the per-entry destructor.
 * NOTE(review): the macro is defined outside this section; it presumably
 * generates free_inode_recs_tree(), which enter_shared_node() calls --
 * confirm against the macro definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1289                                             u64 bytenr)
1290 {
1291         struct cache_extent *cache;
1292         struct shared_node *node;
1293
1294         cache = lookup_cache_extent(shared, bytenr, 1);
1295         if (cache) {
1296                 node = container_of(cache, struct shared_node, cache);
1297                 return node;
1298         }
1299         return NULL;
1300 }
1301
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1303 {
1304         int ret;
1305         struct shared_node *node;
1306
1307         node = calloc(1, sizeof(*node));
1308         if (!node)
1309                 return -ENOMEM;
1310         node->cache.start = bytenr;
1311         node->cache.size = 1;
1312         cache_tree_init(&node->root_cache);
1313         cache_tree_init(&node->inode_cache);
1314         node->refs = refs;
1315
1316         ret = insert_cache_extent(shared, &node->cache);
1317
1318         return ret;
1319 }
1320
/*
 * Called when the walk reaches the shared tree block at @bytenr on @level.
 *
 * Returns 0 when the block still has to be walked: either @level already is
 * the active level, or the block is seen for the first time, in which case a
 * shared node with @refs references is created and made the active node.
 *
 * Returns 1 when the block was processed before.  Its cached records are
 * then either discarded (the active node is at the root level and the root
 * has no references) or spliced into the currently active node.
 */
static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
                             struct walk_control *wc, int level)
{
        struct shared_node *node;
        struct shared_node *dest;
        int ret;

        if (level == wc->active_node)
                return 0;

        BUG_ON(wc->active_node <= level);
        node = find_shared_node(&wc->shared, bytenr);
        if (!node) {
                /* First visit: start collecting records for this block. */
                ret = add_shared_node(&wc->shared, bytenr, refs);
                BUG_ON(ret);
                node = find_shared_node(&wc->shared, bytenr);
                wc->nodes[level] = node;
                wc->active_node = level;
                return 0;
        }

        if (wc->root_level == wc->active_node &&
            btrfs_root_refs(&root->root_item) == 0) {
                /* Just drop this reference; free the node with the last. */
                if (--node->refs == 0) {
                        free_inode_recs_tree(&node->root_cache);
                        free_inode_recs_tree(&node->inode_cache);
                        remove_cache_extent(&wc->shared, &node->cache);
                        free(node);
                }
                return 1;
        }

        dest = wc->nodes[wc->active_node];
        splice_shared_node(node, dest);
        /* splice_shared_node() dropped one reference on @node. */
        if (node->refs == 0) {
                remove_cache_extent(&wc->shared, &node->cache);
                free(node);
        }
        return 1;
}
1361
1362 static int leave_shared_node(struct btrfs_root *root,
1363                              struct walk_control *wc, int level)
1364 {
1365         struct shared_node *node;
1366         struct shared_node *dest;
1367         int i;
1368
1369         if (level == wc->root_level)
1370                 return 0;
1371
1372         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1373                 if (wc->nodes[i])
1374                         break;
1375         }
1376         BUG_ON(i >= BTRFS_MAX_LEVEL);
1377
1378         node = wc->nodes[wc->active_node];
1379         wc->nodes[wc->active_node] = NULL;
1380         wc->active_node = i;
1381
1382         dest = wc->nodes[wc->active_node];
1383         if (wc->active_node < wc->root_level ||
1384             btrfs_root_refs(&root->root_item) > 0) {
1385                 BUG_ON(node->refs <= 1);
1386                 splice_shared_node(node, dest);
1387         } else {
1388                 BUG_ON(node->refs < 2);
1389                 node->refs--;
1390         }
1391         return 0;
1392 }
1393
1394 /*
1395  * Returns:
1396  * < 0 - on error
1397  * 1   - if the root with id child_root_id is a child of root parent_root_id
1398  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1399  *       has other root(s) as parent(s)
1400  * 2   - if the root child_root_id doesn't have any parent roots
1401  */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1403                          u64 child_root_id)
1404 {
1405         struct btrfs_path path;
1406         struct btrfs_key key;
1407         struct extent_buffer *leaf;
1408         int has_parent = 0;
1409         int ret;
1410
1411         btrfs_init_path(&path);
1412
1413         key.objectid = parent_root_id;
1414         key.type = BTRFS_ROOT_REF_KEY;
1415         key.offset = child_root_id;
1416         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1417                                 0, 0);
1418         if (ret < 0)
1419                 return ret;
1420         btrfs_release_path(&path);
1421         if (!ret)
1422                 return 1;
1423
1424         key.objectid = child_root_id;
1425         key.type = BTRFS_ROOT_BACKREF_KEY;
1426         key.offset = 0;
1427         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1428                                 0, 0);
1429         if (ret < 0)
1430                 goto out;
1431
1432         while (1) {
1433                 leaf = path.nodes[0];
1434                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1436                         if (ret)
1437                                 break;
1438                         leaf = path.nodes[0];
1439                 }
1440
1441                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1442                 if (key.objectid != child_root_id ||
1443                     key.type != BTRFS_ROOT_BACKREF_KEY)
1444                         break;
1445
1446                 has_parent = 1;
1447
1448                 if (key.offset == parent_root_id) {
1449                         btrfs_release_path(&path);
1450                         return 1;
1451                 }
1452
1453                 path.slots[0]++;
1454         }
1455 out:
1456         btrfs_release_path(&path);
1457         if (ret < 0)
1458                 return ret;
1459         return has_parent ? 0 : 2;
1460 }
1461
1462 static int process_dir_item(struct btrfs_root *root,
1463                             struct extent_buffer *eb,
1464                             int slot, struct btrfs_key *key,
1465                             struct shared_node *active_node)
1466 {
1467         u32 total;
1468         u32 cur = 0;
1469         u32 len;
1470         u32 name_len;
1471         u32 data_len;
1472         int error;
1473         int nritems = 0;
1474         u8 filetype;
1475         struct btrfs_dir_item *di;
1476         struct inode_record *rec;
1477         struct cache_tree *root_cache;
1478         struct cache_tree *inode_cache;
1479         struct btrfs_key location;
1480         char namebuf[BTRFS_NAME_LEN];
1481
1482         root_cache = &active_node->root_cache;
1483         inode_cache = &active_node->inode_cache;
1484         rec = active_node->current;
1485         rec->found_dir_item = 1;
1486
1487         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488         total = btrfs_item_size_nr(eb, slot);
1489         while (cur < total) {
1490                 nritems++;
1491                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492                 name_len = btrfs_dir_name_len(eb, di);
1493                 data_len = btrfs_dir_data_len(eb, di);
1494                 filetype = btrfs_dir_type(eb, di);
1495
1496                 rec->found_size += name_len;
1497                 if (name_len <= BTRFS_NAME_LEN) {
1498                         len = name_len;
1499                         error = 0;
1500                 } else {
1501                         len = BTRFS_NAME_LEN;
1502                         error = REF_ERR_NAME_TOO_LONG;
1503                 }
1504                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505
1506                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507                         add_inode_backref(inode_cache, location.objectid,
1508                                           key->objectid, key->offset, namebuf,
1509                                           len, filetype, key->type, error);
1510                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511                         add_inode_backref(root_cache, location.objectid,
1512                                           key->objectid, key->offset,
1513                                           namebuf, len, filetype,
1514                                           key->type, error);
1515                 } else {
1516                         fprintf(stderr, "invalid location in dir item %u\n",
1517                                 location.type);
1518                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519                                           key->objectid, key->offset, namebuf,
1520                                           len, filetype, key->type, error);
1521                 }
1522
1523                 len = sizeof(*di) + name_len + data_len;
1524                 di = (struct btrfs_dir_item *)((char *)di + len);
1525                 cur += len;
1526         }
1527         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1529
1530         return 0;
1531 }
1532
1533 static int process_inode_ref(struct extent_buffer *eb,
1534                              int slot, struct btrfs_key *key,
1535                              struct shared_node *active_node)
1536 {
1537         u32 total;
1538         u32 cur = 0;
1539         u32 len;
1540         u32 name_len;
1541         u64 index;
1542         int error;
1543         struct cache_tree *inode_cache;
1544         struct btrfs_inode_ref *ref;
1545         char namebuf[BTRFS_NAME_LEN];
1546
1547         inode_cache = &active_node->inode_cache;
1548
1549         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550         total = btrfs_item_size_nr(eb, slot);
1551         while (cur < total) {
1552                 name_len = btrfs_inode_ref_name_len(eb, ref);
1553                 index = btrfs_inode_ref_index(eb, ref);
1554                 if (name_len <= BTRFS_NAME_LEN) {
1555                         len = name_len;
1556                         error = 0;
1557                 } else {
1558                         len = BTRFS_NAME_LEN;
1559                         error = REF_ERR_NAME_TOO_LONG;
1560                 }
1561                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562                 add_inode_backref(inode_cache, key->objectid, key->offset,
1563                                   index, namebuf, len, 0, key->type, error);
1564
1565                 len = sizeof(*ref) + name_len;
1566                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1567                 cur += len;
1568         }
1569         return 0;
1570 }
1571
1572 static int process_inode_extref(struct extent_buffer *eb,
1573                                 int slot, struct btrfs_key *key,
1574                                 struct shared_node *active_node)
1575 {
1576         u32 total;
1577         u32 cur = 0;
1578         u32 len;
1579         u32 name_len;
1580         u64 index;
1581         u64 parent;
1582         int error;
1583         struct cache_tree *inode_cache;
1584         struct btrfs_inode_extref *extref;
1585         char namebuf[BTRFS_NAME_LEN];
1586
1587         inode_cache = &active_node->inode_cache;
1588
1589         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590         total = btrfs_item_size_nr(eb, slot);
1591         while (cur < total) {
1592                 name_len = btrfs_inode_extref_name_len(eb, extref);
1593                 index = btrfs_inode_extref_index(eb, extref);
1594                 parent = btrfs_inode_extref_parent(eb, extref);
1595                 if (name_len <= BTRFS_NAME_LEN) {
1596                         len = name_len;
1597                         error = 0;
1598                 } else {
1599                         len = BTRFS_NAME_LEN;
1600                         error = REF_ERR_NAME_TOO_LONG;
1601                 }
1602                 read_extent_buffer(eb, namebuf,
1603                                    (unsigned long)(extref + 1), len);
1604                 add_inode_backref(inode_cache, key->objectid, parent,
1605                                   index, namebuf, len, 0, key->type, error);
1606
1607                 len = sizeof(*extref) + name_len;
1608                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1609                 cur += len;
1610         }
1611         return 0;
1612
1613 }
1614
/*
 * Count how many bytes of the range [@start, @start + @len) are covered by
 * checksum items in the csum tree.
 *
 * @root:  any root; only root->fs_info and root->sectorsize are used
 * @start: first byte of the range
 * @len:   length of the range in bytes
 * @found: set to the number of covered bytes (reset to 0 on entry)
 *
 * Returns 0 on success or the negative error from the tree search.
 */
static int count_csum_range(struct btrfs_root *root, u64 start,
                            u64 len, u64 *found)
{
        struct btrfs_key key;
        struct btrfs_path path;
        struct extent_buffer *leaf;
        int ret;
        size_t size;
        *found = 0;
        u64 csum_end;
        u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);

        btrfs_init_path(&path);

        key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
        key.offset = start;
        key.type = BTRFS_EXTENT_CSUM_KEY;

        ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
                                &key, &path, 0, 0);
        if (ret < 0)
                goto out;
        /*
         * No exact match: if the previous slot holds a csum item, start
         * there, since an item that begins before @start may still cover it.
         */
        if (ret > 0 && path.slots[0] > 0) {
                leaf = path.nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
                if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
                    key.type == BTRFS_EXTENT_CSUM_KEY)
                        path.slots[0]--;
        }

        while (len > 0) {
                leaf = path.nodes[0];
                if (path.slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
                        /* > 0: no further leaf, < 0: error */
                        if (ret > 0)
                                break;
                        else if (ret < 0)
                                goto out;
                        leaf = path.nodes[0];
                }

                btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
                    key.type != BTRFS_EXTENT_CSUM_KEY)
                        break;

                /* Same slot as above; this second decode is redundant. */
                btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                if (key.offset >= start + len)
                        break;

                /* Skip the uncovered gap in front of this item. */
                if (key.offset > start)
                        start = key.offset;

                /* One checksum of csum_size bytes per sector of data. */
                size = btrfs_item_size_nr(leaf, path.slots[0]);
                csum_end = key.offset + (size / csum_size) * root->sectorsize;
                if (csum_end > start) {
                        /* @size is reused for the covered byte count. */
                        size = min(csum_end - start, len);
                        len -= size;
                        start += size;
                        *found += size;
                }

                path.slots[0]++;
        }
out:
        btrfs_release_path(&path);
        if (ret < 0)
                return ret;
        return 0;
}
1685
1686 static int process_file_extent(struct btrfs_root *root,
1687                                 struct extent_buffer *eb,
1688                                 int slot, struct btrfs_key *key,
1689                                 struct shared_node *active_node)
1690 {
1691         struct inode_record *rec;
1692         struct btrfs_file_extent_item *fi;
1693         u64 num_bytes = 0;
1694         u64 disk_bytenr = 0;
1695         u64 extent_offset = 0;
1696         u64 mask = root->sectorsize - 1;
1697         int extent_type;
1698         int ret;
1699
1700         rec = active_node->current;
1701         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702         rec->found_file_extent = 1;
1703
1704         if (rec->extent_start == (u64)-1) {
1705                 rec->extent_start = key->offset;
1706                 rec->extent_end = key->offset;
1707         }
1708
1709         if (rec->extent_end > key->offset)
1710                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711         else if (rec->extent_end < key->offset) {
1712                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713                                            key->offset - rec->extent_end);
1714                 if (ret < 0)
1715                         return ret;
1716         }
1717
1718         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719         extent_type = btrfs_file_extent_type(eb, fi);
1720
1721         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723                 if (num_bytes == 0)
1724                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725                 rec->found_size += num_bytes;
1726                 num_bytes = (num_bytes + mask) & ~mask;
1727         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731                 extent_offset = btrfs_file_extent_offset(eb, fi);
1732                 if (num_bytes == 0 || (num_bytes & mask))
1733                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734                 if (num_bytes + extent_offset >
1735                     btrfs_file_extent_ram_bytes(eb, fi))
1736                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1737                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738                     (btrfs_file_extent_compression(eb, fi) ||
1739                      btrfs_file_extent_encryption(eb, fi) ||
1740                      btrfs_file_extent_other_encoding(eb, fi)))
1741                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1742                 if (disk_bytenr > 0)
1743                         rec->found_size += num_bytes;
1744         } else {
1745                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746         }
1747         rec->extent_end = key->offset + num_bytes;
1748
1749         /*
1750          * The data reloc tree will copy full extents into its inode and then
1751          * copy the corresponding csums.  Because the extent it copied could be
1752          * a preallocated extent that hasn't been written to yet there may be no
1753          * csums to copy, ergo we won't have csums for our file extent.  This is
1754          * ok so just don't bother checking csums if the inode belongs to the
1755          * data reloc tree.
1756          */
1757         if (disk_bytenr > 0 &&
1758             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1759                 u64 found;
1760                 if (btrfs_file_extent_compression(eb, fi))
1761                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762                 else
1763                         disk_bytenr += extent_offset;
1764
1765                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1766                 if (ret < 0)
1767                         return ret;
1768                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769                         if (found > 0)
1770                                 rec->found_csum_item = 1;
1771                         if (found < num_bytes)
1772                                 rec->some_csum_missing = 1;
1773                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774                         if (found > 0)
1775                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1776                 }
1777         }
1778         return 0;
1779 }
1780
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782                             struct walk_control *wc)
1783 {
1784         struct btrfs_key key;
1785         u32 nritems;
1786         int i;
1787         int ret = 0;
1788         struct cache_tree *inode_cache;
1789         struct shared_node *active_node;
1790
1791         if (wc->root_level == wc->active_node &&
1792             btrfs_root_refs(&root->root_item) == 0)
1793                 return 0;
1794
1795         active_node = wc->nodes[wc->active_node];
1796         inode_cache = &active_node->inode_cache;
1797         nritems = btrfs_header_nritems(eb);
1798         for (i = 0; i < nritems; i++) {
1799                 btrfs_item_key_to_cpu(eb, &key, i);
1800
1801                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802                         continue;
1803                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1804                         continue;
1805
1806                 if (active_node->current == NULL ||
1807                     active_node->current->ino < key.objectid) {
1808                         if (active_node->current) {
1809                                 active_node->current->checked = 1;
1810                                 maybe_free_inode_rec(inode_cache,
1811                                                      active_node->current);
1812                         }
1813                         active_node->current = get_inode_rec(inode_cache,
1814                                                              key.objectid, 1);
1815                         BUG_ON(IS_ERR(active_node->current));
1816                 }
1817                 switch (key.type) {
1818                 case BTRFS_DIR_ITEM_KEY:
1819                 case BTRFS_DIR_INDEX_KEY:
1820                         ret = process_dir_item(root, eb, i, &key, active_node);
1821                         break;
1822                 case BTRFS_INODE_REF_KEY:
1823                         ret = process_inode_ref(eb, i, &key, active_node);
1824                         break;
1825                 case BTRFS_INODE_EXTREF_KEY:
1826                         ret = process_inode_extref(eb, i, &key, active_node);
1827                         break;
1828                 case BTRFS_INODE_ITEM_KEY:
1829                         ret = process_inode_item(eb, i, &key, active_node);
1830                         break;
1831                 case BTRFS_EXTENT_DATA_KEY:
1832                         ret = process_file_extent(root, eb, i, &key,
1833                                                   active_node);
1834                         break;
1835                 default:
1836                         break;
1837                 };
1838         }
1839         return ret;
1840 }
1841
1842 static void reada_walk_down(struct btrfs_root *root,
1843                             struct extent_buffer *node, int slot)
1844 {
1845         u64 bytenr;
1846         u64 ptr_gen;
1847         u32 nritems;
1848         u32 blocksize;
1849         int i;
1850         int level;
1851
1852         level = btrfs_header_level(node);
1853         if (level != 1)
1854                 return;
1855
1856         nritems = btrfs_header_nritems(node);
1857         blocksize = root->nodesize;
1858         for (i = slot; i < nritems; i++) {
1859                 bytenr = btrfs_node_blockptr(node, i);
1860                 ptr_gen = btrfs_node_ptr_generation(node, i);
1861                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1862         }
1863 }
1864
1865 /*
1866  * Check the child node/leaf by the following condition:
1867  * 1. the first item key of the node/leaf should be the same with the one
1868  *    in parent.
1869  * 2. block in parent node should match the child node/leaf.
1870  * 3. generation of parent node and child's header should be consistent.
1871  *
1872  * Or the child node/leaf pointed by the key in parent is not valid.
1873  *
1874  * We hope to check leaf owner too, but since subvol may share leaves,
1875  * which makes leaf owner check not so strong, key check should be
1876  * sufficient enough for that case.
1877  */
1878 static int check_child_node(struct btrfs_root *root,
1879                             struct extent_buffer *parent, int slot,
1880                             struct extent_buffer *child)
1881 {
1882         struct btrfs_key parent_key;
1883         struct btrfs_key child_key;
1884         int ret = 0;
1885
1886         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887         if (btrfs_header_level(child) == 0)
1888                 btrfs_item_key_to_cpu(child, &child_key, 0);
1889         else
1890                 btrfs_node_key_to_cpu(child, &child_key, 0);
1891
1892         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1893                 ret = -EINVAL;
1894                 fprintf(stderr,
1895                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896                         parent_key.objectid, parent_key.type, parent_key.offset,
1897                         child_key.objectid, child_key.type, child_key.offset);
1898         }
1899         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900                 ret = -EINVAL;
1901                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902                         btrfs_node_blockptr(parent, slot),
1903                         btrfs_header_bytenr(child));
1904         }
1905         if (btrfs_node_ptr_generation(parent, slot) !=
1906             btrfs_header_generation(child)) {
1907                 ret = -EINVAL;
1908                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909                         btrfs_header_generation(child),
1910                         btrfs_node_ptr_generation(parent, slot));
1911         }
1912         return ret;
1913 }
1914
/*
 * Per-level cache of extent reference counts used by walk_down_tree():
 * bytenr[level] is the start of the tree block whose reference count was
 * last looked up at that level, and refs[level] is that count.
 */
struct node_refs {
        u64 bytenr[BTRFS_MAX_LEVEL];
        u64 refs[BTRFS_MAX_LEVEL];
};
1919
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921                           struct walk_control *wc, int *level,
1922                           struct node_refs *nrefs)
1923 {
1924         enum btrfs_tree_block_status status;
1925         u64 bytenr;
1926         u64 ptr_gen;
1927         struct extent_buffer *next;
1928         struct extent_buffer *cur;
1929         u32 blocksize;
1930         int ret, err = 0;
1931         u64 refs;
1932
1933         WARN_ON(*level < 0);
1934         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1935
1936         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937                 refs = nrefs->refs[*level];
1938                 ret = 0;
1939         } else {
1940                 ret = btrfs_lookup_extent_info(NULL, root,
1941                                        path->nodes[*level]->start,
1942                                        *level, 1, &refs, NULL);
1943                 if (ret < 0) {
1944                         err = ret;
1945                         goto out;
1946                 }
1947                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948                 nrefs->refs[*level] = refs;
1949         }
1950
1951         if (refs > 1) {
1952                 ret = enter_shared_node(root, path->nodes[*level]->start,
1953                                         refs, wc, *level);
1954                 if (ret > 0) {
1955                         err = ret;
1956                         goto out;
1957                 }
1958         }
1959
1960         while (*level >= 0) {
1961                 WARN_ON(*level < 0);
1962                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963                 cur = path->nodes[*level];
1964
1965                 if (btrfs_header_level(cur) != *level)
1966                         WARN_ON(1);
1967
1968                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1969                         break;
1970                 if (*level == 0) {
1971                         ret = process_one_leaf(root, cur, wc);
1972                         if (ret < 0)
1973                                 err = ret;
1974                         break;
1975                 }
1976                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978                 blocksize = root->nodesize;
1979
1980                 if (bytenr == nrefs->bytenr[*level - 1]) {
1981                         refs = nrefs->refs[*level - 1];
1982                 } else {
1983                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984                                         *level - 1, 1, &refs, NULL);
1985                         if (ret < 0) {
1986                                 refs = 0;
1987                         } else {
1988                                 nrefs->bytenr[*level - 1] = bytenr;
1989                                 nrefs->refs[*level - 1] = refs;
1990                         }
1991                 }
1992
1993                 if (refs > 1) {
1994                         ret = enter_shared_node(root, bytenr, refs,
1995                                                 wc, *level - 1);
1996                         if (ret > 0) {
1997                                 path->slots[*level]++;
1998                                 continue;
1999                         }
2000                 }
2001
2002                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004                         free_extent_buffer(next);
2005                         reada_walk_down(root, cur, path->slots[*level]);
2006                         next = read_tree_block(root, bytenr, blocksize,
2007                                                ptr_gen);
2008                         if (!extent_buffer_uptodate(next)) {
2009                                 struct btrfs_key node_key;
2010
2011                                 btrfs_node_key_to_cpu(path->nodes[*level],
2012                                                       &node_key,
2013                                                       path->slots[*level]);
2014                                 btrfs_add_corrupt_extent_record(root->fs_info,
2015                                                 &node_key,
2016                                                 path->nodes[*level]->start,
2017                                                 root->nodesize, *level);
2018                                 err = -EIO;
2019                                 goto out;
2020                         }
2021                 }
2022
2023                 ret = check_child_node(root, cur, path->slots[*level], next);
2024                 if (ret) {
2025                         err = ret;
2026                         goto out;
2027                 }
2028
2029                 if (btrfs_is_leaf(next))
2030                         status = btrfs_check_leaf(root, NULL, next);
2031                 else
2032                         status = btrfs_check_node(root, NULL, next);
2033                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034                         free_extent_buffer(next);
2035                         err = -EIO;
2036                         goto out;
2037                 }
2038
2039                 *level = *level - 1;
2040                 free_extent_buffer(path->nodes[*level]);
2041                 path->nodes[*level] = next;
2042                 path->slots[*level] = 0;
2043         }
2044 out:
2045         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2046         return err;
2047 }
2048
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050                         struct walk_control *wc, int *level)
2051 {
2052         int i;
2053         struct extent_buffer *leaf;
2054
2055         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2056                 leaf = path->nodes[i];
2057                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2058                         path->slots[i]++;
2059                         *level = i;
2060                         return 0;
2061                 } else {
2062                         free_extent_buffer(path->nodes[*level]);
2063                         path->nodes[*level] = NULL;
2064                         BUG_ON(*level > wc->active_node);
2065                         if (*level == wc->active_node)
2066                                 leave_shared_node(root, wc, *level);
2067                         *level = i + 1;
2068                 }
2069         }
2070         return 1;
2071 }
2072
2073 static int check_root_dir(struct inode_record *rec)
2074 {
2075         struct inode_backref *backref;
2076         int ret = -1;
2077
2078         if (!rec->found_inode_item || rec->errors)
2079                 goto out;
2080         if (rec->nlink != 1 || rec->found_link != 0)
2081                 goto out;
2082         if (list_empty(&rec->backrefs))
2083                 goto out;
2084         backref = to_inode_backref(rec->backrefs.next);
2085         if (!backref->found_inode_ref)
2086                 goto out;
2087         if (backref->index != 0 || backref->namelen != 2 ||
2088             memcmp(backref->name, "..", 2))
2089                 goto out;
2090         if (backref->found_dir_index || backref->found_dir_item)
2091                 goto out;
2092         ret = 0;
2093 out:
2094         return ret;
2095 }
2096
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098                               struct btrfs_root *root, struct btrfs_path *path,
2099                               struct inode_record *rec)
2100 {
2101         struct btrfs_inode_item *ei;
2102         struct btrfs_key key;
2103         int ret;
2104
2105         key.objectid = rec->ino;
2106         key.type = BTRFS_INODE_ITEM_KEY;
2107         key.offset = (u64)-1;
2108
2109         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2110         if (ret < 0)
2111                 goto out;
2112         if (ret) {
2113                 if (!path->slots[0]) {
2114                         ret = -ENOENT;
2115                         goto out;
2116                 }
2117                 path->slots[0]--;
2118                 ret = 0;
2119         }
2120         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2121         if (key.objectid != rec->ino) {
2122                 ret = -ENOENT;
2123                 goto out;
2124         }
2125
2126         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127                             struct btrfs_inode_item);
2128         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129         btrfs_mark_buffer_dirty(path->nodes[0]);
2130         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132                root->root_key.objectid);
2133 out:
2134         btrfs_release_path(path);
2135         return ret;
2136 }
2137
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139                                     struct btrfs_root *root,
2140                                     struct btrfs_path *path,
2141                                     struct inode_record *rec)
2142 {
2143         int ret;
2144
2145         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146         btrfs_release_path(path);
2147         if (!ret)
2148                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2149         return ret;
2150 }
2151
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153                                struct btrfs_root *root,
2154                                struct btrfs_path *path,
2155                                struct inode_record *rec)
2156 {
2157         struct btrfs_inode_item *ei;
2158         struct btrfs_key key;
2159         int ret = 0;
2160
2161         key.objectid = rec->ino;
2162         key.type = BTRFS_INODE_ITEM_KEY;
2163         key.offset = 0;
2164
2165         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2166         if (ret) {
2167                 if (ret > 0)
2168                         ret = -ENOENT;
2169                 goto out;
2170         }
2171
2172         /* Since ret == 0, no need to check anything */
2173         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174                             struct btrfs_inode_item);
2175         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176         btrfs_mark_buffer_dirty(path->nodes[0]);
2177         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178         printf("reset nbytes for ino %llu root %llu\n",
2179                rec->ino, root->root_key.objectid);
2180 out:
2181         btrfs_release_path(path);
2182         return ret;
2183 }
2184
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186                                  struct cache_tree *inode_cache,
2187                                  struct inode_record *rec,
2188                                  struct inode_backref *backref)
2189 {
2190         struct btrfs_path path;
2191         struct btrfs_trans_handle *trans;
2192         struct btrfs_dir_item *dir_item;
2193         struct extent_buffer *leaf;
2194         struct btrfs_key key;
2195         struct btrfs_disk_key disk_key;
2196         struct inode_record *dir_rec;
2197         unsigned long name_ptr;
2198         u32 data_size = sizeof(*dir_item) + backref->namelen;
2199         int ret;
2200
2201         trans = btrfs_start_transaction(root, 1);
2202         if (IS_ERR(trans))
2203                 return PTR_ERR(trans);
2204
2205         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206                 (unsigned long long)rec->ino);
2207
2208         btrfs_init_path(&path);
2209         key.objectid = backref->dir;
2210         key.type = BTRFS_DIR_INDEX_KEY;
2211         key.offset = backref->index;
2212         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2213         BUG_ON(ret);
2214
2215         leaf = path.nodes[0];
2216         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2217
2218         disk_key.objectid = cpu_to_le64(rec->ino);
2219         disk_key.type = BTRFS_INODE_ITEM_KEY;
2220         disk_key.offset = 0;
2221
2222         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224         btrfs_set_dir_data_len(leaf, dir_item, 0);
2225         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2226         name_ptr = (unsigned long)(dir_item + 1);
2227         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228         btrfs_mark_buffer_dirty(leaf);
2229         btrfs_release_path(&path);
2230         btrfs_commit_transaction(trans, root);
2231
2232         backref->found_dir_index = 1;
2233         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234         BUG_ON(IS_ERR(dir_rec));
2235         if (!dir_rec)
2236                 return 0;
2237         dir_rec->found_size += backref->namelen;
2238         if (dir_rec->found_size == dir_rec->isize &&
2239             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241         if (dir_rec->found_size != dir_rec->isize)
2242                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2243
2244         return 0;
2245 }
2246
2247 static int delete_dir_index(struct btrfs_root *root,
2248                             struct cache_tree *inode_cache,
2249                             struct inode_record *rec,
2250                             struct inode_backref *backref)
2251 {
2252         struct btrfs_trans_handle *trans;
2253         struct btrfs_dir_item *di;
2254         struct btrfs_path path;
2255         int ret = 0;
2256
2257         trans = btrfs_start_transaction(root, 1);
2258         if (IS_ERR(trans))
2259                 return PTR_ERR(trans);
2260
2261         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262                 (unsigned long long)backref->dir,
2263                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264                 (unsigned long long)root->objectid);
2265
2266         btrfs_init_path(&path);
2267         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268                                     backref->name, backref->namelen,
2269                                     backref->index, -1);
2270         if (IS_ERR(di)) {
2271                 ret = PTR_ERR(di);
2272                 btrfs_release_path(&path);
2273                 btrfs_commit_transaction(trans, root);
2274                 if (ret == -ENOENT)
2275                         return 0;
2276                 return ret;
2277         }
2278
2279         if (!di)
2280                 ret = btrfs_del_item(trans, root, &path);
2281         else
2282                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283         BUG_ON(ret);
2284         btrfs_release_path(&path);
2285         btrfs_commit_transaction(trans, root);
2286         return ret;
2287 }
2288
2289 static int create_inode_item(struct btrfs_root *root,
2290                              struct inode_record *rec,
2291                              struct inode_backref *backref, int root_dir)
2292 {
2293         struct btrfs_trans_handle *trans;
2294         struct btrfs_inode_item inode_item;
2295         time_t now = time(NULL);
2296         int ret;
2297
2298         trans = btrfs_start_transaction(root, 1);
2299         if (IS_ERR(trans)) {
2300                 ret = PTR_ERR(trans);
2301                 return ret;
2302         }
2303
2304         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305                 "be incomplete, please check permissions and content after "
2306                 "the fsck completes.\n", (unsigned long long)root->objectid,
2307                 (unsigned long long)rec->ino);
2308
2309         memset(&inode_item, 0, sizeof(inode_item));
2310         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2311         if (root_dir)
2312                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313         else
2314                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316         if (rec->found_dir_item) {
2317                 if (rec->found_file_extent)
2318                         fprintf(stderr, "root %llu inode %llu has both a dir "
2319                                 "item and extents, unsure if it is a dir or a "
2320                                 "regular file so setting it as a directory\n",
2321                                 (unsigned long long)root->objectid,
2322                                 (unsigned long long)rec->ino);
2323                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2325         } else if (!rec->found_dir_item) {
2326                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2328         }
2329         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337
2338         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339         BUG_ON(ret);
2340         btrfs_commit_transaction(trans, root);
2341         return 0;
2342 }
2343
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345                                  struct inode_record *rec,
2346                                  struct cache_tree *inode_cache,
2347                                  int delete)
2348 {
2349         struct inode_backref *tmp, *backref;
2350         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2351         int ret = 0;
2352         int repaired = 0;
2353
2354         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355                 if (!delete && rec->ino == root_dirid) {
2356                         if (!rec->found_inode_item) {
2357                                 ret = create_inode_item(root, rec, backref, 1);
2358                                 if (ret)
2359                                         break;
2360                                 repaired++;
2361                         }
2362                 }
2363
2364                 /* Index 0 for root dir's are special, don't mess with it */
2365                 if (rec->ino == root_dirid && backref->index == 0)
2366                         continue;
2367
2368                 if (delete &&
2369                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2370                      (backref->found_dir_index && backref->found_inode_ref &&
2371                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372                         ret = delete_dir_index(root, inode_cache, rec, backref);
2373                         if (ret)
2374                                 break;
2375                         repaired++;
2376                         list_del(&backref->list);
2377                         free(backref);
2378                 }
2379
2380                 if (!delete && !backref->found_dir_index &&
2381                     backref->found_dir_item && backref->found_inode_ref) {
2382                         ret = add_missing_dir_index(root, inode_cache, rec,
2383                                                     backref);
2384                         if (ret)
2385                                 break;
2386                         repaired++;
2387                         if (backref->found_dir_item &&
2388                             backref->found_dir_index &&
2389                             backref->found_dir_index) {
2390                                 if (!backref->errors &&
2391                                     backref->found_inode_ref) {
2392                                         list_del(&backref->list);
2393                                         free(backref);
2394                                 }
2395                         }
2396                 }
2397
2398                 if (!delete && (!backref->found_dir_index &&
2399                                 !backref->found_dir_item &&
2400                                 backref->found_inode_ref)) {
2401                         struct btrfs_trans_handle *trans;
2402                         struct btrfs_key location;
2403
2404                         ret = check_dir_conflict(root, backref->name,
2405                                                  backref->namelen,
2406                                                  backref->dir,
2407                                                  backref->index);
2408                         if (ret) {
2409                                 /*
2410                                  * let nlink fixing routine to handle it,
2411                                  * which can do it better.
2412                                  */
2413                                 ret = 0;
2414                                 break;
2415                         }
2416                         location.objectid = rec->ino;
2417                         location.type = BTRFS_INODE_ITEM_KEY;
2418                         location.offset = 0;
2419
2420                         trans = btrfs_start_transaction(root, 1);
2421                         if (IS_ERR(trans)) {
2422                                 ret = PTR_ERR(trans);
2423                                 break;
2424                         }
2425                         fprintf(stderr, "adding missing dir index/item pair "
2426                                 "for inode %llu\n",
2427                                 (unsigned long long)rec->ino);
2428                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2429                                                     backref->namelen,
2430                                                     backref->dir, &location,
2431                                                     imode_to_type(rec->imode),
2432                                                     backref->index);
2433                         BUG_ON(ret);
2434                         btrfs_commit_transaction(trans, root);
2435                         repaired++;
2436                 }
2437
2438                 if (!delete && (backref->found_inode_ref &&
2439                                 backref->found_dir_index &&
2440                                 backref->found_dir_item &&
2441                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442                                 !rec->found_inode_item)) {
2443                         ret = create_inode_item(root, rec, backref, 0);
2444                         if (ret)
2445                                 break;
2446                         repaired++;
2447                 }
2448
2449         }
2450         return ret ? ret : repaired;
2451 }
2452
2453 /*
2454  * To determine the file type for nlink/inode_item repair
2455  *
2456  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457  * Return -ENOENT if file type is not found.
2458  */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2460 {
2461         struct inode_backref *backref;
2462
2463         /* For inode item recovered case */
2464         if (rec->found_inode_item) {
2465                 *type = imode_to_type(rec->imode);
2466                 return 0;
2467         }
2468
2469         list_for_each_entry(backref, &rec->backrefs, list) {
2470                 if (backref->found_dir_index || backref->found_dir_item) {
2471                         *type = backref->filetype;
2472                         return 0;
2473                 }
2474         }
2475         return -ENOENT;
2476 }
2477
2478 /*
2479  * To determine the file name for nlink repair
2480  *
2481  * Return 0 if file name is found, set name and namelen.
2482  * Return -ENOENT if file name is not found.
2483  */
2484 static int find_file_name(struct inode_record *rec,
2485                           char *name, int *namelen)
2486 {
2487         struct inode_backref *backref;
2488
2489         list_for_each_entry(backref, &rec->backrefs, list) {
2490                 if (backref->found_dir_index || backref->found_dir_item ||
2491                     backref->found_inode_ref) {
2492                         memcpy(name, backref->name, backref->namelen);
2493                         *namelen = backref->namelen;
2494                         return 0;
2495                 }
2496         }
2497         return -ENOENT;
2498 }
2499
2500 /* Reset the nlink of the inode to the correct one */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502                        struct btrfs_root *root,
2503                        struct btrfs_path *path,
2504                        struct inode_record *rec)
2505 {
2506         struct inode_backref *backref;
2507         struct inode_backref *tmp;
2508         struct btrfs_key key;
2509         struct btrfs_inode_item *inode_item;
2510         int ret = 0;
2511
2512         /* We don't believe this either, reset it and iterate backref */
2513         rec->found_link = 0;
2514
2515         /* Remove all backref including the valid ones */
2516         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518                                    backref->index, backref->name,
2519                                    backref->namelen, 0);
2520                 if (ret < 0)
2521                         goto out;
2522
2523                 /* remove invalid backref, so it won't be added back */
2524                 if (!(backref->found_dir_index &&
2525                       backref->found_dir_item &&
2526                       backref->found_inode_ref)) {
2527                         list_del(&backref->list);
2528                         free(backref);
2529                 } else {
2530                         rec->found_link++;
2531                 }
2532         }
2533
2534         /* Set nlink to 0 */
2535         key.objectid = rec->ino;
2536         key.type = BTRFS_INODE_ITEM_KEY;
2537         key.offset = 0;
2538         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2539         if (ret < 0)
2540                 goto out;
2541         if (ret > 0) {
2542                 ret = -ENOENT;
2543                 goto out;
2544         }
2545         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546                                     struct btrfs_inode_item);
2547         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548         btrfs_mark_buffer_dirty(path->nodes[0]);
2549         btrfs_release_path(path);
2550
2551         /*
2552          * Add back valid inode_ref/dir_item/dir_index,
2553          * add_link() will handle the nlink inc, so new nlink must be correct
2554          */
2555         list_for_each_entry(backref, &rec->backrefs, list) {
2556                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557                                      backref->name, backref->namelen,
2558                                      backref->filetype, &backref->index, 1);
2559                 if (ret < 0)
2560                         goto out;
2561         }
2562 out:
2563         btrfs_release_path(path);
2564         return ret;
2565 }
2566
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568                                struct btrfs_root *root,
2569                                struct btrfs_path *path,
2570                                struct inode_record *rec)
2571 {
2572         char *dir_name = "lost+found";
2573         char namebuf[BTRFS_NAME_LEN] = {0};
2574         u64 lost_found_ino;
2575         u32 mode = 0700;
2576         u8 type = 0;
2577         int namelen = 0;
2578         int name_recovered = 0;
2579         int type_recovered = 0;
2580         int ret = 0;
2581
2582         /*
2583          * Get file name and type first before these invalid inode ref
2584          * are deleted by remove_all_invalid_backref()
2585          */
2586         name_recovered = !find_file_name(rec, namebuf, &namelen);
2587         type_recovered = !find_file_type(rec, &type);
2588
2589         if (!name_recovered) {
2590                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591                        rec->ino, rec->ino);
2592                 namelen = count_digits(rec->ino);
2593                 sprintf(namebuf, "%llu", rec->ino);
2594                 name_recovered = 1;
2595         }
2596         if (!type_recovered) {
2597                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598                        rec->ino);
2599                 type = BTRFS_FT_REG_FILE;
2600                 type_recovered = 1;
2601         }
2602
2603         ret = reset_nlink(trans, root, path, rec);
2604         if (ret < 0) {
2605                 fprintf(stderr,
2606                         "Failed to reset nlink for inode %llu: %s\n",
2607                         rec->ino, strerror(-ret));
2608                 goto out;
2609         }
2610
2611         if (rec->found_link == 0) {
2612                 lost_found_ino = root->highest_inode;
2613                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2614                         ret = -EOVERFLOW;
2615                         goto out;
2616                 }
2617                 lost_found_ino++;
2618                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2620                                   mode);
2621                 if (ret < 0) {
2622                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623                                 dir_name, strerror(-ret));
2624                         goto out;
2625                 }
2626                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627                                      namebuf, namelen, type, NULL, 1);
2628                 /*
2629                  * Add ".INO" suffix several times to handle case where
2630                  * "FILENAME.INO" is already taken by another file.
2631                  */
2632                 while (ret == -EEXIST) {
2633                         /*
2634                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2635                          */
2636                         if (namelen + count_digits(rec->ino) + 1 >
2637                             BTRFS_NAME_LEN) {
2638                                 ret = -EFBIG;
2639                                 goto out;
2640                         }
2641                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642                                  ".%llu", rec->ino);
2643                         namelen += count_digits(rec->ino) + 1;
2644                         ret = btrfs_add_link(trans, root, rec->ino,
2645                                              lost_found_ino, namebuf,
2646                                              namelen, type, NULL, 1);
2647                 }
2648                 if (ret < 0) {
2649                         fprintf(stderr,
2650                                 "Failed to link the inode %llu to %s dir: %s\n",
2651                                 rec->ino, dir_name, strerror(-ret));
2652                         goto out;
2653                 }
2654                 /*
2655                  * Just increase the found_link, don't actually add the
2656                  * backref. This will make things easier and this inode
2657                  * record will be freed after the repair is done.
2658                  * So fsck will not report problem about this inode.
2659                  */
2660                 rec->found_link++;
2661                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662                        namelen, namebuf, dir_name);
2663         }
2664         printf("Fixed the nlink of inode %llu\n", rec->ino);
2665 out:
2666         /*
2667          * Clear the flag anyway, or we will loop forever for the same inode
2668          * as it will not be removed from the bad inode list and the dead loop
2669          * happens.
2670          */
2671         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672         btrfs_release_path(path);
2673         return ret;
2674 }
2675
2676 /*
2677  * Check if there is any normal(reg or prealloc) file extent for given
2678  * ino.
2679  * This is used to determine the file type when neither its dir_index/item or
2680  * inode_item exists.
2681  *
2682  * This will *NOT* report error, if any error happens, just consider it does
2683  * not have any normal file extent.
2684  */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 {
2687         struct btrfs_path path;
2688         struct btrfs_key key;
2689         struct btrfs_key found_key;
2690         struct btrfs_file_extent_item *fi;
2691         u8 type;
2692         int ret = 0;
2693
2694         btrfs_init_path(&path);
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705                 ret = btrfs_next_leaf(root, &path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2713                                       path.slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path.nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_release_path(&path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         /*
2930          * For nlink repair, it may create a dir and add link, so
2931          * 2 for parent(256)'s dir_index and dir_item
2932          * 2 for lost+found dir's inode_item and inode_ref
2933          * 1 for the new inode_ref of the file
2934          * 2 for lost+found dir's dir_index and dir_item for the file
2935          */
2936         trans = btrfs_start_transaction(root, 7);
2937         if (IS_ERR(trans))
2938                 return PTR_ERR(trans);
2939
2940         btrfs_init_path(&path);
2941         if (rec->errors & I_ERR_NO_INODE_ITEM)
2942                 ret = repair_inode_no_item(trans, root, &path, rec);
2943         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948                 ret = repair_inode_isize(trans, root, &path, rec);
2949         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952                 ret = repair_inode_nlinks(trans, root, &path, rec);
2953         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954                 ret = repair_inode_nbytes(trans, root, &path, rec);
2955         btrfs_commit_transaction(trans, root);
2956         btrfs_release_path(&path);
2957         return ret;
2958 }
2959
2960 static int check_inode_recs(struct btrfs_root *root,
2961                             struct cache_tree *inode_cache)
2962 {
2963         struct cache_extent *cache;
2964         struct ptr_node *node;
2965         struct inode_record *rec;
2966         struct inode_backref *backref;
2967         int stage = 0;
2968         int ret = 0;
2969         int err = 0;
2970         u64 error = 0;
2971         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2972
2973         if (btrfs_root_refs(&root->root_item) == 0) {
2974                 if (!cache_tree_empty(inode_cache))
2975                         fprintf(stderr, "warning line %d\n", __LINE__);
2976                 return 0;
2977         }
2978
2979         /*
2980          * We need to record the highest inode number for later 'lost+found'
2981          * dir creation.
2982          * We must select an ino not used/referred by any existing inode, or
2983          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984          * this may cause 'lost+found' dir has wrong nlinks.
2985          */
2986         cache = last_cache_extent(inode_cache);
2987         if (cache) {
2988                 node = container_of(cache, struct ptr_node, cache);
2989                 rec = node->data;
2990                 if (rec->ino > root->highest_inode)
2991                         root->highest_inode = rec->ino;
2992         }
2993
2994         /*
2995          * We need to repair backrefs first because we could change some of the
2996          * errors in the inode recs.
2997          *
2998          * We also need to go through and delete invalid backrefs first and then
2999          * add the correct ones second.  We do this because we may get EEXIST
3000          * when adding back the correct index because we hadn't yet deleted the
3001          * invalid index.
3002          *
3003          * For example, if we were missing a dir index then the directories
3004          * isize would be wrong, so if we fixed the isize to what we thought it
3005          * would be and then fixed the backref we'd still have a invalid fs, so
3006          * we need to add back the dir index and then check to see if the isize
3007          * is still wrong.
3008          */
3009         while (stage < 3) {
3010                 stage++;
3011                 if (stage == 3 && !err)
3012                         break;
3013
3014                 cache = search_cache_extent(inode_cache, 0);
3015                 while (repair && cache) {
3016                         node = container_of(cache, struct ptr_node, cache);
3017                         rec = node->data;
3018                         cache = next_cache_extent(cache);
3019
3020                         /* Need to free everything up and rescan */
3021                         if (stage == 3) {
3022                                 remove_cache_extent(inode_cache, &node->cache);
3023                                 free(node);
3024                                 free_inode_rec(rec);
3025                                 continue;
3026                         }
3027
3028                         if (list_empty(&rec->backrefs))
3029                                 continue;
3030
3031                         ret = repair_inode_backrefs(root, rec, inode_cache,
3032                                                     stage == 1);
3033                         if (ret < 0) {
3034                                 err = ret;
3035                                 stage = 2;
3036                                 break;
3037                         } if (ret > 0) {
3038                                 err = -EAGAIN;
3039                         }
3040                 }
3041         }
3042         if (err)
3043                 return err;
3044
3045         rec = get_inode_rec(inode_cache, root_dirid, 0);
3046         BUG_ON(IS_ERR(rec));
3047         if (rec) {
3048                 ret = check_root_dir(rec);
3049                 if (ret) {
3050                         fprintf(stderr, "root %llu root dir %llu error\n",
3051                                 (unsigned long long)root->root_key.objectid,
3052                                 (unsigned long long)root_dirid);
3053                         print_inode_error(root, rec);
3054                         error++;
3055                 }
3056         } else {
3057                 if (repair) {
3058                         struct btrfs_trans_handle *trans;
3059
3060                         trans = btrfs_start_transaction(root, 1);
3061                         if (IS_ERR(trans)) {
3062                                 err = PTR_ERR(trans);
3063                                 return err;
3064                         }
3065
3066                         fprintf(stderr,
3067                                 "root %llu missing its root dir, recreating\n",
3068                                 (unsigned long long)root->objectid);
3069
3070                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3071                         BUG_ON(ret);
3072
3073                         btrfs_commit_transaction(trans, root);
3074                         return -EAGAIN;
3075                 }
3076
3077                 fprintf(stderr, "root %llu root dir %llu not found\n",
3078                         (unsigned long long)root->root_key.objectid,
3079                         (unsigned long long)root_dirid);
3080         }
3081
3082         while (1) {
3083                 cache = search_cache_extent(inode_cache, 0);
3084                 if (!cache)
3085                         break;
3086                 node = container_of(cache, struct ptr_node, cache);
3087                 rec = node->data;
3088                 remove_cache_extent(inode_cache, &node->cache);
3089                 free(node);
3090                 if (rec->ino == root_dirid ||
3091                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092                         free_inode_rec(rec);
3093                         continue;
3094                 }
3095
3096                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097                         ret = check_orphan_item(root, rec->ino);
3098                         if (ret == 0)
3099                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100                         if (can_free_inode_rec(rec)) {
3101                                 free_inode_rec(rec);
3102                                 continue;
3103                         }
3104                 }
3105
3106                 if (!rec->found_inode_item)
3107                         rec->errors |= I_ERR_NO_INODE_ITEM;
3108                 if (rec->found_link != rec->nlink)
3109                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3110                 if (repair) {
3111                         ret = try_repair_inode(root, rec);
3112                         if (ret == 0 && can_free_inode_rec(rec)) {
3113                                 free_inode_rec(rec);
3114                                 continue;
3115                         }
3116                         ret = 0;
3117                 }
3118
3119                 if (!(repair && ret == 0))
3120                         error++;
3121                 print_inode_error(root, rec);
3122                 list_for_each_entry(backref, &rec->backrefs, list) {
3123                         if (!backref->found_dir_item)
3124                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125                         if (!backref->found_dir_index)
3126                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127                         if (!backref->found_inode_ref)
3128                                 backref->errors |= REF_ERR_NO_INODE_REF;
3129                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130                                 " namelen %u name %s filetype %d errors %x",
3131                                 (unsigned long long)backref->dir,
3132                                 (unsigned long long)backref->index,
3133                                 backref->namelen, backref->name,
3134                                 backref->filetype, backref->errors);
3135                         print_ref_error(backref->errors);
3136                 }
3137                 free_inode_rec(rec);
3138         }
3139         return (error > 0) ? -1 : 0;
3140 }
3141
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3143                                         u64 objectid)
3144 {
3145         struct cache_extent *cache;
3146         struct root_record *rec = NULL;
3147         int ret;
3148
3149         cache = lookup_cache_extent(root_cache, objectid, 1);
3150         if (cache) {
3151                 rec = container_of(cache, struct root_record, cache);
3152         } else {
3153                 rec = calloc(1, sizeof(*rec));
3154                 if (!rec)
3155                         return ERR_PTR(-ENOMEM);
3156                 rec->objectid = objectid;
3157                 INIT_LIST_HEAD(&rec->backrefs);
3158                 rec->cache.start = objectid;
3159                 rec->cache.size = 1;
3160
3161                 ret = insert_cache_extent(root_cache, &rec->cache);
3162                 if (ret)
3163                         return ERR_PTR(-EEXIST);
3164         }
3165         return rec;
3166 }
3167
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169                                              u64 ref_root, u64 dir, u64 index,
3170                                              const char *name, int namelen)
3171 {
3172         struct root_backref *backref;
3173
3174         list_for_each_entry(backref, &rec->backrefs, list) {
3175                 if (backref->ref_root != ref_root || backref->dir != dir ||
3176                     backref->namelen != namelen)
3177                         continue;
3178                 if (memcmp(name, backref->name, namelen))
3179                         continue;
3180                 return backref;
3181         }
3182
3183         backref = calloc(1, sizeof(*backref) + namelen + 1);
3184         if (!backref)
3185                 return NULL;
3186         backref->ref_root = ref_root;
3187         backref->dir = dir;
3188         backref->index = index;
3189         backref->namelen = namelen;
3190         memcpy(backref->name, name, namelen);
3191         backref->name[namelen] = '\0';
3192         list_add_tail(&backref->list, &rec->backrefs);
3193         return backref;
3194 }
3195
3196 static void free_root_record(struct cache_extent *cache)
3197 {
3198         struct root_record *rec;
3199         struct root_backref *backref;
3200
3201         rec = container_of(cache, struct root_record, cache);
3202         while (!list_empty(&rec->backrefs)) {
3203                 backref = to_root_backref(rec->backrefs.next);
3204                 list_del(&backref->list);
3205                 free(backref);
3206         }
3207
3208         free(rec);
3209 }
3210
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3212
3213 static int add_root_backref(struct cache_tree *root_cache,
3214                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3215                             const char *name, int namelen,
3216                             int item_type, int errors)
3217 {
3218         struct root_record *rec;
3219         struct root_backref *backref;
3220
3221         rec = get_root_rec(root_cache, root_id);
3222         BUG_ON(IS_ERR(rec));
3223         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3224         BUG_ON(!backref);
3225
3226         backref->errors |= errors;
3227
3228         if (item_type != BTRFS_DIR_ITEM_KEY) {
3229                 if (backref->found_dir_index || backref->found_back_ref ||
3230                     backref->found_forward_ref) {
3231                         if (backref->index != index)
3232                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3233                 } else {
3234                         backref->index = index;
3235                 }
3236         }
3237
3238         if (item_type == BTRFS_DIR_ITEM_KEY) {
3239                 if (backref->found_forward_ref)
3240                         rec->found_ref++;
3241                 backref->found_dir_item = 1;
3242         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243                 backref->found_dir_index = 1;
3244         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245                 if (backref->found_forward_ref)
3246                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3247                 else if (backref->found_dir_item)
3248                         rec->found_ref++;
3249                 backref->found_forward_ref = 1;
3250         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251                 if (backref->found_back_ref)
3252                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253                 backref->found_back_ref = 1;
3254         } else {
3255                 BUG_ON(1);
3256         }
3257
3258         if (backref->found_forward_ref && backref->found_dir_item)
3259                 backref->reachable = 1;
3260         return 0;
3261 }
3262
3263 static int merge_root_recs(struct btrfs_root *root,
3264                            struct cache_tree *src_cache,
3265                            struct cache_tree *dst_cache)
3266 {
3267         struct cache_extent *cache;
3268         struct ptr_node *node;
3269         struct inode_record *rec;
3270         struct inode_backref *backref;
3271         int ret = 0;
3272
3273         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274                 free_inode_recs_tree(src_cache);
3275                 return 0;
3276         }
3277
3278         while (1) {
3279                 cache = search_cache_extent(src_cache, 0);
3280                 if (!cache)
3281                         break;
3282                 node = container_of(cache, struct ptr_node, cache);
3283                 rec = node->data;
3284                 remove_cache_extent(src_cache, &node->cache);
3285                 free(node);
3286
3287                 ret = is_child_root(root, root->objectid, rec->ino);
3288                 if (ret < 0)
3289                         break;
3290                 else if (ret == 0)
3291                         goto skip;
3292
3293                 list_for_each_entry(backref, &rec->backrefs, list) {
3294                         BUG_ON(backref->found_inode_ref);
3295                         if (backref->found_dir_item)
3296                                 add_root_backref(dst_cache, rec->ino,
3297                                         root->root_key.objectid, backref->dir,
3298                                         backref->index, backref->name,
3299                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3300                                         backref->errors);
3301                         if (backref->found_dir_index)
3302                                 add_root_backref(dst_cache, rec->ino,
3303                                         root->root_key.objectid, backref->dir,
3304                                         backref->index, backref->name,
3305                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3306                                         backref->errors);
3307                 }
3308 skip:
3309                 free_inode_rec(rec);
3310         }
3311         if (ret < 0)
3312                 return ret;
3313         return 0;
3314 }
3315
3316 static int check_root_refs(struct btrfs_root *root,
3317                            struct cache_tree *root_cache)
3318 {
3319         struct root_record *rec;
3320         struct root_record *ref_root;
3321         struct root_backref *backref;
3322         struct cache_extent *cache;
3323         int loop = 1;
3324         int ret;
3325         int error;
3326         int errors = 0;
3327
3328         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329         BUG_ON(IS_ERR(rec));
3330         rec->found_ref = 1;
3331
3332         /* fixme: this can not detect circular references */
3333         while (loop) {
3334                 loop = 0;
3335                 cache = search_cache_extent(root_cache, 0);
3336                 while (1) {
3337                         if (!cache)
3338                                 break;
3339                         rec = container_of(cache, struct root_record, cache);
3340                         cache = next_cache_extent(cache);
3341
3342                         if (rec->found_ref == 0)
3343                                 continue;
3344
3345                         list_for_each_entry(backref, &rec->backrefs, list) {
3346                                 if (!backref->reachable)
3347                                         continue;
3348
3349                                 ref_root = get_root_rec(root_cache,
3350                                                         backref->ref_root);
3351                                 BUG_ON(IS_ERR(ref_root));
3352                                 if (ref_root->found_ref > 0)
3353                                         continue;
3354
3355                                 backref->reachable = 0;
3356                                 rec->found_ref--;
3357                                 if (rec->found_ref == 0)
3358                                         loop = 1;
3359                         }
3360                 }
3361         }
3362
3363         cache = search_cache_extent(root_cache, 0);
3364         while (1) {
3365                 if (!cache)
3366                         break;
3367                 rec = container_of(cache, struct root_record, cache);
3368                 cache = next_cache_extent(cache);
3369
3370                 if (rec->found_ref == 0 &&
3371                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373                         ret = check_orphan_item(root->fs_info->tree_root,
3374                                                 rec->objectid);
3375                         if (ret == 0)
3376                                 continue;
3377
3378                         /*
3379                          * If we don't have a root item then we likely just have
3380                          * a dir item in a snapshot for this root but no actual
3381                          * ref key or anything so it's meaningless.
3382                          */
3383                         if (!rec->found_root_item)
3384                                 continue;
3385                         errors++;
3386                         fprintf(stderr, "fs tree %llu not referenced\n",
3387                                 (unsigned long long)rec->objectid);
3388                 }
3389
3390                 error = 0;
3391                 if (rec->found_ref > 0 && !rec->found_root_item)
3392                         error = 1;
3393                 list_for_each_entry(backref, &rec->backrefs, list) {
3394                         if (!backref->found_dir_item)
3395                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396                         if (!backref->found_dir_index)
3397                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398                         if (!backref->found_back_ref)
3399                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400                         if (!backref->found_forward_ref)
3401                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3402                         if (backref->reachable && backref->errors)
3403                                 error = 1;
3404                 }
3405                 if (!error)
3406                         continue;
3407
3408                 errors++;
3409                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410                         (unsigned long long)rec->objectid, rec->found_ref,
3411                          rec->found_root_item ? "" : "not found");
3412
3413                 list_for_each_entry(backref, &rec->backrefs, list) {
3414                         if (!backref->reachable)
3415                                 continue;
3416                         if (!backref->errors && rec->found_root_item)
3417                                 continue;
3418                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419                                 " index %llu namelen %u name %s errors %x\n",
3420                                 (unsigned long long)backref->ref_root,
3421                                 (unsigned long long)backref->dir,
3422                                 (unsigned long long)backref->index,
3423                                 backref->namelen, backref->name,
3424                                 backref->errors);
3425                         print_ref_error(backref->errors);
3426                 }
3427         }
3428         return errors > 0 ? 1 : 0;
3429 }
3430
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432                             struct btrfs_key *key,
3433                             struct cache_tree *root_cache)
3434 {
3435         u64 dirid;
3436         u64 index;
3437         u32 len;
3438         u32 name_len;
3439         struct btrfs_root_ref *ref;
3440         char namebuf[BTRFS_NAME_LEN];
3441         int error;
3442
3443         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3444
3445         dirid = btrfs_root_ref_dirid(eb, ref);
3446         index = btrfs_root_ref_sequence(eb, ref);
3447         name_len = btrfs_root_ref_name_len(eb, ref);
3448
3449         if (name_len <= BTRFS_NAME_LEN) {
3450                 len = name_len;
3451                 error = 0;
3452         } else {
3453                 len = BTRFS_NAME_LEN;
3454                 error = REF_ERR_NAME_TOO_LONG;
3455         }
3456         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3457
3458         if (key->type == BTRFS_ROOT_REF_KEY) {
3459                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460                                  index, namebuf, len, key->type, error);
3461         } else {
3462                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463                                  index, namebuf, len, key->type, error);
3464         }
3465         return 0;
3466 }
3467
3468 static void free_corrupt_block(struct cache_extent *cache)
3469 {
3470         struct btrfs_corrupt_block *corrupt;
3471
3472         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3473         free(corrupt);
3474 }
3475
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3477
3478 /*
3479  * Repair the btree of the given root.
3480  *
3481  * The fix is to remove the node key in corrupt_blocks cache_tree.
3482  * and rebalance the tree.
3483  * After the fix, the btree should be writeable.
3484  */
3485 static int repair_btree(struct btrfs_root *root,
3486                         struct cache_tree *corrupt_blocks)
3487 {
3488         struct btrfs_trans_handle *trans;
3489         struct btrfs_path path;
3490         struct btrfs_corrupt_block *corrupt;
3491         struct cache_extent *cache;
3492         struct btrfs_key key;
3493         u64 offset;
3494         int level;
3495         int ret = 0;
3496
3497         if (cache_tree_empty(corrupt_blocks))
3498                 return 0;
3499
3500         trans = btrfs_start_transaction(root, 1);
3501         if (IS_ERR(trans)) {
3502                 ret = PTR_ERR(trans);
3503                 fprintf(stderr, "Error starting transaction: %s\n",
3504                         strerror(-ret));
3505                 return ret;
3506         }
3507         btrfs_init_path(&path);
3508         cache = first_cache_extent(corrupt_blocks);
3509         while (cache) {
3510                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3511                                        cache);
3512                 level = corrupt->level;
3513                 path.lowest_level = level;
3514                 key.objectid = corrupt->key.objectid;
3515                 key.type = corrupt->key.type;
3516                 key.offset = corrupt->key.offset;
3517
3518                 /*
3519                  * Here we don't want to do any tree balance, since it may
3520                  * cause a balance with corrupted brother leaf/node,
3521                  * so ins_len set to 0 here.
3522                  * Balance will be done after all corrupt node/leaf is deleted.
3523                  */
3524                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3525                 if (ret < 0)
3526                         goto out;
3527                 offset = btrfs_node_blockptr(path.nodes[level],
3528                                              path.slots[level]);
3529
3530                 /* Remove the ptr */
3531                 ret = btrfs_del_ptr(trans, root, &path, level,
3532                                     path.slots[level]);
3533                 if (ret < 0)
3534                         goto out;
3535                 /*
3536                  * Remove the corresponding extent
3537                  * return value is not concerned.
3538                  */
3539                 btrfs_release_path(&path);
3540                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541                                         0, root->root_key.objectid,
3542                                         level - 1, 0);
3543                 cache = next_cache_extent(cache);
3544         }
3545
3546         /* Balance the btree using btrfs_search_slot() */
3547         cache = first_cache_extent(corrupt_blocks);
3548         while (cache) {
3549                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3550                                        cache);
3551                 memcpy(&key, &corrupt->key, sizeof(key));
3552                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3553                 if (ret < 0)
3554                         goto out;
3555                 /* return will always >0 since it won't find the item */
3556                 ret = 0;
3557                 btrfs_release_path(&path);
3558                 cache = next_cache_extent(cache);
3559         }
3560 out:
3561         btrfs_commit_transaction(trans, root);
3562         btrfs_release_path(&path);
3563         return ret;
3564 }
3565
3566 static int check_fs_root(struct btrfs_root *root,
3567                          struct cache_tree *root_cache,
3568                          struct walk_control *wc)
3569 {
3570         int ret = 0;
3571         int err = 0;
3572         int wret;
3573         int level;
3574         struct btrfs_path path;
3575         struct shared_node root_node;
3576         struct root_record *rec;
3577         struct btrfs_root_item *root_item = &root->root_item;
3578         struct cache_tree corrupt_blocks;
3579         struct orphan_data_extent *orphan;
3580         struct orphan_data_extent *tmp;
3581         enum btrfs_tree_block_status status;
3582         struct node_refs nrefs;
3583
3584         /*
3585          * Reuse the corrupt_block cache tree to record corrupted tree block
3586          *
3587          * Unlike the usage in extent tree check, here we do it in a per
3588          * fs/subvol tree base.
3589          */
3590         cache_tree_init(&corrupt_blocks);
3591         root->fs_info->corrupt_blocks = &corrupt_blocks;
3592
3593         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594                 rec = get_root_rec(root_cache, root->root_key.objectid);
3595                 BUG_ON(IS_ERR(rec));
3596                 if (btrfs_root_refs(root_item) > 0)
3597                         rec->found_root_item = 1;
3598         }
3599
3600         btrfs_init_path(&path);
3601         memset(&root_node, 0, sizeof(root_node));
3602         cache_tree_init(&root_node.root_cache);
3603         cache_tree_init(&root_node.inode_cache);
3604         memset(&nrefs, 0, sizeof(nrefs));
3605
3606         /* Move the orphan extent record to corresponding inode_record */
3607         list_for_each_entry_safe(orphan, tmp,
3608                                  &root->orphan_data_extents, list) {
3609                 struct inode_record *inode;
3610
3611                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3612                                       1);
3613                 BUG_ON(IS_ERR(inode));
3614                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615                 list_move(&orphan->list, &inode->orphan_extents);
3616         }
3617
3618         level = btrfs_header_level(root->node);
3619         memset(wc->nodes, 0, sizeof(wc->nodes));
3620         wc->nodes[level] = &root_node;
3621         wc->active_node = level;
3622         wc->root_level = level;
3623
3624         /* We may not have checked the root block, lets do that now */
3625         if (btrfs_is_leaf(root->node))
3626                 status = btrfs_check_leaf(root, NULL, root->node);
3627         else
3628                 status = btrfs_check_node(root, NULL, root->node);
3629         if (status != BTRFS_TREE_BLOCK_CLEAN)
3630                 return -EIO;
3631
3632         if (btrfs_root_refs(root_item) > 0 ||
3633             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634                 path.nodes[level] = root->node;
3635                 extent_buffer_get(root->node);
3636                 path.slots[level] = 0;
3637         } else {
3638                 struct btrfs_key key;
3639                 struct btrfs_disk_key found_key;
3640
3641                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642                 level = root_item->drop_level;
3643                 path.lowest_level = level;
3644                 if (level > btrfs_header_level(root->node) ||
3645                     level >= BTRFS_MAX_LEVEL) {
3646                         error("ignoring invalid drop level: %u", level);
3647                         goto skip_walking;
3648                 }
3649                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3650                 if (wret < 0)
3651                         goto skip_walking;
3652                 btrfs_node_key(path.nodes[level], &found_key,
3653                                 path.slots[level]);
3654                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655                                         sizeof(found_key)));
3656         }
3657
3658         while (1) {
3659                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3660                 if (wret < 0)
3661                         ret = wret;
3662                 if (wret != 0)
3663                         break;
3664
3665                 wret = walk_up_tree(root, &path, wc, &level);
3666                 if (wret < 0)
3667                         ret = wret;
3668                 if (wret != 0)
3669                         break;
3670         }
3671 skip_walking:
3672         btrfs_release_path(&path);
3673
3674         if (!cache_tree_empty(&corrupt_blocks)) {
3675                 struct cache_extent *cache;
3676                 struct btrfs_corrupt_block *corrupt;
3677
3678                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679                        root->root_key.objectid);
3680                 cache = first_cache_extent(&corrupt_blocks);
3681                 while (cache) {
3682                         corrupt = container_of(cache,
3683                                                struct btrfs_corrupt_block,
3684                                                cache);
3685                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686                                cache->start, corrupt->level,
3687                                corrupt->key.objectid, corrupt->key.type,
3688                                corrupt->key.offset);
3689                         cache = next_cache_extent(cache);
3690                 }
3691                 if (repair) {
3692                         printf("Try to repair the btree for root %llu\n",
3693                                root->root_key.objectid);
3694                         ret = repair_btree(root, &corrupt_blocks);
3695                         if (ret < 0)
3696                                 fprintf(stderr, "Failed to repair btree: %s\n",
3697                                         strerror(-ret));
3698                         if (!ret)
3699                                 printf("Btree for root %llu is fixed\n",
3700                                        root->root_key.objectid);
3701                 }
3702         }
3703
3704         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3705         if (err < 0)
3706                 ret = err;
3707
3708         if (root_node.current) {
3709                 root_node.current->checked = 1;
3710                 maybe_free_inode_rec(&root_node.inode_cache,
3711                                 root_node.current);
3712         }
3713
3714         err = check_inode_recs(root, &root_node.inode_cache);
3715         if (!ret)
3716                 ret = err;
3717
3718         free_corrupt_blocks_tree(&corrupt_blocks);
3719         root->fs_info->corrupt_blocks = NULL;
3720         free_orphan_data_extents(&root->orphan_data_extents);
3721         return ret;
3722 }
3723
3724 static int fs_root_objectid(u64 objectid)
3725 {
3726         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3728                 return 1;
3729         return is_fstree(objectid);
3730 }
3731
3732 static int check_fs_roots(struct btrfs_root *root,
3733                           struct cache_tree *root_cache)
3734 {
3735         struct btrfs_path path;
3736         struct btrfs_key key;
3737         struct walk_control wc;
3738         struct extent_buffer *leaf, *tree_node;
3739         struct btrfs_root *tmp_root;
3740         struct btrfs_root *tree_root = root->fs_info->tree_root;
3741         int ret;
3742         int err = 0;
3743
3744         if (ctx.progress_enabled) {
3745                 ctx.tp = TASK_FS_ROOTS;
3746                 task_start(ctx.info);
3747         }
3748
3749         /*
3750          * Just in case we made any changes to the extent tree that weren't
3751          * reflected into the free space cache yet.
3752          */
3753         if (repair)
3754                 reset_cached_block_groups(root->fs_info);
3755         memset(&wc, 0, sizeof(wc));
3756         cache_tree_init(&wc.shared);
3757         btrfs_init_path(&path);
3758
3759 again:
3760         key.offset = 0;
3761         key.objectid = 0;
3762         key.type = BTRFS_ROOT_ITEM_KEY;
3763         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3764         if (ret < 0) {
3765                 err = 1;
3766                 goto out;
3767         }
3768         tree_node = tree_root->node;
3769         while (1) {
3770                 if (tree_node != tree_root->node) {
3771                         free_root_recs_tree(root_cache);
3772                         btrfs_release_path(&path);
3773                         goto again;
3774                 }
3775                 leaf = path.nodes[0];
3776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777                         ret = btrfs_next_leaf(tree_root, &path);
3778                         if (ret) {
3779                                 if (ret < 0)
3780                                         err = 1;
3781                                 break;
3782                         }
3783                         leaf = path.nodes[0];
3784                 }
3785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787                     fs_root_objectid(key.objectid)) {
3788                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789                                 tmp_root = btrfs_read_fs_root_no_cache(
3790                                                 root->fs_info, &key);
3791                         } else {
3792                                 key.offset = (u64)-1;
3793                                 tmp_root = btrfs_read_fs_root(
3794                                                 root->fs_info, &key);
3795                         }
3796                         if (IS_ERR(tmp_root)) {
3797                                 err = 1;
3798                                 goto next;
3799                         }
3800                         ret = check_fs_root(tmp_root, root_cache, &wc);
3801                         if (ret == -EAGAIN) {
3802                                 free_root_recs_tree(root_cache);
3803                                 btrfs_release_path(&path);
3804                                 goto again;
3805                         }
3806                         if (ret)
3807                                 err = 1;
3808                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809                                 btrfs_free_fs_root(tmp_root);
3810                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3812                         process_root_ref(leaf, path.slots[0], &key,
3813                                          root_cache);
3814                 }
3815 next:
3816                 path.slots[0]++;
3817         }
3818 out:
3819         btrfs_release_path(&path);
3820         if (err)
3821                 free_extent_cache_tree(&wc.shared);
3822         if (!cache_tree_empty(&wc.shared))
3823                 fprintf(stderr, "warning line %d\n", __LINE__);
3824
3825         task_stop(ctx.info);
3826
3827         return err;
3828 }
3829
/* Error bits for the DIR_ITEM / INODE_REF / INODE_ITEM cross-checks. */
#define ROOT_DIR_ERROR		(1 << 1)	/* ROOT_DIR is bad */
#define DIR_ITEM_MISSING	(1 << 2)	/* no DIR_ITEM was found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM exists but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* no INODE_REF/INODE_EXTREF was found */
#define INODE_ITEM_MISSING	(1 << 5)	/* no INODE_ITEM was found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM exists but does not match */
3836
3837 /*
3838  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3839  * INODE_REF/INODE_EXTREF match.
3840  *
3841  * @root:       the root of the fs/file tree
3842  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
3843  * @key:        the key of the DIR_ITEM/DIR_INDEX
3844  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
3845  *              distinguish root_dir between normal dir/file
3846  * @name:       the name in the INODE_REF/INODE_EXTREF
3847  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
3848  * @mode:       the st_mode of INODE_ITEM
3849  *
3850  * Return 0 if no error occurred.
3851  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3852  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3853  * dir/file.
3854  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3855  * not match for normal dir/file.
3856  */
3857 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3858                          struct btrfs_key *key, u64 index, char *name,
3859                          u32 namelen, u32 mode)
3860 {
3861         struct btrfs_path path;
3862         struct extent_buffer *node;
3863         struct btrfs_dir_item *di;
3864         struct btrfs_key location;
3865         char namebuf[BTRFS_NAME_LEN] = {0};
3866         u32 total;
3867         u32 cur = 0;
3868         u32 len;
3869         u32 name_len;
3870         u32 data_len;
3871         u8 filetype;
3872         int slot;
3873         int ret;
3874
3875         btrfs_init_path(&path);
3876         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3877         if (ret < 0) {
3878                 ret = DIR_ITEM_MISSING;
3879                 goto out;
3880         }
3881
3882         /* Process root dir and goto out*/
3883         if (index == 0) {
3884                 if (ret == 0) {
3885                         ret = ROOT_DIR_ERROR;
3886                         error(
3887                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3888                                 root->objectid,
3889                                 ref_key->type == BTRFS_INODE_REF_KEY ?
3890                                         "REF" : "EXTREF",
3891                                 ref_key->objectid, ref_key->offset,
3892                                 key->type == BTRFS_DIR_ITEM_KEY ?
3893                                         "DIR_ITEM" : "DIR_INDEX");
3894                 } else {
3895                         ret = 0;
3896                 }
3897
3898                 goto out;
3899         }
3900
3901         /* Process normal file/dir */
3902         if (ret > 0) {
3903                 ret = DIR_ITEM_MISSING;
3904                 error(
3905                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3906                         root->objectid,
3907                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3908                         ref_key->objectid, ref_key->offset,
3909                         key->type == BTRFS_DIR_ITEM_KEY ?
3910                                 "DIR_ITEM" : "DIR_INDEX",
3911                         key->objectid, key->offset, namelen, name,
3912                         imode_to_type(mode));
3913                 goto out;
3914         }
3915
3916         /* Check whether inode_id/filetype/name match */
3917         node = path.nodes[0];
3918         slot = path.slots[0];
3919         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3920         total = btrfs_item_size_nr(node, slot);
3921         while (cur < total) {
3922                 ret = DIR_ITEM_MISMATCH;
3923                 name_len = btrfs_dir_name_len(node, di);
3924                 data_len = btrfs_dir_data_len(node, di);
3925
3926                 btrfs_dir_item_key_to_cpu(node, di, &location);
3927                 if (location.objectid != ref_key->objectid ||
3928                     location.type !=  BTRFS_INODE_ITEM_KEY ||
3929                     location.offset != 0)
3930                         goto next;
3931
3932                 filetype = btrfs_dir_type(node, di);
3933                 if (imode_to_type(mode) != filetype)
3934                         goto next;
3935
3936                 if (name_len <= BTRFS_NAME_LEN) {
3937                         len = name_len;
3938                 } else {
3939                         len = BTRFS_NAME_LEN;
3940                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3941                         root->objectid,
3942                         key->type == BTRFS_DIR_ITEM_KEY ?
3943                         "DIR_ITEM" : "DIR_INDEX",
3944                         key->objectid, key->offset, name_len);
3945                 }
3946                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3947                 if (len != namelen || strncmp(namebuf, name, len))
3948                         goto next;
3949
3950                 ret = 0;
3951                 goto out;
3952 next:
3953                 len = sizeof(*di) + name_len + data_len;
3954                 di = (struct btrfs_dir_item *)((char *)di + len);
3955                 cur += len;
3956         }
3957         if (ret == DIR_ITEM_MISMATCH)
3958                 error(
3959                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3960                         root->objectid,
3961                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3962                         ref_key->objectid, ref_key->offset,
3963                         key->type == BTRFS_DIR_ITEM_KEY ?
3964                                 "DIR_ITEM" : "DIR_INDEX",
3965                         key->objectid, key->offset, namelen, name,
3966                         imode_to_type(mode));
3967 out:
3968         btrfs_release_path(&path);
3969         return ret;
3970 }
3971
3972 /*
3973  * Traverse the given INODE_REF and call find_dir_item() to find related
3974  * DIR_ITEM/DIR_INDEX.
3975  *
3976  * @root:       the root of the fs/file tree
3977  * @ref_key:    the key of the INODE_REF
3978  * @refs:       the count of INODE_REF
3979  * @mode:       the st_mode of INODE_ITEM
3980  *
3981  * Return 0 if no error occurred.
3982  */
3983 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3984                            struct extent_buffer *node, int slot, u64 *refs,
3985                            int mode)
3986 {
3987         struct btrfs_key key;
3988         struct btrfs_inode_ref *ref;
3989         char namebuf[BTRFS_NAME_LEN] = {0};
3990         u32 total;
3991         u32 cur = 0;
3992         u32 len;
3993         u32 name_len;
3994         u64 index;
3995         int ret, err = 0;
3996
3997         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
3998         total = btrfs_item_size_nr(node, slot);
3999
4000 next:
4001         /* Update inode ref count */
4002         (*refs)++;
4003
4004         index = btrfs_inode_ref_index(node, ref);
4005         name_len = btrfs_inode_ref_name_len(node, ref);
4006         if (name_len <= BTRFS_NAME_LEN) {
4007                 len = name_len;
4008         } else {
4009                 len = BTRFS_NAME_LEN;
4010                 warning("root %llu INODE_REF[%llu %llu] name too long",
4011                         root->objectid, ref_key->objectid, ref_key->offset);
4012         }
4013
4014         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4015
4016         /* Check root dir ref name */
4017         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4018                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4019                       root->objectid, ref_key->objectid, ref_key->offset,
4020                       namebuf);
4021                 err |= ROOT_DIR_ERROR;
4022         }
4023
4024         /* Find related DIR_INDEX */
4025         key.objectid = ref_key->offset;
4026         key.type = BTRFS_DIR_INDEX_KEY;
4027         key.offset = index;
4028         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4029         err |= ret;
4030
4031         /* Find related dir_item */
4032         key.objectid = ref_key->offset;
4033         key.type = BTRFS_DIR_ITEM_KEY;
4034         key.offset = btrfs_name_hash(namebuf, len);
4035         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4036         err |= ret;
4037
4038         len = sizeof(*ref) + name_len;
4039         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4040         cur += len;
4041         if (cur < total)
4042                 goto next;
4043
4044         return err;
4045 }
4046
4047 /*
4048  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4049  * DIR_ITEM/DIR_INDEX.
4050  *
4051  * @root:       the root of the fs/file tree
4052  * @ref_key:    the key of the INODE_EXTREF
4053  * @refs:       the count of INODE_EXTREF
4054  * @mode:       the st_mode of INODE_ITEM
4055  *
4056  * Return 0 if no error occurred.
4057  */
4058 static int check_inode_extref(struct btrfs_root *root,
4059                               struct btrfs_key *ref_key,
4060                               struct extent_buffer *node, int slot, u64 *refs,
4061                               int mode)
4062 {
4063         struct btrfs_key key;
4064         struct btrfs_inode_extref *extref;
4065         char namebuf[BTRFS_NAME_LEN] = {0};
4066         u32 total;
4067         u32 cur = 0;
4068         u32 len;
4069         u32 name_len;
4070         u64 index;
4071         u64 parent;
4072         int ret;
4073         int err = 0;
4074
4075         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4076         total = btrfs_item_size_nr(node, slot);
4077
4078 next:
4079         /* update inode ref count */
4080         (*refs)++;
4081         name_len = btrfs_inode_extref_name_len(node, extref);
4082         index = btrfs_inode_extref_index(node, extref);
4083         parent = btrfs_inode_extref_parent(node, extref);
4084         if (name_len <= BTRFS_NAME_LEN) {
4085                 len = name_len;
4086         } else {
4087                 len = BTRFS_NAME_LEN;
4088                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4089                         root->objectid, ref_key->objectid, ref_key->offset);
4090         }
4091         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4092
4093         /* Check root dir ref name */
4094         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4095                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4096                       root->objectid, ref_key->objectid, ref_key->offset,
4097                       namebuf);
4098                 err |= ROOT_DIR_ERROR;
4099         }
4100
4101         /* find related dir_index */
4102         key.objectid = parent;
4103         key.type = BTRFS_DIR_INDEX_KEY;
4104         key.offset = index;
4105         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4106         err |= ret;
4107
4108         /* find related dir_item */
4109         key.objectid = parent;
4110         key.type = BTRFS_DIR_ITEM_KEY;
4111         key.offset = btrfs_name_hash(namebuf, len);
4112         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4113         err |= ret;
4114
4115         len = sizeof(*extref) + name_len;
4116         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4117         cur += len;
4118
4119         if (cur < total)
4120                 goto next;
4121
4122         return err;
4123 }
4124
4125 /*
4126  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4127  * DIR_ITEM/DIR_INDEX match.
4128  *
4129  * @root:       the root of the fs/file tree
4130  * @key:        the key of the INODE_REF/INODE_EXTREF
4131  * @name:       the name in the INODE_REF/INODE_EXTREF
4132  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4133  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4134  * to (u64)-1
4135  * @ext_ref:    the EXTENDED_IREF feature
4136  *
4137  * Return 0 if no error occurred.
4138  * Return >0 for error bitmap
4139  */
4140 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4141                           char *name, int namelen, u64 index,
4142                           unsigned int ext_ref)
4143 {
4144         struct btrfs_path path;
4145         struct btrfs_inode_ref *ref;
4146         struct btrfs_inode_extref *extref;
4147         struct extent_buffer *node;
4148         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4149         u32 total;
4150         u32 cur = 0;
4151         u32 len;
4152         u32 ref_namelen;
4153         u64 ref_index;
4154         u64 parent;
4155         u64 dir_id;
4156         int slot;
4157         int ret;
4158
4159         btrfs_init_path(&path);
4160         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4161         if (ret) {
4162                 ret = INODE_REF_MISSING;
4163                 goto extref;
4164         }
4165
4166         node = path.nodes[0];
4167         slot = path.slots[0];
4168
4169         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4170         total = btrfs_item_size_nr(node, slot);
4171
4172         /* Iterate all entry of INODE_REF */
4173         while (cur < total) {
4174                 ret = INODE_REF_MISSING;
4175
4176                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4177                 ref_index = btrfs_inode_ref_index(node, ref);
4178                 if (index != (u64)-1 && index != ref_index)
4179                         goto next_ref;
4180
4181                 if (ref_namelen <= BTRFS_NAME_LEN) {
4182                         len = ref_namelen;
4183                 } else {
4184                         len = BTRFS_NAME_LEN;
4185                         warning("root %llu INODE %s[%llu %llu] name too long",
4186                                 root->objectid,
4187                                 key->type == BTRFS_INODE_REF_KEY ?
4188                                         "REF" : "EXTREF",
4189                                 key->objectid, key->offset);
4190                 }
4191                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4192                                    len);
4193
4194                 if (len != namelen || strncmp(ref_namebuf, name, len))
4195                         goto next_ref;
4196
4197                 ret = 0;
4198                 goto out;
4199 next_ref:
4200                 len = sizeof(*ref) + ref_namelen;
4201                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4202                 cur += len;
4203         }
4204
4205 extref:
4206         /* Skip if not support EXTENDED_IREF feature */
4207         if (!ext_ref)
4208                 goto out;
4209
4210         btrfs_release_path(&path);
4211         btrfs_init_path(&path);
4212
4213         dir_id = key->offset;
4214         key->type = BTRFS_INODE_EXTREF_KEY;
4215         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4216
4217         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4218         if (ret) {
4219                 ret = INODE_REF_MISSING;
4220                 goto out;
4221         }
4222
4223         node = path.nodes[0];
4224         slot = path.slots[0];
4225
4226         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4227         cur = 0;
4228         total = btrfs_item_size_nr(node, slot);
4229
4230         /* Iterate all entry of INODE_EXTREF */
4231         while (cur < total) {
4232                 ret = INODE_REF_MISSING;
4233
4234                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4235                 ref_index = btrfs_inode_extref_index(node, extref);
4236                 parent = btrfs_inode_extref_parent(node, extref);
4237                 if (index != (u64)-1 && index != ref_index)
4238                         goto next_extref;
4239
4240                 if (parent != dir_id)
4241                         goto next_extref;
4242
4243                 if (ref_namelen <= BTRFS_NAME_LEN) {
4244                         len = ref_namelen;
4245                 } else {
4246                         len = BTRFS_NAME_LEN;
4247                         warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4248                                 root->objectid,
4249                                 key->type == BTRFS_INODE_REF_KEY ?
4250                                         "REF" : "EXTREF",
4251                                 key->objectid, key->offset);
4252                 }
4253                 read_extent_buffer(node, ref_namebuf,
4254                                    (unsigned long)(extref + 1), len);
4255
4256                 if (len != namelen || strncmp(ref_namebuf, name, len))
4257                         goto next_extref;
4258
4259                 ret = 0;
4260                 goto out;
4261
4262 next_extref:
4263                 len = sizeof(*extref) + ref_namelen;
4264                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4265                 cur += len;
4266
4267         }
4268 out:
4269         btrfs_release_path(&path);
4270         return ret;
4271 }
4272
4273 /*
4274  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4275  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4276  *
4277  * @root:       the root of the fs/file tree
4278  * @key:        the key of the INODE_REF/INODE_EXTREF
4279  * @size:       the st_size of the INODE_ITEM
4280  * @ext_ref:    the EXTENDED_IREF feature
4281  *
4282  * Return 0 if no error occurred.
4283  */
4284 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4285                           struct extent_buffer *node, int slot, u64 *size,
4286                           unsigned int ext_ref)
4287 {
4288         struct btrfs_dir_item *di;
4289         struct btrfs_inode_item *ii;
4290         struct btrfs_path path;
4291         struct btrfs_key location;
4292         char namebuf[BTRFS_NAME_LEN] = {0};
4293         u32 total;
4294         u32 cur = 0;
4295         u32 len;
4296         u32 name_len;
4297         u32 data_len;
4298         u8 filetype;
4299         u32 mode;
4300         u64 index;
4301         int ret;
4302         int err = 0;
4303
4304         /*
4305          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4306          * ignore index check.
4307          */
4308         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4309
4310         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4311         total = btrfs_item_size_nr(node, slot);
4312
4313         while (cur < total) {
4314                 data_len = btrfs_dir_data_len(node, di);
4315                 if (data_len)
4316                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4317                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4318                               "DIR_ITEM" : "DIR_INDEX",
4319                               key->objectid, key->offset, data_len);
4320
4321                 name_len = btrfs_dir_name_len(node, di);
4322                 if (name_len <= BTRFS_NAME_LEN) {
4323                         len = name_len;
4324                 } else {
4325                         len = BTRFS_NAME_LEN;
4326                         warning("root %llu %s[%llu %llu] name too long",
4327                                 root->objectid,
4328                                 key->type == BTRFS_DIR_ITEM_KEY ?
4329                                 "DIR_ITEM" : "DIR_INDEX",
4330                                 key->objectid, key->offset);
4331                 }
4332                 (*size) += name_len;
4333
4334                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4335                 filetype = btrfs_dir_type(node, di);
4336
4337                 btrfs_init_path(&path);
4338                 btrfs_dir_item_key_to_cpu(node, di, &location);
4339
4340                 /* Ignore related ROOT_ITEM check */
4341                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4342                         goto next;
4343
4344                 /* Check relative INODE_ITEM(existence/filetype) */
4345                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4346                 if (ret) {
4347                         err |= INODE_ITEM_MISSING;
4348                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4349                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4350                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4351                               key->offset, location.objectid, name_len,
4352                               namebuf, filetype);
4353                         goto next;
4354                 }
4355
4356                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4357                                     struct btrfs_inode_item);
4358                 mode = btrfs_inode_mode(path.nodes[0], ii);
4359
4360                 if (imode_to_type(mode) != filetype) {
4361                         err |= INODE_ITEM_MISMATCH;
4362                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4363                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4364                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4365                               key->offset, name_len, namebuf, filetype);
4366                 }
4367
4368                 /* Check relative INODE_REF/INODE_EXTREF */
4369                 location.type = BTRFS_INODE_REF_KEY;
4370                 location.offset = key->objectid;
4371                 ret = find_inode_ref(root, &location, namebuf, len,
4372                                        index, ext_ref);
4373                 err |= ret;
4374                 if (ret & INODE_REF_MISSING)
4375                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4376                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4377                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4378                               key->offset, name_len, namebuf, filetype);
4379
4380 next:
4381                 btrfs_release_path(&path);
4382                 len = sizeof(*di) + name_len + data_len;
4383                 di = (struct btrfs_dir_item *)((char *)di + len);
4384                 cur += len;
4385
4386                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4387                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4388                               root->objectid, key->objectid, key->offset);
4389                         break;
4390                 }
4391         }
4392
4393         return err;
4394 }
4395
4396 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4397 {
4398         struct list_head *cur = rec->backrefs.next;
4399         struct extent_backref *back;
4400         struct tree_backref *tback;
4401         struct data_backref *dback;
4402         u64 found = 0;
4403         int err = 0;
4404
4405         while(cur != &rec->backrefs) {
4406                 back = to_extent_backref(cur);
4407                 cur = cur->next;
4408                 if (!back->found_extent_tree) {
4409                         err = 1;
4410                         if (!print_errs)
4411                                 goto out;
4412                         if (back->is_data) {
4413                                 dback = to_data_backref(back);
4414                                 fprintf(stderr, "Backref %llu %s %llu"
4415                                         " owner %llu offset %llu num_refs %lu"
4416                                         " not found in extent tree\n",
4417                                         (unsigned long long)rec->start,
4418                                         back->full_backref ?
4419                                         "parent" : "root",
4420                                         back->full_backref ?
4421                                         (unsigned long long)dback->parent:
4422                                         (unsigned long long)dback->root,
4423                                         (unsigned long long)dback->owner,
4424                                         (unsigned long long)dback->offset,
4425                                         (unsigned long)dback->num_refs);
4426                         } else {
4427                                 tback = to_tree_backref(back);
4428                                 fprintf(stderr, "Backref %llu parent %llu"
4429                                         " root %llu not found in extent tree\n",
4430                                         (unsigned long long)rec->start,
4431                                         (unsigned long long)tback->parent,
4432                                         (unsigned long long)tback->root);
4433                         }
4434                 }
4435                 if (!back->is_data && !back->found_ref) {
4436                         err = 1;
4437                         if (!print_errs)
4438                                 goto out;
4439                         tback = to_tree_backref(back);
4440                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4441                                 (unsigned long long)rec->start,
4442                                 back->full_backref ? "parent" : "root",
4443                                 back->full_backref ?
4444                                 (unsigned long long)tback->parent :
4445                                 (unsigned long long)tback->root, back);
4446                 }
4447                 if (back->is_data) {
4448                         dback = to_data_backref(back);
4449                         if (dback->found_ref != dback->num_refs) {
4450                                 err = 1;
4451                                 if (!print_errs)
4452                                         goto out;
4453                                 fprintf(stderr, "Incorrect local backref count"
4454                                         " on %llu %s %llu owner %llu"
4455                                         " offset %llu found %u wanted %u back %p\n",
4456                                         (unsigned long long)rec->start,
4457                                         back->full_backref ?
4458                                         "parent" : "root",
4459                                         back->full_backref ?
4460                                         (unsigned long long)dback->parent:
4461                                         (unsigned long long)dback->root,
4462                                         (unsigned long long)dback->owner,
4463                                         (unsigned long long)dback->offset,
4464                                         dback->found_ref, dback->num_refs, back);
4465                         }
4466                         if (dback->disk_bytenr != rec->start) {
4467                                 err = 1;
4468                                 if (!print_errs)
4469                                         goto out;
4470                                 fprintf(stderr, "Backref disk bytenr does not"
4471                                         " match extent record, bytenr=%llu, "
4472                                         "ref bytenr=%llu\n",
4473                                         (unsigned long long)rec->start,
4474                                         (unsigned long long)dback->disk_bytenr);
4475                         }
4476
4477                         if (dback->bytes != rec->nr) {
4478                                 err = 1;
4479                                 if (!print_errs)
4480                                         goto out;
4481                                 fprintf(stderr, "Backref bytes do not match "
4482                                         "extent backref, bytenr=%llu, ref "
4483                                         "bytes=%llu, backref bytes=%llu\n",
4484                                         (unsigned long long)rec->start,
4485                                         (unsigned long long)rec->nr,
4486                                         (unsigned long long)dback->bytes);
4487                         }
4488                 }
4489                 if (!back->is_data) {
4490                         found += 1;
4491                 } else {
4492                         dback = to_data_backref(back);
4493                         found += dback->found_ref;
4494                 }
4495         }
4496         if (found != rec->refs) {
4497                 err = 1;
4498                 if (!print_errs)
4499                         goto out;
4500                 fprintf(stderr, "Incorrect global backref count "
4501                         "on %llu found %llu wanted %llu\n",
4502                         (unsigned long long)rec->start,
4503                         (unsigned long long)found,
4504                         (unsigned long long)rec->refs);
4505         }
4506 out:
4507         return err;
4508 }
4509
4510 static int free_all_extent_backrefs(struct extent_record *rec)
4511 {
4512         struct extent_backref *back;
4513         struct list_head *cur;
4514         while (!list_empty(&rec->backrefs)) {
4515                 cur = rec->backrefs.next;
4516                 back = to_extent_backref(cur);
4517                 list_del(cur);
4518                 free(back);
4519         }
4520         return 0;
4521 }
4522
4523 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4524                                      struct cache_tree *extent_cache)
4525 {
4526         struct cache_extent *cache;
4527         struct extent_record *rec;
4528
4529         while (1) {
4530                 cache = first_cache_extent(extent_cache);
4531                 if (!cache)
4532                         break;
4533                 rec = container_of(cache, struct extent_record, cache);
4534                 remove_cache_extent(extent_cache, cache);
4535                 free_all_extent_backrefs(rec);
4536                 free(rec);
4537         }
4538 }
4539
4540 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4541                                  struct extent_record *rec)
4542 {
4543         if (rec->content_checked && rec->owner_ref_checked &&
4544             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4545             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4546             !rec->bad_full_backref && !rec->crossing_stripes &&
4547             !rec->wrong_chunk_type) {
4548                 remove_cache_extent(extent_cache, &rec->cache);
4549                 free_all_extent_backrefs(rec);
4550                 list_del_init(&rec->list);
4551                 free(rec);
4552         }
4553         return 0;
4554 }
4555
4556 static int check_owner_ref(struct btrfs_root *root,
4557                             struct extent_record *rec,
4558                             struct extent_buffer *buf)
4559 {
4560         struct extent_backref *node;
4561         struct tree_backref *back;
4562         struct btrfs_root *ref_root;
4563         struct btrfs_key key;
4564         struct btrfs_path path;
4565         struct extent_buffer *parent;
4566         int level;
4567         int found = 0;
4568         int ret;
4569
4570         list_for_each_entry(node, &rec->backrefs, list) {
4571                 if (node->is_data)
4572                         continue;
4573                 if (!node->found_ref)
4574                         continue;
4575                 if (node->full_backref)
4576                         continue;
4577                 back = to_tree_backref(node);
4578                 if (btrfs_header_owner(buf) == back->root)
4579                         return 0;
4580         }
4581         BUG_ON(rec->is_root);
4582
4583         /* try to find the block by search corresponding fs tree */
4584         key.objectid = btrfs_header_owner(buf);
4585         key.type = BTRFS_ROOT_ITEM_KEY;
4586         key.offset = (u64)-1;
4587
4588         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4589         if (IS_ERR(ref_root))
4590                 return 1;
4591
4592         level = btrfs_header_level(buf);
4593         if (level == 0)
4594                 btrfs_item_key_to_cpu(buf, &key, 0);
4595         else
4596                 btrfs_node_key_to_cpu(buf, &key, 0);
4597
4598         btrfs_init_path(&path);
4599         path.lowest_level = level + 1;
4600         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4601         if (ret < 0)
4602                 return 0;
4603
4604         parent = path.nodes[level + 1];
4605         if (parent && buf->start == btrfs_node_blockptr(parent,
4606                                                         path.slots[level + 1]))
4607                 found = 1;
4608
4609         btrfs_release_path(&path);
4610         return found ? 0 : 1;
4611 }
4612
4613 static int is_extent_tree_record(struct extent_record *rec)
4614 {
4615         struct list_head *cur = rec->backrefs.next;
4616         struct extent_backref *node;
4617         struct tree_backref *back;
4618         int is_extent = 0;
4619
4620         while(cur != &rec->backrefs) {
4621                 node = to_extent_backref(cur);
4622                 cur = cur->next;
4623                 if (node->is_data)
4624                         return 0;
4625                 back = to_tree_backref(node);
4626                 if (node->full_backref)
4627                         return 0;
4628                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4629                         is_extent = 1;
4630         }
4631         return is_extent;
4632 }
4633
4634
4635 static int record_bad_block_io(struct btrfs_fs_info *info,
4636                                struct cache_tree *extent_cache,
4637                                u64 start, u64 len)
4638 {
4639         struct extent_record *rec;
4640         struct cache_extent *cache;
4641         struct btrfs_key key;
4642
4643         cache = lookup_cache_extent(extent_cache, start, len);
4644         if (!cache)
4645                 return 0;
4646
4647         rec = container_of(cache, struct extent_record, cache);
4648         if (!is_extent_tree_record(rec))
4649                 return 0;
4650
4651         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4652         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4653 }
4654
4655 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4656                        struct extent_buffer *buf, int slot)
4657 {
4658         if (btrfs_header_level(buf)) {
4659                 struct btrfs_key_ptr ptr1, ptr2;
4660
4661                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4662                                    sizeof(struct btrfs_key_ptr));
4663                 read_extent_buffer(buf, &ptr2,
4664                                    btrfs_node_key_ptr_offset(slot + 1),
4665                                    sizeof(struct btrfs_key_ptr));
4666                 write_extent_buffer(buf, &ptr1,
4667                                     btrfs_node_key_ptr_offset(slot + 1),
4668                                     sizeof(struct btrfs_key_ptr));
4669                 write_extent_buffer(buf, &ptr2,
4670                                     btrfs_node_key_ptr_offset(slot),
4671                                     sizeof(struct btrfs_key_ptr));
4672                 if (slot == 0) {
4673                         struct btrfs_disk_key key;
4674                         btrfs_node_key(buf, &key, 0);
4675                         btrfs_fixup_low_keys(root, path, &key,
4676                                              btrfs_header_level(buf) + 1);
4677                 }
4678         } else {
4679                 struct btrfs_item *item1, *item2;
4680                 struct btrfs_key k1, k2;
4681                 char *item1_data, *item2_data;
4682                 u32 item1_offset, item2_offset, item1_size, item2_size;
4683
4684                 item1 = btrfs_item_nr(slot);
4685                 item2 = btrfs_item_nr(slot + 1);
4686                 btrfs_item_key_to_cpu(buf, &k1, slot);
4687                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4688                 item1_offset = btrfs_item_offset(buf, item1);
4689                 item2_offset = btrfs_item_offset(buf, item2);
4690                 item1_size = btrfs_item_size(buf, item1);
4691                 item2_size = btrfs_item_size(buf, item2);
4692
4693                 item1_data = malloc(item1_size);
4694                 if (!item1_data)
4695                         return -ENOMEM;
4696                 item2_data = malloc(item2_size);
4697                 if (!item2_data) {
4698                         free(item1_data);
4699                         return -ENOMEM;
4700                 }
4701
4702                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4703                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4704
4705                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4706                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4707                 free(item1_data);
4708                 free(item2_data);
4709
4710                 btrfs_set_item_offset(buf, item1, item2_offset);
4711                 btrfs_set_item_offset(buf, item2, item1_offset);
4712                 btrfs_set_item_size(buf, item1, item2_size);
4713                 btrfs_set_item_size(buf, item2, item1_size);
4714
4715                 path->slots[0] = slot;
4716                 btrfs_set_item_key_unsafe(root, path, &k2);
4717                 path->slots[0] = slot + 1;
4718                 btrfs_set_item_key_unsafe(root, path, &k1);
4719         }
4720         return 0;
4721 }
4722
4723 static int fix_key_order(struct btrfs_trans_handle *trans,
4724                          struct btrfs_root *root,
4725                          struct btrfs_path *path)
4726 {
4727         struct extent_buffer *buf;
4728         struct btrfs_key k1, k2;
4729         int i;
4730         int level = path->lowest_level;
4731         int ret = -EIO;
4732
4733         buf = path->nodes[level];
4734         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4735                 if (level) {
4736                         btrfs_node_key_to_cpu(buf, &k1, i);
4737                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4738                 } else {
4739                         btrfs_item_key_to_cpu(buf, &k1, i);
4740                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4741                 }
4742                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4743                         continue;
4744                 ret = swap_values(root, path, buf, i);
4745                 if (ret)
4746                         break;
4747                 btrfs_mark_buffer_dirty(buf);
4748                 i = 0;
4749         }
4750         return ret;
4751 }
4752
4753 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4754                              struct btrfs_root *root,
4755                              struct btrfs_path *path,
4756                              struct extent_buffer *buf, int slot)
4757 {
4758         struct btrfs_key key;
4759         int nritems = btrfs_header_nritems(buf);
4760
4761         btrfs_item_key_to_cpu(buf, &key, slot);
4762
4763         /* These are all the keys we can deal with missing. */
4764         if (key.type != BTRFS_DIR_INDEX_KEY &&
4765             key.type != BTRFS_EXTENT_ITEM_KEY &&
4766             key.type != BTRFS_METADATA_ITEM_KEY &&
4767             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4768             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4769                 return -1;
4770
4771         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4772                (unsigned long long)key.objectid, key.type,
4773                (unsigned long long)key.offset, slot, buf->start);
4774         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4775                               btrfs_item_nr_offset(slot + 1),
4776                               sizeof(struct btrfs_item) *
4777                               (nritems - slot - 1));
4778         btrfs_set_header_nritems(buf, nritems - 1);
4779         if (slot == 0) {
4780                 struct btrfs_disk_key disk_key;
4781
4782                 btrfs_item_key(buf, &disk_key, 0);
4783                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4784         }
4785         btrfs_mark_buffer_dirty(buf);
4786         return 0;
4787 }
4788
4789 static int fix_item_offset(struct btrfs_trans_handle *trans,
4790                            struct btrfs_root *root,
4791                            struct btrfs_path *path)
4792 {
4793         struct extent_buffer *buf;
4794         int i;
4795         int ret = 0;
4796
4797         /* We should only get this for leaves */
4798         BUG_ON(path->lowest_level);
4799         buf = path->nodes[0];
4800 again:
4801         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4802                 unsigned int shift = 0, offset;
4803
4804                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4805                     BTRFS_LEAF_DATA_SIZE(root)) {
4806                         if (btrfs_item_end_nr(buf, i) >
4807                             BTRFS_LEAF_DATA_SIZE(root)) {
4808                                 ret = delete_bogus_item(trans, root, path,
4809                                                         buf, i);
4810                                 if (!ret)
4811                                         goto again;
4812                                 fprintf(stderr, "item is off the end of the "
4813                                         "leaf, can't fix\n");
4814                                 ret = -EIO;
4815                                 break;
4816                         }
4817                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4818                                 btrfs_item_end_nr(buf, i);
4819                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4820                            btrfs_item_offset_nr(buf, i - 1)) {
4821                         if (btrfs_item_end_nr(buf, i) >
4822                             btrfs_item_offset_nr(buf, i - 1)) {
4823                                 ret = delete_bogus_item(trans, root, path,
4824                                                         buf, i);
4825                                 if (!ret)
4826                                         goto again;
4827                                 fprintf(stderr, "items overlap, can't fix\n");
4828                                 ret = -EIO;
4829                                 break;
4830                         }
4831                         shift = btrfs_item_offset_nr(buf, i - 1) -
4832                                 btrfs_item_end_nr(buf, i);
4833                 }
4834                 if (!shift)
4835                         continue;
4836
4837                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4838                        i, shift, (unsigned long long)buf->start);
4839                 offset = btrfs_item_offset_nr(buf, i);
4840                 memmove_extent_buffer(buf,
4841                                       btrfs_leaf_data(buf) + offset + shift,
4842                                       btrfs_leaf_data(buf) + offset,
4843                                       btrfs_item_size_nr(buf, i));
4844                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4845                                       offset + shift);
4846                 btrfs_mark_buffer_dirty(buf);
4847         }
4848
4849         /*
4850          * We may have moved things, in which case we want to exit so we don't
4851          * write those changes out.  Once we have proper abort functionality in
4852          * progs this can be changed to something nicer.
4853          */
4854         BUG_ON(ret);
4855         return ret;
4856 }
4857
4858 /*
4859  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4860  * then just return -EIO.
4861  */
4862 static int try_to_fix_bad_block(struct btrfs_root *root,
4863                                 struct extent_buffer *buf,
4864                                 enum btrfs_tree_block_status status)
4865 {
4866         struct btrfs_trans_handle *trans;
4867         struct ulist *roots;
4868         struct ulist_node *node;
4869         struct btrfs_root *search_root;
4870         struct btrfs_path path;
4871         struct ulist_iterator iter;
4872         struct btrfs_key root_key, key;
4873         int ret;
4874
4875         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4876             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4877                 return -EIO;
4878
4879         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4880         if (ret)
4881                 return -EIO;
4882
4883         btrfs_init_path(&path);
4884         ULIST_ITER_INIT(&iter);
4885         while ((node = ulist_next(roots, &iter))) {
4886                 root_key.objectid = node->val;
4887                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4888                 root_key.offset = (u64)-1;
4889
4890                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4891                 if (IS_ERR(root)) {
4892                         ret = -EIO;
4893                         break;
4894                 }
4895
4896
4897                 trans = btrfs_start_transaction(search_root, 0);
4898                 if (IS_ERR(trans)) {
4899                         ret = PTR_ERR(trans);
4900                         break;
4901                 }
4902
4903                 path.lowest_level = btrfs_header_level(buf);
4904                 path.skip_check_block = 1;
4905                 if (path.lowest_level)
4906                         btrfs_node_key_to_cpu(buf, &key, 0);
4907                 else
4908                         btrfs_item_key_to_cpu(buf, &key, 0);
4909                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
4910                 if (ret) {
4911                         ret = -EIO;
4912                         btrfs_commit_transaction(trans, search_root);
4913                         break;
4914                 }
4915                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4916                         ret = fix_key_order(trans, search_root, &path);
4917                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4918                         ret = fix_item_offset(trans, search_root, &path);
4919                 if (ret) {
4920                         btrfs_commit_transaction(trans, search_root);
4921                         break;
4922                 }
4923                 btrfs_release_path(&path);
4924                 btrfs_commit_transaction(trans, search_root);
4925         }
4926         ulist_free(roots);
4927         btrfs_release_path(&path);
4928         return ret;
4929 }
4930
4931 static int check_block(struct btrfs_root *root,
4932                        struct cache_tree *extent_cache,
4933                        struct extent_buffer *buf, u64 flags)
4934 {
4935         struct extent_record *rec;
4936         struct cache_extent *cache;
4937         struct btrfs_key key;
4938         enum btrfs_tree_block_status status;
4939         int ret = 0;
4940         int level;
4941
4942         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4943         if (!cache)
4944                 return 1;
4945         rec = container_of(cache, struct extent_record, cache);
4946         rec->generation = btrfs_header_generation(buf);
4947
4948         level = btrfs_header_level(buf);
4949         if (btrfs_header_nritems(buf) > 0) {
4950
4951                 if (level == 0)
4952                         btrfs_item_key_to_cpu(buf, &key, 0);
4953                 else
4954                         btrfs_node_key_to_cpu(buf, &key, 0);
4955
4956                 rec->info_objectid = key.objectid;
4957         }
4958         rec->info_level = level;
4959
4960         if (btrfs_is_leaf(buf))
4961                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4962         else
4963                 status = btrfs_check_node(root, &rec->parent_key, buf);
4964
4965         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4966                 if (repair)
4967                         status = try_to_fix_bad_block(root, buf, status);
4968                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4969                         ret = -EIO;
4970                         fprintf(stderr, "bad block %llu\n",
4971                                 (unsigned long long)buf->start);
4972                 } else {
4973                         /*
4974                          * Signal to callers we need to start the scan over
4975                          * again since we'll have cowed blocks.
4976                          */
4977                         ret = -EAGAIN;
4978                 }
4979         } else {
4980                 rec->content_checked = 1;
4981                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4982                         rec->owner_ref_checked = 1;
4983                 else {
4984                         ret = check_owner_ref(root, rec, buf);
4985                         if (!ret)
4986                                 rec->owner_ref_checked = 1;
4987                 }
4988         }
4989         if (!ret)
4990                 maybe_free_extent_rec(extent_cache, rec);
4991         return ret;
4992 }
4993
4994 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4995                                                 u64 parent, u64 root)
4996 {
4997         struct list_head *cur = rec->backrefs.next;
4998         struct extent_backref *node;
4999         struct tree_backref *back;
5000
5001         while(cur != &rec->backrefs) {
5002                 node = to_extent_backref(cur);
5003                 cur = cur->next;
5004                 if (node->is_data)
5005                         continue;
5006                 back = to_tree_backref(node);
5007                 if (parent > 0) {
5008                         if (!node->full_backref)
5009                                 continue;
5010                         if (parent == back->parent)
5011                                 return back;
5012                 } else {
5013                         if (node->full_backref)
5014                                 continue;
5015                         if (back->root == root)
5016                                 return back;
5017                 }
5018         }
5019         return NULL;
5020 }
5021
5022 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5023                                                 u64 parent, u64 root)
5024 {
5025         struct tree_backref *ref = malloc(sizeof(*ref));
5026
5027         if (!ref)
5028                 return NULL;
5029         memset(&ref->node, 0, sizeof(ref->node));
5030         if (parent > 0) {
5031                 ref->parent = parent;
5032                 ref->node.full_backref = 1;
5033         } else {
5034                 ref->root = root;
5035                 ref->node.full_backref = 0;
5036         }
5037         list_add_tail(&ref->node.list, &rec->backrefs);
5038
5039         return ref;
5040 }
5041
5042 static struct data_backref *find_data_backref(struct extent_record *rec,
5043                                                 u64 parent, u64 root,
5044                                                 u64 owner, u64 offset,
5045                                                 int found_ref,
5046                                                 u64 disk_bytenr, u64 bytes)
5047 {
5048         struct list_head *cur = rec->backrefs.next;
5049         struct extent_backref *node;
5050         struct data_backref *back;
5051
5052         while(cur != &rec->backrefs) {
5053                 node = to_extent_backref(cur);
5054                 cur = cur->next;
5055                 if (!node->is_data)
5056                         continue;
5057                 back = to_data_backref(node);
5058                 if (parent > 0) {
5059                         if (!node->full_backref)
5060                                 continue;
5061                         if (parent == back->parent)
5062                                 return back;
5063                 } else {
5064                         if (node->full_backref)
5065                                 continue;
5066                         if (back->root == root && back->owner == owner &&
5067                             back->offset == offset) {
5068                                 if (found_ref && node->found_ref &&
5069                                     (back->bytes != bytes ||
5070                                     back->disk_bytenr != disk_bytenr))
5071                                         continue;
5072                                 return back;
5073                         }
5074                 }
5075         }
5076         return NULL;
5077 }
5078
5079 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5080                                                 u64 parent, u64 root,
5081                                                 u64 owner, u64 offset,
5082                                                 u64 max_size)
5083 {
5084         struct data_backref *ref = malloc(sizeof(*ref));
5085
5086         if (!ref)
5087                 return NULL;
5088         memset(&ref->node, 0, sizeof(ref->node));
5089         ref->node.is_data = 1;
5090
5091         if (parent > 0) {
5092                 ref->parent = parent;
5093                 ref->owner = 0;
5094                 ref->offset = 0;
5095                 ref->node.full_backref = 1;
5096         } else {
5097                 ref->root = root;
5098                 ref->owner = owner;
5099                 ref->offset = offset;
5100                 ref->node.full_backref = 0;
5101         }
5102         ref->bytes = max_size;
5103         ref->found_ref = 0;
5104         ref->num_refs = 0;
5105         list_add_tail(&ref->node.list, &rec->backrefs);
5106         if (max_size > rec->max_size)
5107                 rec->max_size = max_size;
5108         return ref;
5109 }
5110
5111 /* Check if the type of extent matches with its chunk */
5112 static void check_extent_type(struct extent_record *rec)
5113 {
5114         struct btrfs_block_group_cache *bg_cache;
5115
5116         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5117         if (!bg_cache)
5118                 return;
5119
5120         /* data extent, check chunk directly*/
5121         if (!rec->metadata) {
5122                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5123                         rec->wrong_chunk_type = 1;
5124                 return;
5125         }
5126
5127         /* metadata extent, check the obvious case first */
5128         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5129                                  BTRFS_BLOCK_GROUP_METADATA))) {
5130                 rec->wrong_chunk_type = 1;
5131                 return;
5132         }
5133
5134         /*
5135          * Check SYSTEM extent, as it's also marked as metadata, we can only
5136          * make sure it's a SYSTEM extent by its backref
5137          */
5138         if (!list_empty(&rec->backrefs)) {
5139                 struct extent_backref *node;
5140                 struct tree_backref *tback;
5141                 u64 bg_type;
5142
5143                 node = to_extent_backref(rec->backrefs.next);
5144                 if (node->is_data) {
5145                         /* tree block shouldn't have data backref */
5146                         rec->wrong_chunk_type = 1;
5147                         return;
5148                 }
5149                 tback = container_of(node, struct tree_backref, node);
5150
5151                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5152                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5153                 else
5154                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5155                 if (!(bg_cache->flags & bg_type))
5156                         rec->wrong_chunk_type = 1;
5157         }
5158 }
5159
5160 /*
5161  * Allocate a new extent record, fill default values from @tmpl and insert int
5162  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5163  * the cache, otherwise it fails.
5164  */
5165 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5166                 struct extent_record *tmpl)
5167 {
5168         struct extent_record *rec;
5169         int ret = 0;
5170
5171         rec = malloc(sizeof(*rec));
5172         if (!rec)
5173                 return -ENOMEM;
5174         rec->start = tmpl->start;
5175         rec->max_size = tmpl->max_size;
5176         rec->nr = max(tmpl->nr, tmpl->max_size);
5177         rec->found_rec = tmpl->found_rec;
5178         rec->content_checked = tmpl->content_checked;
5179         rec->owner_ref_checked = tmpl->owner_ref_checked;
5180         rec->num_duplicates = 0;
5181         rec->metadata = tmpl->metadata;
5182         rec->flag_block_full_backref = FLAG_UNSET;
5183         rec->bad_full_backref = 0;
5184         rec->crossing_stripes = 0;
5185         rec->wrong_chunk_type = 0;
5186         rec->is_root = tmpl->is_root;
5187         rec->refs = tmpl->refs;
5188         rec->extent_item_refs = tmpl->extent_item_refs;
5189         rec->parent_generation = tmpl->parent_generation;
5190         INIT_LIST_HEAD(&rec->backrefs);
5191         INIT_LIST_HEAD(&rec->dups);
5192         INIT_LIST_HEAD(&rec->list);
5193         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
5194         rec->cache.start = tmpl->start;
5195         rec->cache.size = tmpl->nr;
5196         ret = insert_cache_extent(extent_cache, &rec->cache);
5197         if (ret) {
5198                 free(rec);
5199                 return ret;
5200         }
5201         bytes_used += rec->nr;
5202
5203         if (tmpl->metadata)
5204                 rec->crossing_stripes = check_crossing_stripes(global_info,
5205                                 rec->start, global_info->tree_root->nodesize);
5206         check_extent_type(rec);
5207         return ret;
5208 }
5209
5210 /*
5211  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5212  * some are hints:
5213  * - refs              - if found, increase refs
5214  * - is_root           - if found, set
5215  * - content_checked   - if found, set
5216  * - owner_ref_checked - if found, set
5217  *
5218  * If not found, create a new one, initialize and insert.
5219  */
5220 static int add_extent_rec(struct cache_tree *extent_cache,
5221                 struct extent_record *tmpl)
5222 {
5223         struct extent_record *rec;
5224         struct cache_extent *cache;
5225         int ret = 0;
5226         int dup = 0;
5227
5228         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
5229         if (cache) {
5230                 rec = container_of(cache, struct extent_record, cache);
5231                 if (tmpl->refs)
5232                         rec->refs++;
5233                 if (rec->nr == 1)
5234                         rec->nr = max(tmpl->nr, tmpl->max_size);
5235
5236                 /*
5237                  * We need to make sure to reset nr to whatever the extent
5238                  * record says was the real size, this way we can compare it to
5239                  * the backrefs.
5240                  */
5241                 if (tmpl->found_rec) {
5242                         if (tmpl->start != rec->start || rec->found_rec) {
5243                                 struct extent_record *tmp;
5244
5245                                 dup = 1;
5246                                 if (list_empty(&rec->list))
5247                                         list_add_tail(&rec->list,
5248                                                       &duplicate_extents);
5249
5250                                 /*
5251                                  * We have to do this song and dance in case we
5252                                  * find an extent record that falls inside of
5253                                  * our current extent record but does not have
5254                                  * the same objectid.
5255                                  */
5256                                 tmp = malloc(sizeof(*tmp));
5257                                 if (!tmp)
5258                                         return -ENOMEM;
5259                                 tmp->start = tmpl->start;
5260                                 tmp->max_size = tmpl->max_size;
5261                                 tmp->nr = tmpl->nr;
5262                                 tmp->found_rec = 1;
5263                                 tmp->metadata = tmpl->metadata;
5264                                 tmp->extent_item_refs = tmpl->extent_item_refs;
5265                                 INIT_LIST_HEAD(&tmp->list);
5266                                 list_add_tail(&tmp->list, &rec->dups);
5267                                 rec->num_duplicates++;
5268                         } else {
5269                                 rec->nr = tmpl->nr;
5270                                 rec->found_rec = 1;
5271                         }
5272                 }
5273
5274                 if (tmpl->extent_item_refs && !dup) {
5275                         if (rec->extent_item_refs) {
5276                                 fprintf(stderr, "block %llu rec "
5277                                         "extent_item_refs %llu, passed %llu\n",
5278                                         (unsigned long long)tmpl->start,
5279                                         (unsigned long long)
5280                                                         rec->extent_item_refs,
5281                                         (unsigned long long)tmpl->extent_item_refs);
5282                         }
5283                         rec->extent_item_refs = tmpl->extent_item_refs;
5284                 }
5285                 if (tmpl->is_root)
5286                         rec->is_root = 1;
5287                 if (tmpl->content_checked)
5288                         rec->content_checked = 1;
5289                 if (tmpl->owner_ref_checked)
5290                         rec->owner_ref_checked = 1;
5291                 memcpy(&rec->parent_key, &tmpl->parent_key,
5292                                 sizeof(tmpl->parent_key));
5293                 if (tmpl->parent_generation)
5294                         rec->parent_generation = tmpl->parent_generation;
5295                 if (rec->max_size < tmpl->max_size)
5296                         rec->max_size = tmpl->max_size;
5297
5298                 /*
5299                  * A metadata extent can't cross stripe_len boundary, otherwise
5300                  * kernel scrub won't be able to handle it.
5301                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5302                  * it.
5303                  */
5304                 if (tmpl->metadata)
5305                         rec->crossing_stripes = check_crossing_stripes(
5306                                         global_info, rec->start,
5307                                         global_info->tree_root->nodesize);
5308                 check_extent_type(rec);
5309                 maybe_free_extent_rec(extent_cache, rec);
5310                 return ret;
5311         }
5312
5313         ret = add_extent_rec_nolookup(extent_cache, tmpl);
5314
5315         return ret;
5316 }
5317
5318 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5319                             u64 parent, u64 root, int found_ref)
5320 {
5321         struct extent_record *rec;
5322         struct tree_backref *back;
5323         struct cache_extent *cache;
5324         int ret;
5325
5326         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5327         if (!cache) {
5328                 struct extent_record tmpl;
5329
5330                 memset(&tmpl, 0, sizeof(tmpl));
5331                 tmpl.start = bytenr;
5332                 tmpl.nr = 1;
5333                 tmpl.metadata = 1;
5334
5335                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5336                 if (ret)
5337                         return ret;
5338
5339                 /* really a bug in cache_extent implement now */
5340                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5341                 if (!cache)
5342                         return -ENOENT;
5343         }
5344
5345         rec = container_of(cache, struct extent_record, cache);
5346         if (rec->start != bytenr) {
5347                 /*
5348                  * Several cause, from unaligned bytenr to over lapping extents
5349                  */
5350                 return -EEXIST;
5351         }
5352
5353         back = find_tree_backref(rec, parent, root);
5354         if (!back) {
5355                 back = alloc_tree_backref(rec, parent, root);
5356                 if (!back)
5357                         return -ENOMEM;
5358         }
5359
5360         if (found_ref) {
5361                 if (back->node.found_ref) {
5362                         fprintf(stderr, "Extent back ref already exists "
5363                                 "for %llu parent %llu root %llu \n",
5364                                 (unsigned long long)bytenr,
5365                                 (unsigned long long)parent,
5366                                 (unsigned long long)root);
5367                 }
5368                 back->node.found_ref = 1;
5369         } else {
5370                 if (back->node.found_extent_tree) {
5371                         fprintf(stderr, "Extent back ref already exists "
5372                                 "for %llu parent %llu root %llu \n",
5373                                 (unsigned long long)bytenr,
5374                                 (unsigned long long)parent,
5375                                 (unsigned long long)root);
5376                 }
5377                 back->node.found_extent_tree = 1;
5378         }
5379         check_extent_type(rec);
5380         maybe_free_extent_rec(extent_cache, rec);
5381         return 0;
5382 }
5383
5384 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5385                             u64 parent, u64 root, u64 owner, u64 offset,
5386                             u32 num_refs, int found_ref, u64 max_size)
5387 {
5388         struct extent_record *rec;
5389         struct data_backref *back;
5390         struct cache_extent *cache;
5391         int ret;
5392
5393         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5394         if (!cache) {
5395                 struct extent_record tmpl;
5396
5397                 memset(&tmpl, 0, sizeof(tmpl));
5398                 tmpl.start = bytenr;
5399                 tmpl.nr = 1;
5400                 tmpl.max_size = max_size;
5401
5402                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5403                 if (ret)
5404                         return ret;
5405
5406                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5407                 if (!cache)
5408                         abort();
5409         }
5410
5411         rec = container_of(cache, struct extent_record, cache);
5412         if (rec->max_size < max_size)
5413                 rec->max_size = max_size;
5414
5415         /*
5416          * If found_ref is set then max_size is the real size and must match the
5417          * existing refs.  So if we have already found a ref then we need to
5418          * make sure that this ref matches the existing one, otherwise we need
5419          * to add a new backref so we can notice that the backrefs don't match
5420          * and we need to figure out who is telling the truth.  This is to
5421          * account for that awful fsync bug I introduced where we'd end up with
5422          * a btrfs_file_extent_item that would have its length include multiple
5423          * prealloc extents or point inside of a prealloc extent.
5424          */
5425         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5426                                  bytenr, max_size);
5427         if (!back) {
5428                 back = alloc_data_backref(rec, parent, root, owner, offset,
5429                                           max_size);
5430                 BUG_ON(!back);
5431         }
5432
5433         if (found_ref) {
5434                 BUG_ON(num_refs != 1);
5435                 if (back->node.found_ref)
5436                         BUG_ON(back->bytes != max_size);
5437                 back->node.found_ref = 1;
5438                 back->found_ref += 1;
5439                 back->bytes = max_size;
5440                 back->disk_bytenr = bytenr;
5441                 rec->refs += 1;
5442                 rec->content_checked = 1;
5443                 rec->owner_ref_checked = 1;
5444         } else {
5445                 if (back->node.found_extent_tree) {
5446                         fprintf(stderr, "Extent back ref already exists "
5447                                 "for %llu parent %llu root %llu "
5448                                 "owner %llu offset %llu num_refs %lu\n",
5449                                 (unsigned long long)bytenr,
5450                                 (unsigned long long)parent,
5451                                 (unsigned long long)root,
5452                                 (unsigned long long)owner,
5453                                 (unsigned long long)offset,
5454                                 (unsigned long)num_refs);
5455                 }
5456                 back->num_refs = num_refs;
5457                 back->node.found_extent_tree = 1;
5458         }
5459         maybe_free_extent_rec(extent_cache, rec);
5460         return 0;
5461 }
5462
5463 static int add_pending(struct cache_tree *pending,
5464                        struct cache_tree *seen, u64 bytenr, u32 size)
5465 {
5466         int ret;
5467         ret = add_cache_extent(seen, bytenr, size);
5468         if (ret)
5469                 return ret;
5470         add_cache_extent(pending, bytenr, size);
5471         return 0;
5472 }
5473
5474 static int pick_next_pending(struct cache_tree *pending,
5475                         struct cache_tree *reada,
5476                         struct cache_tree *nodes,
5477                         u64 last, struct block_info *bits, int bits_nr,
5478                         int *reada_bits)
5479 {
5480         unsigned long node_start = last;
5481         struct cache_extent *cache;
5482         int ret;
5483
5484         cache = search_cache_extent(reada, 0);
5485         if (cache) {
5486                 bits[0].start = cache->start;
5487                 bits[0].size = cache->size;
5488                 *reada_bits = 1;
5489                 return 1;
5490         }
5491         *reada_bits = 0;
5492         if (node_start > 32768)
5493                 node_start -= 32768;
5494
5495         cache = search_cache_extent(nodes, node_start);
5496         if (!cache)
5497                 cache = search_cache_extent(nodes, 0);
5498
5499         if (!cache) {
5500                  cache = search_cache_extent(pending, 0);
5501                  if (!cache)
5502                          return 0;
5503                  ret = 0;
5504                  do {
5505                          bits[ret].start = cache->start;
5506                          bits[ret].size = cache->size;
5507                          cache = next_cache_extent(cache);
5508                          ret++;
5509                  } while (cache && ret < bits_nr);
5510                  return ret;
5511         }
5512
5513         ret = 0;
5514         do {
5515                 bits[ret].start = cache->start;
5516                 bits[ret].size = cache->size;
5517                 cache = next_cache_extent(cache);
5518                 ret++;
5519         } while (cache && ret < bits_nr);
5520
5521         if (bits_nr - ret > 8) {
5522                 u64 lookup = bits[0].start + bits[0].size;
5523                 struct cache_extent *next;
5524                 next = search_cache_extent(pending, lookup);
5525                 while(next) {
5526                         if (next->start - lookup > 32768)
5527                                 break;
5528                         bits[ret].start = next->start;
5529                         bits[ret].size = next->size;
5530                         lookup = next->start + next->size;
5531                         ret++;
5532                         if (ret == bits_nr)
5533                                 break;
5534                         next = next_cache_extent(next);
5535                         if (!next)
5536                                 break;
5537                 }
5538         }
5539         return ret;
5540 }
5541
5542 static void free_chunk_record(struct cache_extent *cache)
5543 {
5544         struct chunk_record *rec;
5545
5546         rec = container_of(cache, struct chunk_record, cache);
5547         list_del_init(&rec->list);
5548         list_del_init(&rec->dextents);
5549         free(rec);
5550 }
5551
5552 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5553 {
5554         cache_tree_free_extents(chunk_cache, free_chunk_record);
5555 }
5556
5557 static void free_device_record(struct rb_node *node)
5558 {
5559         struct device_record *rec;
5560
5561         rec = container_of(node, struct device_record, node);
5562         free(rec);
5563 }
5564
5565 FREE_RB_BASED_TREE(device_cache, free_device_record);
5566
5567 int insert_block_group_record(struct block_group_tree *tree,
5568                               struct block_group_record *bg_rec)
5569 {
5570         int ret;
5571
5572         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5573         if (ret)
5574                 return ret;
5575
5576         list_add_tail(&bg_rec->list, &tree->block_groups);
5577         return 0;
5578 }
5579
5580 static void free_block_group_record(struct cache_extent *cache)
5581 {
5582         struct block_group_record *rec;
5583
5584         rec = container_of(cache, struct block_group_record, cache);
5585         list_del_init(&rec->list);
5586         free(rec);
5587 }
5588
5589 void free_block_group_tree(struct block_group_tree *tree)
5590 {
5591         cache_tree_free_extents(&tree->tree, free_block_group_record);
5592 }
5593
5594 int insert_device_extent_record(struct device_extent_tree *tree,
5595                                 struct device_extent_record *de_rec)
5596 {
5597         int ret;
5598
5599         /*
5600          * Device extent is a bit different from the other extents, because
5601          * the extents which belong to the different devices may have the
5602          * same start and size, so we need use the special extent cache
5603          * search/insert functions.
5604          */
5605         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5606         if (ret)
5607                 return ret;
5608
5609         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5610         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5611         return 0;
5612 }
5613
5614 static void free_device_extent_record(struct cache_extent *cache)
5615 {
5616         struct device_extent_record *rec;
5617
5618         rec = container_of(cache, struct device_extent_record, cache);
5619         if (!list_empty(&rec->chunk_list))
5620                 list_del_init(&rec->chunk_list);
5621         if (!list_empty(&rec->device_list))
5622                 list_del_init(&rec->device_list);
5623         free(rec);
5624 }
5625
5626 void free_device_extent_tree(struct device_extent_tree *tree)
5627 {
5628         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5629 }
5630
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the v0 ref count.
 * In both cases key.objectid is the extent bytenr and key.offset the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
5651
5652 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5653                                             struct btrfs_key *key,
5654                                             int slot)
5655 {
5656         struct btrfs_chunk *ptr;
5657         struct chunk_record *rec;
5658         int num_stripes, i;
5659
5660         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5661         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5662
5663         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5664         if (!rec) {
5665                 fprintf(stderr, "memory allocation failed\n");
5666                 exit(-1);
5667         }
5668
5669         INIT_LIST_HEAD(&rec->list);
5670         INIT_LIST_HEAD(&rec->dextents);
5671         rec->bg_rec = NULL;
5672
5673         rec->cache.start = key->offset;
5674         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5675
5676         rec->generation = btrfs_header_generation(leaf);
5677
5678         rec->objectid = key->objectid;
5679         rec->type = key->type;
5680         rec->offset = key->offset;
5681
5682         rec->length = rec->cache.size;
5683         rec->owner = btrfs_chunk_owner(leaf, ptr);
5684         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5685         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5686         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5687         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5688         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5689         rec->num_stripes = num_stripes;
5690         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5691
5692         for (i = 0; i < rec->num_stripes; ++i) {
5693                 rec->stripes[i].devid =
5694                         btrfs_stripe_devid_nr(leaf, ptr, i);
5695                 rec->stripes[i].offset =
5696                         btrfs_stripe_offset_nr(leaf, ptr, i);
5697                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5698                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5699                                 BTRFS_UUID_SIZE);
5700         }
5701
5702         return rec;
5703 }
5704
5705 static int process_chunk_item(struct cache_tree *chunk_cache,
5706                               struct btrfs_key *key, struct extent_buffer *eb,
5707                               int slot)
5708 {
5709         struct chunk_record *rec;
5710         struct btrfs_chunk *chunk;
5711         int ret = 0;
5712
5713         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5714         /*
5715          * Do extra check for this chunk item,
5716          *
5717          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5718          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5719          * and owner<->key_type check.
5720          */
5721         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5722                                       key->offset);
5723         if (ret < 0) {
5724                 error("chunk(%llu, %llu) is not valid, ignore it",
5725                       key->offset, btrfs_chunk_length(eb, chunk));
5726                 return 0;
5727         }
5728         rec = btrfs_new_chunk_record(eb, key, slot);
5729         ret = insert_cache_extent(chunk_cache, &rec->cache);
5730         if (ret) {
5731                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5732                         rec->offset, rec->length);
5733                 free(rec);
5734         }
5735
5736         return ret;
5737 }
5738
5739 static int process_device_item(struct rb_root *dev_cache,
5740                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5741 {
5742         struct btrfs_dev_item *ptr;
5743         struct device_record *rec;
5744         int ret = 0;
5745
5746         ptr = btrfs_item_ptr(eb,
5747                 slot, struct btrfs_dev_item);
5748
5749         rec = malloc(sizeof(*rec));
5750         if (!rec) {
5751                 fprintf(stderr, "memory allocation failed\n");
5752                 return -ENOMEM;
5753         }
5754
5755         rec->devid = key->offset;
5756         rec->generation = btrfs_header_generation(eb);
5757
5758         rec->objectid = key->objectid;
5759         rec->type = key->type;
5760         rec->offset = key->offset;
5761
5762         rec->devid = btrfs_device_id(eb, ptr);
5763         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5764         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5765
5766         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5767         if (ret) {
5768                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5769                 free(rec);
5770         }
5771
5772         return ret;
5773 }
5774
5775 struct block_group_record *
5776 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5777                              int slot)
5778 {
5779         struct btrfs_block_group_item *ptr;
5780         struct block_group_record *rec;
5781
5782         rec = calloc(1, sizeof(*rec));
5783         if (!rec) {
5784                 fprintf(stderr, "memory allocation failed\n");
5785                 exit(-1);
5786         }
5787
5788         rec->cache.start = key->objectid;
5789         rec->cache.size = key->offset;
5790
5791         rec->generation = btrfs_header_generation(leaf);
5792
5793         rec->objectid = key->objectid;
5794         rec->type = key->type;
5795         rec->offset = key->offset;
5796
5797         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5798         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5799
5800         INIT_LIST_HEAD(&rec->list);
5801
5802         return rec;
5803 }
5804
5805 static int process_block_group_item(struct block_group_tree *block_group_cache,
5806                                     struct btrfs_key *key,
5807                                     struct extent_buffer *eb, int slot)
5808 {
5809         struct block_group_record *rec;
5810         int ret = 0;
5811
5812         rec = btrfs_new_block_group_record(eb, key, slot);
5813         ret = insert_block_group_record(block_group_cache, rec);
5814         if (ret) {
5815                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5816                         rec->objectid, rec->offset);
5817                 free(rec);
5818         }
5819
5820         return ret;
5821 }
5822
5823 struct device_extent_record *
5824 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5825                                struct btrfs_key *key, int slot)
5826 {
5827         struct device_extent_record *rec;
5828         struct btrfs_dev_extent *ptr;
5829
5830         rec = calloc(1, sizeof(*rec));
5831         if (!rec) {
5832                 fprintf(stderr, "memory allocation failed\n");
5833                 exit(-1);
5834         }
5835
5836         rec->cache.objectid = key->objectid;
5837         rec->cache.start = key->offset;
5838
5839         rec->generation = btrfs_header_generation(leaf);
5840
5841         rec->objectid = key->objectid;
5842         rec->type = key->type;
5843         rec->offset = key->offset;
5844
5845         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5846         rec->chunk_objecteid =
5847                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5848         rec->chunk_offset =
5849                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5850         rec->length = btrfs_dev_extent_length(leaf, ptr);
5851         rec->cache.size = rec->length;
5852
5853         INIT_LIST_HEAD(&rec->chunk_list);
5854         INIT_LIST_HEAD(&rec->device_list);
5855
5856         return rec;
5857 }
5858
5859 static int
5860 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5861                            struct btrfs_key *key, struct extent_buffer *eb,
5862                            int slot)
5863 {
5864         struct device_extent_record *rec;
5865         int ret;
5866
5867         rec = btrfs_new_device_extent_record(eb, key, slot);
5868         ret = insert_device_extent_record(dev_extent_cache, rec);
5869         if (ret) {
5870                 fprintf(stderr,
5871                         "Device extent[%llu, %llu, %llu] existed.\n",
5872                         rec->objectid, rec->offset, rec->length);
5873                 free(rec);
5874         }
5875
5876         return ret;
5877 }
5878
5879 static int process_extent_item(struct btrfs_root *root,
5880                                struct cache_tree *extent_cache,
5881                                struct extent_buffer *eb, int slot)
5882 {
5883         struct btrfs_extent_item *ei;
5884         struct btrfs_extent_inline_ref *iref;
5885         struct btrfs_extent_data_ref *dref;
5886         struct btrfs_shared_data_ref *sref;
5887         struct btrfs_key key;
5888         struct extent_record tmpl;
5889         unsigned long end;
5890         unsigned long ptr;
5891         int ret;
5892         int type;
5893         u32 item_size = btrfs_item_size_nr(eb, slot);
5894         u64 refs = 0;
5895         u64 offset;
5896         u64 num_bytes;
5897         int metadata = 0;
5898
5899         btrfs_item_key_to_cpu(eb, &key, slot);
5900
5901         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5902                 metadata = 1;
5903                 num_bytes = root->nodesize;
5904         } else {
5905                 num_bytes = key.offset;
5906         }
5907
5908         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5909                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5910                       key.objectid, root->sectorsize);
5911                 return -EIO;
5912         }
5913         if (item_size < sizeof(*ei)) {
5914 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5915                 struct btrfs_extent_item_v0 *ei0;
5916                 BUG_ON(item_size != sizeof(*ei0));
5917                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5918                 refs = btrfs_extent_refs_v0(eb, ei0);
5919 #else
5920                 BUG();
5921 #endif
5922                 memset(&tmpl, 0, sizeof(tmpl));
5923                 tmpl.start = key.objectid;
5924                 tmpl.nr = num_bytes;
5925                 tmpl.extent_item_refs = refs;
5926                 tmpl.metadata = metadata;
5927                 tmpl.found_rec = 1;
5928                 tmpl.max_size = num_bytes;
5929
5930                 return add_extent_rec(extent_cache, &tmpl);
5931         }
5932
5933         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5934         refs = btrfs_extent_refs(eb, ei);
5935         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5936                 metadata = 1;
5937         else
5938                 metadata = 0;
5939         if (metadata && num_bytes != root->nodesize) {
5940                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5941                       num_bytes, root->nodesize);
5942                 return -EIO;
5943         }
5944         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5945                 error("ignore invalid data extent, length %llu is not aligned to %u",
5946                       num_bytes, root->sectorsize);
5947                 return -EIO;
5948         }
5949
5950         memset(&tmpl, 0, sizeof(tmpl));
5951         tmpl.start = key.objectid;
5952         tmpl.nr = num_bytes;
5953         tmpl.extent_item_refs = refs;
5954         tmpl.metadata = metadata;
5955         tmpl.found_rec = 1;
5956         tmpl.max_size = num_bytes;
5957         add_extent_rec(extent_cache, &tmpl);
5958
5959         ptr = (unsigned long)(ei + 1);
5960         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5961             key.type == BTRFS_EXTENT_ITEM_KEY)
5962                 ptr += sizeof(struct btrfs_tree_block_info);
5963
5964         end = (unsigned long)ei + item_size;
5965         while (ptr < end) {
5966                 iref = (struct btrfs_extent_inline_ref *)ptr;
5967                 type = btrfs_extent_inline_ref_type(eb, iref);
5968                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5969                 switch (type) {
5970                 case BTRFS_TREE_BLOCK_REF_KEY:
5971                         ret = add_tree_backref(extent_cache, key.objectid,
5972                                         0, offset, 0);
5973                         if (ret < 0)
5974                                 error("add_tree_backref failed: %s",
5975                                       strerror(-ret));
5976                         break;
5977                 case BTRFS_SHARED_BLOCK_REF_KEY:
5978                         ret = add_tree_backref(extent_cache, key.objectid,
5979                                         offset, 0, 0);
5980                         if (ret < 0)
5981                                 error("add_tree_backref failed: %s",
5982                                       strerror(-ret));
5983                         break;
5984                 case BTRFS_EXTENT_DATA_REF_KEY:
5985                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5986                         add_data_backref(extent_cache, key.objectid, 0,
5987                                         btrfs_extent_data_ref_root(eb, dref),
5988                                         btrfs_extent_data_ref_objectid(eb,
5989                                                                        dref),
5990                                         btrfs_extent_data_ref_offset(eb, dref),
5991                                         btrfs_extent_data_ref_count(eb, dref),
5992                                         0, num_bytes);
5993                         break;
5994                 case BTRFS_SHARED_DATA_REF_KEY:
5995                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5996                         add_data_backref(extent_cache, key.objectid, offset,
5997                                         0, 0, 0,
5998                                         btrfs_shared_data_ref_count(eb, sref),
5999                                         0, num_bytes);
6000                         break;
6001                 default:
6002                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6003                                 key.objectid, key.type, num_bytes);
6004                         goto out;
6005                 }
6006                 ptr += btrfs_extent_inline_ref_size(type);
6007         }
6008         WARN_ON(ptr > end);
6009 out:
6010         return 0;
6011 }
6012
6013 static int check_cache_range(struct btrfs_root *root,
6014                              struct btrfs_block_group_cache *cache,
6015                              u64 offset, u64 bytes)
6016 {
6017         struct btrfs_free_space *entry;
6018         u64 *logical;
6019         u64 bytenr;
6020         int stripe_len;
6021         int i, nr, ret;
6022
6023         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6024                 bytenr = btrfs_sb_offset(i);
6025                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6026                                        cache->key.objectid, bytenr, 0,
6027                                        &logical, &nr, &stripe_len);
6028                 if (ret)
6029                         return ret;
6030
6031                 while (nr--) {
6032                         if (logical[nr] + stripe_len <= offset)
6033                                 continue;
6034                         if (offset + bytes <= logical[nr])
6035                                 continue;
6036                         if (logical[nr] == offset) {
6037                                 if (stripe_len >= bytes) {
6038                                         free(logical);
6039                                         return 0;
6040                                 }
6041                                 bytes -= stripe_len;
6042                                 offset += stripe_len;
6043                         } else if (logical[nr] < offset) {
6044                                 if (logical[nr] + stripe_len >=
6045                                     offset + bytes) {
6046                                         free(logical);
6047                                         return 0;
6048                                 }
6049                                 bytes = (offset + bytes) -
6050                                         (logical[nr] + stripe_len);
6051                                 offset = logical[nr] + stripe_len;
6052                         } else {
6053                                 /*
6054                                  * Could be tricky, the super may land in the
6055                                  * middle of the area we're checking.  First
6056                                  * check the easiest case, it's at the end.
6057                                  */
6058                                 if (logical[nr] + stripe_len >=
6059                                     bytes + offset) {
6060                                         bytes = logical[nr] - offset;
6061                                         continue;
6062                                 }
6063
6064                                 /* Check the left side */
6065                                 ret = check_cache_range(root, cache,
6066                                                         offset,
6067                                                         logical[nr] - offset);
6068                                 if (ret) {
6069                                         free(logical);
6070                                         return ret;
6071                                 }
6072
6073                                 /* Now we continue with the right side */
6074                                 bytes = (offset + bytes) -
6075                                         (logical[nr] + stripe_len);
6076                                 offset = logical[nr] + stripe_len;
6077                         }
6078                 }
6079
6080                 free(logical);
6081         }
6082
6083         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6084         if (!entry) {
6085                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6086                         offset, offset+bytes);
6087                 return -EINVAL;
6088         }
6089
6090         if (entry->offset != offset) {
6091                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6092                         entry->offset);
6093                 return -EINVAL;
6094         }
6095
6096         if (entry->bytes != bytes) {
6097                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6098                         bytes, entry->bytes, offset);
6099                 return -EINVAL;
6100         }
6101
6102         unlink_free_space(cache->free_space_ctl, entry);
6103         free(entry);
6104         return 0;
6105 }
6106
6107 static int verify_space_cache(struct btrfs_root *root,
6108                               struct btrfs_block_group_cache *cache)
6109 {
6110         struct btrfs_path path;
6111         struct extent_buffer *leaf;
6112         struct btrfs_key key;
6113         u64 last;
6114         int ret = 0;
6115
6116         root = root->fs_info->extent_root;
6117
6118         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6119
6120         btrfs_init_path(&path);
6121         key.objectid = last;
6122         key.offset = 0;
6123         key.type = BTRFS_EXTENT_ITEM_KEY;
6124         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6125         if (ret < 0)
6126                 goto out;
6127         ret = 0;
6128         while (1) {
6129                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6130                         ret = btrfs_next_leaf(root, &path);
6131                         if (ret < 0)
6132                                 goto out;
6133                         if (ret > 0) {
6134                                 ret = 0;
6135                                 break;
6136                         }
6137                 }
6138                 leaf = path.nodes[0];
6139                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6140                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6141                         break;
6142                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6143                     key.type != BTRFS_METADATA_ITEM_KEY) {
6144                         path.slots[0]++;
6145                         continue;
6146                 }
6147
6148                 if (last == key.objectid) {
6149                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6150                                 last = key.objectid + key.offset;
6151                         else
6152                                 last = key.objectid + root->nodesize;
6153                         path.slots[0]++;
6154                         continue;
6155                 }
6156
6157                 ret = check_cache_range(root, cache, last,
6158                                         key.objectid - last);
6159                 if (ret)
6160                         break;
6161                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6162                         last = key.objectid + key.offset;
6163                 else
6164                         last = key.objectid + root->nodesize;
6165                 path.slots[0]++;
6166         }
6167
6168         if (last < cache->key.objectid + cache->key.offset)
6169                 ret = check_cache_range(root, cache, last,
6170                                         cache->key.objectid +
6171                                         cache->key.offset - last);
6172
6173 out:
6174         btrfs_release_path(&path);
6175
6176         if (!ret &&
6177             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6178                 fprintf(stderr, "There are still entries left in the space "
6179                         "cache\n");
6180                 ret = -EINVAL;
6181         }
6182
6183         return ret;
6184 }
6185
6186 static int check_space_cache(struct btrfs_root *root)
6187 {
6188         struct btrfs_block_group_cache *cache;
6189         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
6190         int ret;
6191         int error = 0;
6192
6193         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6194             btrfs_super_generation(root->fs_info->super_copy) !=
6195             btrfs_super_cache_generation(root->fs_info->super_copy)) {
6196                 printf("cache and super generation don't match, space cache "
6197                        "will be invalidated\n");
6198                 return 0;
6199         }
6200
6201         if (ctx.progress_enabled) {
6202                 ctx.tp = TASK_FREE_SPACE;
6203                 task_start(ctx.info);
6204         }
6205
6206         while (1) {
6207                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
6208                 if (!cache)
6209                         break;
6210
6211                 start = cache->key.objectid + cache->key.offset;
6212                 if (!cache->free_space_ctl) {
6213                         if (btrfs_init_free_space_ctl(cache,
6214                                                       root->sectorsize)) {
6215                                 ret = -ENOMEM;
6216                                 break;
6217                         }
6218                 } else {
6219                         btrfs_remove_free_space_cache(cache);
6220                 }
6221
6222                 if (btrfs_fs_compat_ro(root->fs_info,
6223                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
6224                         ret = exclude_super_stripes(root, cache);
6225                         if (ret) {
6226                                 fprintf(stderr, "could not exclude super stripes: %s\n",
6227                                         strerror(-ret));
6228                                 error++;
6229                                 continue;
6230                         }
6231                         ret = load_free_space_tree(root->fs_info, cache);
6232                         free_excluded_extents(root, cache);
6233                         if (ret < 0) {
6234                                 fprintf(stderr, "could not load free space tree: %s\n",
6235                                         strerror(-ret));
6236                                 error++;
6237                                 continue;
6238                         }
6239                         error += ret;
6240                 } else {
6241                         ret = load_free_space_cache(root->fs_info, cache);
6242                         if (!ret)
6243                                 continue;
6244                 }
6245
6246                 ret = verify_space_cache(root, cache);
6247                 if (ret) {
6248                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
6249                                 cache->key.objectid);
6250                         error++;
6251                 }
6252         }
6253
6254         task_stop(ctx.info);
6255
6256         return error ? -EINVAL : 0;
6257 }
6258
6259 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
6260                         u64 num_bytes, unsigned long leaf_offset,
6261                         struct extent_buffer *eb) {
6262
6263         u64 offset = 0;
6264         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6265         char *data;
6266         unsigned long csum_offset;
6267         u32 csum;
6268         u32 csum_expected;
6269         u64 read_len;
6270         u64 data_checked = 0;
6271         u64 tmp;
6272         int ret = 0;
6273         int mirror;
6274         int num_copies;
6275
6276         if (num_bytes % root->sectorsize)
6277                 return -EINVAL;
6278
6279         data = malloc(num_bytes);
6280         if (!data)
6281                 return -ENOMEM;
6282
6283         while (offset < num_bytes) {
6284                 mirror = 0;
6285 again:
6286                 read_len = num_bytes - offset;
6287                 /* read as much space once a time */
6288                 ret = read_extent_data(root, data + offset,
6289                                 bytenr + offset, &read_len, mirror);
6290                 if (ret)
6291                         goto out;
6292                 data_checked = 0;
6293                 /* verify every 4k data's checksum */
6294                 while (data_checked < read_len) {
6295                         csum = ~(u32)0;
6296                         tmp = offset + data_checked;
6297
6298                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
6299                                                csum, root->sectorsize);
6300                         btrfs_csum_final(csum, (u8 *)&csum);
6301
6302                         csum_offset = leaf_offset +
6303                                  tmp / root->sectorsize * csum_size;
6304                         read_extent_buffer(eb, (char *)&csum_expected,
6305                                            csum_offset, csum_size);
6306                         /* try another mirror */
6307                         if (csum != csum_expected) {
6308                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6309                                                 mirror, bytenr + tmp,
6310                                                 csum, csum_expected);
6311                                 num_copies = btrfs_num_copies(
6312                                                 &root->fs_info->mapping_tree,
6313                                                 bytenr, num_bytes);
6314                                 if (mirror < num_copies - 1) {
6315                                         mirror += 1;
6316                                         goto again;
6317                                 }
6318                         }
6319                         data_checked += root->sectorsize;
6320                 }
6321                 offset += read_len;
6322         }
6323 out:
6324         free(data);
6325         return ret;
6326 }
6327
6328 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6329                                u64 num_bytes)
6330 {
6331         struct btrfs_path path;
6332         struct extent_buffer *leaf;
6333         struct btrfs_key key;
6334         int ret;
6335
6336         btrfs_init_path(&path);
6337         key.objectid = bytenr;
6338         key.type = BTRFS_EXTENT_ITEM_KEY;
6339         key.offset = (u64)-1;
6340
6341 again:
6342         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6343                                 0, 0);
6344         if (ret < 0) {
6345                 fprintf(stderr, "Error looking up extent record %d\n", ret);
6346                 btrfs_release_path(&path);
6347                 return ret;
6348         } else if (ret) {
6349                 if (path.slots[0] > 0) {
6350                         path.slots[0]--;
6351                 } else {
6352                         ret = btrfs_prev_leaf(root, &path);
6353                         if (ret < 0) {
6354                                 goto out;
6355                         } else if (ret > 0) {
6356                                 ret = 0;
6357                                 goto out;
6358                         }
6359                 }
6360         }
6361
6362         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6363
6364         /*
6365          * Block group items come before extent items if they have the same
6366          * bytenr, so walk back one more just in case.  Dear future traveller,
6367          * first congrats on mastering time travel.  Now if it's not too much
6368          * trouble could you go back to 2006 and tell Chris to make the
6369          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6370          * EXTENT_ITEM_KEY please?
6371          */
6372         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6373                 if (path.slots[0] > 0) {
6374                         path.slots[0]--;
6375                 } else {
6376                         ret = btrfs_prev_leaf(root, &path);
6377                         if (ret < 0) {
6378                                 goto out;
6379                         } else if (ret > 0) {
6380                                 ret = 0;
6381                                 goto out;
6382                         }
6383                 }
6384                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6385         }
6386
6387         while (num_bytes) {
6388                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6389                         ret = btrfs_next_leaf(root, &path);
6390                         if (ret < 0) {
6391                                 fprintf(stderr, "Error going to next leaf "
6392                                         "%d\n", ret);
6393                                 btrfs_release_path(&path);
6394                                 return ret;
6395                         } else if (ret) {
6396                                 break;
6397                         }
6398                 }
6399                 leaf = path.nodes[0];
6400                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6401                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6402                         path.slots[0]++;
6403                         continue;
6404                 }
6405                 if (key.objectid + key.offset < bytenr) {
6406                         path.slots[0]++;
6407                         continue;
6408                 }
6409                 if (key.objectid > bytenr + num_bytes)
6410                         break;
6411
6412                 if (key.objectid == bytenr) {
6413                         if (key.offset >= num_bytes) {
6414                                 num_bytes = 0;
6415                                 break;
6416                         }
6417                         num_bytes -= key.offset;
6418                         bytenr += key.offset;
6419                 } else if (key.objectid < bytenr) {
6420                         if (key.objectid + key.offset >= bytenr + num_bytes) {
6421                                 num_bytes = 0;
6422                                 break;
6423                         }
6424                         num_bytes = (bytenr + num_bytes) -
6425                                 (key.objectid + key.offset);
6426                         bytenr = key.objectid + key.offset;
6427                 } else {
6428                         if (key.objectid + key.offset < bytenr + num_bytes) {
6429                                 u64 new_start = key.objectid + key.offset;
6430                                 u64 new_bytes = bytenr + num_bytes - new_start;
6431
6432                                 /*
6433                                  * Weird case, the extent is in the middle of
6434                                  * our range, we'll have to search one side
6435                                  * and then the other.  Not sure if this happens
6436                                  * in real life, but no harm in coding it up
6437                                  * anyway just in case.
6438                                  */
6439                                 btrfs_release_path(&path);
6440                                 ret = check_extent_exists(root, new_start,
6441                                                           new_bytes);
6442                                 if (ret) {
6443                                         fprintf(stderr, "Right section didn't "
6444                                                 "have a record\n");
6445                                         break;
6446                                 }
6447                                 num_bytes = key.objectid - bytenr;
6448                                 goto again;
6449                         }
6450                         num_bytes = key.objectid - bytenr;
6451                 }
6452                 path.slots[0]++;
6453         }
6454         ret = 0;
6455
6456 out:
6457         if (num_bytes && !ret) {
6458                 fprintf(stderr, "There are no extents for csum range "
6459                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6460                 ret = 1;
6461         }
6462
6463         btrfs_release_path(&path);
6464         return ret;
6465 }
6466
6467 static int check_csums(struct btrfs_root *root)
6468 {
6469         struct btrfs_path path;
6470         struct extent_buffer *leaf;
6471         struct btrfs_key key;
6472         u64 offset = 0, num_bytes = 0;
6473         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6474         int errors = 0;
6475         int ret;
6476         u64 data_len;
6477         unsigned long leaf_offset;
6478
6479         root = root->fs_info->csum_root;
6480         if (!extent_buffer_uptodate(root->node)) {
6481                 fprintf(stderr, "No valid csum tree found\n");
6482                 return -ENOENT;
6483         }
6484
6485         btrfs_init_path(&path);
6486         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6487         key.type = BTRFS_EXTENT_CSUM_KEY;
6488         key.offset = 0;
6489         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6490         if (ret < 0) {
6491                 fprintf(stderr, "Error searching csum tree %d\n", ret);
6492                 btrfs_release_path(&path);
6493                 return ret;
6494         }
6495
6496         if (ret > 0 && path.slots[0])
6497                 path.slots[0]--;
6498         ret = 0;
6499
6500         while (1) {
6501                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6502                         ret = btrfs_next_leaf(root, &path);
6503                         if (ret < 0) {
6504                                 fprintf(stderr, "Error going to next leaf "
6505                                         "%d\n", ret);
6506                                 break;
6507                         }
6508                         if (ret)
6509                                 break;
6510                 }
6511                 leaf = path.nodes[0];
6512
6513                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6514                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6515                         path.slots[0]++;
6516                         continue;
6517                 }
6518
6519                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6520                               csum_size) * root->sectorsize;
6521                 if (!check_data_csum)
6522                         goto skip_csum_check;
6523                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6524                 ret = check_extent_csums(root, key.offset, data_len,
6525                                          leaf_offset, leaf);
6526                 if (ret)
6527                         break;
6528 skip_csum_check:
6529                 if (!num_bytes) {
6530                         offset = key.offset;
6531                 } else if (key.offset != offset + num_bytes) {
6532                         ret = check_extent_exists(root, offset, num_bytes);
6533                         if (ret) {
6534                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6535                                         "there is no extent record\n",
6536                                         offset, offset+num_bytes);
6537                                 errors++;
6538                         }
6539                         offset = key.offset;
6540                         num_bytes = 0;
6541                 }
6542                 num_bytes += data_len;
6543                 path.slots[0]++;
6544         }
6545
6546         btrfs_release_path(&path);
6547         return errors;
6548 }
6549
6550 static int is_dropped_key(struct btrfs_key *key,
6551                           struct btrfs_key *drop_key) {
6552         if (key->objectid < drop_key->objectid)
6553                 return 1;
6554         else if (key->objectid == drop_key->objectid) {
6555                 if (key->type < drop_key->type)
6556                         return 1;
6557                 else if (key->type == drop_key->type) {
6558                         if (key->offset < drop_key->offset)
6559                                 return 1;
6560                 }
6561         }
6562         return 0;
6563 }
6564
6565 /*
6566  * Here are the rules for FULL_BACKREF.
6567  *
6568  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6569  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6570  *      FULL_BACKREF set.
6571  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6572  *    if it happened after the relocation occurred since we'll have dropped the
6573  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6574  *    have no real way to know for sure.
6575  *
6576  * We process the blocks one root at a time, and we start from the lowest root
6577  * objectid and go to the highest.  So we can just lookup the owner backref for
6578  * the record and if we don't find it then we know it doesn't exist and we have
6579  * a FULL BACKREF.
6580  *
6581  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6582  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6583  * be set or not and then we can check later once we've gathered all the refs.
6584  */
6585 static int calc_extent_flag(struct btrfs_root *root,
6586                            struct cache_tree *extent_cache,
6587                            struct extent_buffer *buf,
6588                            struct root_item_record *ri,
6589                            u64 *flags)
6590 {
6591         struct extent_record *rec;
6592         struct cache_extent *cache;
6593         struct tree_backref *tback;
6594         u64 owner = 0;
6595
6596         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6597         /* we have added this extent before */
6598         if (!cache)
6599                 return -ENOENT;
6600
6601         rec = container_of(cache, struct extent_record, cache);
6602
6603         /*
6604          * Except file/reloc tree, we can not have
6605          * FULL BACKREF MODE
6606          */
6607         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6608                 goto normal;
6609         /*
6610          * root node
6611          */
6612         if (buf->start == ri->bytenr)
6613                 goto normal;
6614
6615         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6616                 goto full_backref;
6617
6618         owner = btrfs_header_owner(buf);
6619         if (owner == ri->objectid)
6620                 goto normal;
6621
6622         tback = find_tree_backref(rec, 0, owner);
6623         if (!tback)
6624                 goto full_backref;
6625 normal:
6626         *flags = 0;
6627         if (rec->flag_block_full_backref != FLAG_UNSET &&
6628             rec->flag_block_full_backref != 0)
6629                 rec->bad_full_backref = 1;
6630         return 0;
6631 full_backref:
6632         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6633         if (rec->flag_block_full_backref != FLAG_UNSET &&
6634             rec->flag_block_full_backref != 1)
6635                 rec->bad_full_backref = 1;
6636         return 0;
6637 }
6638
6639 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6640 {
6641         fprintf(stderr, "Invalid key type(");
6642         print_key_type(stderr, 0, key_type);
6643         fprintf(stderr, ") found in root(");
6644         print_objectid(stderr, rootid, 0);
6645         fprintf(stderr, ")\n");
6646 }
6647
6648 /*
6649  * Check if the key is valid with its extent buffer.
6650  *
6651  * This is a early check in case invalid key exists in a extent buffer
6652  * This is not comprehensive yet, but should prevent wrong key/item passed
6653  * further
6654  */
6655 static int check_type_with_root(u64 rootid, u8 key_type)
6656 {
6657         switch (key_type) {
6658         /* Only valid in chunk tree */
6659         case BTRFS_DEV_ITEM_KEY:
6660         case BTRFS_CHUNK_ITEM_KEY:
6661                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6662                         goto err;
6663                 break;
6664         /* valid in csum and log tree */
6665         case BTRFS_CSUM_TREE_OBJECTID:
6666                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6667                       is_fstree(rootid)))
6668                         goto err;
6669                 break;
6670         case BTRFS_EXTENT_ITEM_KEY:
6671         case BTRFS_METADATA_ITEM_KEY:
6672         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6673                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6674                         goto err;
6675                 break;
6676         case BTRFS_ROOT_ITEM_KEY:
6677                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6678                         goto err;
6679                 break;
6680         case BTRFS_DEV_EXTENT_KEY:
6681                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6682                         goto err;
6683                 break;
6684         }
6685         return 0;
6686 err:
6687         report_mismatch_key_root(key_type, rootid);
6688         return -EINVAL;
6689 }
6690
6691 static int run_next_block(struct btrfs_root *root,
6692                           struct block_info *bits,
6693                           int bits_nr,
6694                           u64 *last,
6695                           struct cache_tree *pending,
6696                           struct cache_tree *seen,
6697                           struct cache_tree *reada,
6698                           struct cache_tree *nodes,
6699                           struct cache_tree *extent_cache,
6700                           struct cache_tree *chunk_cache,
6701                           struct rb_root *dev_cache,
6702                           struct block_group_tree *block_group_cache,
6703                           struct device_extent_tree *dev_extent_cache,
6704                           struct root_item_record *ri)
6705 {
6706         struct extent_buffer *buf;
6707         struct extent_record *rec = NULL;
6708         u64 bytenr;
6709         u32 size;
6710         u64 parent;
6711         u64 owner;
6712         u64 flags;
6713         u64 ptr;
6714         u64 gen = 0;
6715         int ret = 0;
6716         int i;
6717         int nritems;
6718         struct btrfs_key key;
6719         struct cache_extent *cache;
6720         int reada_bits;
6721
6722         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6723                                     bits_nr, &reada_bits);
6724         if (nritems == 0)
6725                 return 1;
6726
6727         if (!reada_bits) {
6728                 for(i = 0; i < nritems; i++) {
6729                         ret = add_cache_extent(reada, bits[i].start,
6730                                                bits[i].size);
6731                         if (ret == -EEXIST)
6732                                 continue;
6733
6734                         /* fixme, get the parent transid */
6735                         readahead_tree_block(root, bits[i].start,
6736                                              bits[i].size, 0);
6737                 }
6738         }
6739         *last = bits[0].start;
6740         bytenr = bits[0].start;
6741         size = bits[0].size;
6742
6743         cache = lookup_cache_extent(pending, bytenr, size);
6744         if (cache) {
6745                 remove_cache_extent(pending, cache);
6746                 free(cache);
6747         }
6748         cache = lookup_cache_extent(reada, bytenr, size);
6749         if (cache) {
6750                 remove_cache_extent(reada, cache);
6751                 free(cache);
6752         }
6753         cache = lookup_cache_extent(nodes, bytenr, size);
6754         if (cache) {
6755                 remove_cache_extent(nodes, cache);
6756                 free(cache);
6757         }
6758         cache = lookup_cache_extent(extent_cache, bytenr, size);
6759         if (cache) {
6760                 rec = container_of(cache, struct extent_record, cache);
6761                 gen = rec->parent_generation;
6762         }
6763
6764         /* fixme, get the real parent transid */
6765         buf = read_tree_block(root, bytenr, size, gen);
6766         if (!extent_buffer_uptodate(buf)) {
6767                 record_bad_block_io(root->fs_info,
6768                                     extent_cache, bytenr, size);
6769                 goto out;
6770         }
6771
6772         nritems = btrfs_header_nritems(buf);
6773
6774         flags = 0;
6775         if (!init_extent_tree) {
6776                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6777                                        btrfs_header_level(buf), 1, NULL,
6778                                        &flags);
6779                 if (ret < 0) {
6780                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6781                         if (ret < 0) {
6782                                 fprintf(stderr, "Couldn't calc extent flags\n");
6783                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6784                         }
6785                 }
6786         } else {
6787                 flags = 0;
6788                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6789                 if (ret < 0) {
6790                         fprintf(stderr, "Couldn't calc extent flags\n");
6791                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6792                 }
6793         }
6794
6795         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6796                 if (ri != NULL &&
6797                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6798                     ri->objectid == btrfs_header_owner(buf)) {
6799                         /*
6800                          * Ok we got to this block from it's original owner and
6801                          * we have FULL_BACKREF set.  Relocation can leave
6802                          * converted blocks over so this is altogether possible,
6803                          * however it's not possible if the generation > the
6804                          * last snapshot, so check for this case.
6805                          */
6806                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6807                             btrfs_header_generation(buf) > ri->last_snapshot) {
6808                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6809                                 rec->bad_full_backref = 1;
6810                         }
6811                 }
6812         } else {
6813                 if (ri != NULL &&
6814                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6815                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6816                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6817                         rec->bad_full_backref = 1;
6818                 }
6819         }
6820
6821         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6822                 rec->flag_block_full_backref = 1;
6823                 parent = bytenr;
6824                 owner = 0;
6825         } else {
6826                 rec->flag_block_full_backref = 0;
6827                 parent = 0;
6828                 owner = btrfs_header_owner(buf);
6829         }
6830
6831         ret = check_block(root, extent_cache, buf, flags);
6832         if (ret)
6833                 goto out;
6834
6835         if (btrfs_is_leaf(buf)) {
6836                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6837                 for (i = 0; i < nritems; i++) {
6838                         struct btrfs_file_extent_item *fi;
6839                         btrfs_item_key_to_cpu(buf, &key, i);
6840                         /*
6841                          * Check key type against the leaf owner.
6842                          * Could filter quite a lot of early error if
6843                          * owner is correct
6844                          */
6845                         if (check_type_with_root(btrfs_header_owner(buf),
6846                                                  key.type)) {
6847                                 fprintf(stderr, "ignoring invalid key\n");
6848                                 continue;
6849                         }
6850                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6851                                 process_extent_item(root, extent_cache, buf,
6852                                                     i);
6853                                 continue;
6854                         }
6855                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6856                                 process_extent_item(root, extent_cache, buf,
6857                                                     i);
6858                                 continue;
6859                         }
6860                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6861                                 total_csum_bytes +=
6862                                         btrfs_item_size_nr(buf, i);
6863                                 continue;
6864                         }
6865                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6866                                 process_chunk_item(chunk_cache, &key, buf, i);
6867                                 continue;
6868                         }
6869                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6870                                 process_device_item(dev_cache, &key, buf, i);
6871                                 continue;
6872                         }
6873                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6874                                 process_block_group_item(block_group_cache,
6875                                         &key, buf, i);
6876                                 continue;
6877                         }
6878                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6879                                 process_device_extent_item(dev_extent_cache,
6880                                         &key, buf, i);
6881                                 continue;
6882
6883                         }
6884                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6885 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6886                                 process_extent_ref_v0(extent_cache, buf, i);
6887 #else
6888                                 BUG();
6889 #endif
6890                                 continue;
6891                         }
6892
6893                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6894                                 ret = add_tree_backref(extent_cache,
6895                                                 key.objectid, 0, key.offset, 0);
6896                                 if (ret < 0)
6897                                         error("add_tree_backref failed: %s",
6898                                               strerror(-ret));
6899                                 continue;
6900                         }
6901                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6902                                 ret = add_tree_backref(extent_cache,
6903                                                 key.objectid, key.offset, 0, 0);
6904                                 if (ret < 0)
6905                                         error("add_tree_backref failed: %s",
6906                                               strerror(-ret));
6907                                 continue;
6908                         }
6909                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6910                                 struct btrfs_extent_data_ref *ref;
6911                                 ref = btrfs_item_ptr(buf, i,
6912                                                 struct btrfs_extent_data_ref);
6913                                 add_data_backref(extent_cache,
6914                                         key.objectid, 0,
6915                                         btrfs_extent_data_ref_root(buf, ref),
6916                                         btrfs_extent_data_ref_objectid(buf,
6917                                                                        ref),
6918                                         btrfs_extent_data_ref_offset(buf, ref),
6919                                         btrfs_extent_data_ref_count(buf, ref),
6920                                         0, root->sectorsize);
6921                                 continue;
6922                         }
6923                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6924                                 struct btrfs_shared_data_ref *ref;
6925                                 ref = btrfs_item_ptr(buf, i,
6926                                                 struct btrfs_shared_data_ref);
6927                                 add_data_backref(extent_cache,
6928                                         key.objectid, key.offset, 0, 0, 0,
6929                                         btrfs_shared_data_ref_count(buf, ref),
6930                                         0, root->sectorsize);
6931                                 continue;
6932                         }
6933                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6934                                 struct bad_item *bad;
6935
6936                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6937                                         continue;
6938                                 if (!owner)
6939                                         continue;
6940                                 bad = malloc(sizeof(struct bad_item));
6941                                 if (!bad)
6942                                         continue;
6943                                 INIT_LIST_HEAD(&bad->list);
6944                                 memcpy(&bad->key, &key,
6945                                        sizeof(struct btrfs_key));
6946                                 bad->root_id = owner;
6947                                 list_add_tail(&bad->list, &delete_items);
6948                                 continue;
6949                         }
6950                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6951                                 continue;
6952                         fi = btrfs_item_ptr(buf, i,
6953                                             struct btrfs_file_extent_item);
6954                         if (btrfs_file_extent_type(buf, fi) ==
6955                             BTRFS_FILE_EXTENT_INLINE)
6956                                 continue;
6957                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6958                                 continue;
6959
6960                         data_bytes_allocated +=
6961                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6962                         if (data_bytes_allocated < root->sectorsize) {
6963                                 abort();
6964                         }
6965                         data_bytes_referenced +=
6966                                 btrfs_file_extent_num_bytes(buf, fi);
6967                         add_data_backref(extent_cache,
6968                                 btrfs_file_extent_disk_bytenr(buf, fi),
6969                                 parent, owner, key.objectid, key.offset -
6970                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6971                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6972                 }
6973         } else {
6974                 int level;
6975                 struct btrfs_key first_key;
6976
6977                 first_key.objectid = 0;
6978
6979                 if (nritems > 0)
6980                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6981                 level = btrfs_header_level(buf);
6982                 for (i = 0; i < nritems; i++) {
6983                         struct extent_record tmpl;
6984
6985                         ptr = btrfs_node_blockptr(buf, i);
6986                         size = root->nodesize;
6987                         btrfs_node_key_to_cpu(buf, &key, i);
6988                         if (ri != NULL) {
6989                                 if ((level == ri->drop_level)
6990                                     && is_dropped_key(&key, &ri->drop_key)) {
6991                                         continue;
6992                                 }
6993                         }
6994
6995                         memset(&tmpl, 0, sizeof(tmpl));
6996                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6997                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6998                         tmpl.start = ptr;
6999                         tmpl.nr = size;
7000                         tmpl.refs = 1;
7001                         tmpl.metadata = 1;
7002                         tmpl.max_size = size;
7003                         ret = add_extent_rec(extent_cache, &tmpl);
7004                         if (ret < 0)
7005                                 goto out;
7006
7007                         ret = add_tree_backref(extent_cache, ptr, parent,
7008                                         owner, 1);
7009                         if (ret < 0) {
7010                                 error("add_tree_backref failed: %s",
7011                                       strerror(-ret));
7012                                 continue;
7013                         }
7014
7015                         if (level > 1) {
7016                                 add_pending(nodes, seen, ptr, size);
7017                         } else {
7018                                 add_pending(pending, seen, ptr, size);
7019                         }
7020                 }
7021                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7022                                       nritems) * sizeof(struct btrfs_key_ptr);
7023         }
7024         total_btree_bytes += buf->len;
7025         if (fs_root_objectid(btrfs_header_owner(buf)))
7026                 total_fs_tree_bytes += buf->len;
7027         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7028                 total_extent_tree_bytes += buf->len;
7029         if (!found_old_backref &&
7030             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7031             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7032             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7033                 found_old_backref = 1;
7034 out:
7035         free_extent_buffer(buf);
7036         return ret;
7037 }
7038
7039 static int add_root_to_pending(struct extent_buffer *buf,
7040                                struct cache_tree *extent_cache,
7041                                struct cache_tree *pending,
7042                                struct cache_tree *seen,
7043                                struct cache_tree *nodes,
7044                                u64 objectid)
7045 {
7046         struct extent_record tmpl;
7047         int ret;
7048
7049         if (btrfs_header_level(buf) > 0)
7050                 add_pending(nodes, seen, buf->start, buf->len);
7051         else
7052                 add_pending(pending, seen, buf->start, buf->len);
7053
7054         memset(&tmpl, 0, sizeof(tmpl));
7055         tmpl.start = buf->start;
7056         tmpl.nr = buf->len;
7057         tmpl.is_root = 1;
7058         tmpl.refs = 1;
7059         tmpl.metadata = 1;
7060         tmpl.max_size = buf->len;
7061         add_extent_rec(extent_cache, &tmpl);
7062
7063         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7064             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7065                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7066                                 0, 1);
7067         else
7068                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7069                                 1);
7070         return ret;
7071 }
7072
7073 /* as we fix the tree, we might be deleting blocks that
7074  * we're tracking for repair.  This hook makes sure we
7075  * remove any backrefs for blocks as we are fixing them.
7076  */
7077 static int free_extent_hook(struct btrfs_trans_handle *trans,
7078                             struct btrfs_root *root,
7079                             u64 bytenr, u64 num_bytes, u64 parent,
7080                             u64 root_objectid, u64 owner, u64 offset,
7081                             int refs_to_drop)
7082 {
7083         struct extent_record *rec;
7084         struct cache_extent *cache;
7085         int is_data;
7086         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7087
7088         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7089         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7090         if (!cache)
7091                 return 0;
7092
7093         rec = container_of(cache, struct extent_record, cache);
7094         if (is_data) {
7095                 struct data_backref *back;
7096                 back = find_data_backref(rec, parent, root_objectid, owner,
7097                                          offset, 1, bytenr, num_bytes);
7098                 if (!back)
7099                         goto out;
7100                 if (back->node.found_ref) {
7101                         back->found_ref -= refs_to_drop;
7102                         if (rec->refs)
7103                                 rec->refs -= refs_to_drop;
7104                 }
7105                 if (back->node.found_extent_tree) {
7106                         back->num_refs -= refs_to_drop;
7107                         if (rec->extent_item_refs)
7108                                 rec->extent_item_refs -= refs_to_drop;
7109                 }
7110                 if (back->found_ref == 0)
7111                         back->node.found_ref = 0;
7112                 if (back->num_refs == 0)
7113                         back->node.found_extent_tree = 0;
7114
7115                 if (!back->node.found_extent_tree && back->node.found_ref) {
7116                         list_del(&back->node.list);
7117                         free(back);
7118                 }
7119         } else {
7120                 struct tree_backref *back;
7121                 back = find_tree_backref(rec, parent, root_objectid);
7122                 if (!back)
7123                         goto out;
7124                 if (back->node.found_ref) {
7125                         if (rec->refs)
7126                                 rec->refs--;
7127                         back->node.found_ref = 0;
7128                 }
7129                 if (back->node.found_extent_tree) {
7130                         if (rec->extent_item_refs)
7131                                 rec->extent_item_refs--;
7132                         back->node.found_extent_tree = 0;
7133                 }
7134                 if (!back->node.found_extent_tree && back->node.found_ref) {
7135                         list_del(&back->node.list);
7136                         free(back);
7137                 }
7138         }
7139         maybe_free_extent_rec(extent_cache, rec);
7140 out:
7141         return 0;
7142 }
7143
7144 static int delete_extent_records(struct btrfs_trans_handle *trans,
7145                                  struct btrfs_root *root,
7146                                  struct btrfs_path *path,
7147                                  u64 bytenr, u64 new_len)
7148 {
7149         struct btrfs_key key;
7150         struct btrfs_key found_key;
7151         struct extent_buffer *leaf;
7152         int ret;
7153         int slot;
7154
7155
7156         key.objectid = bytenr;
7157         key.type = (u8)-1;
7158         key.offset = (u64)-1;
7159
7160         while(1) {
7161                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7162                                         &key, path, 0, 1);
7163                 if (ret < 0)
7164                         break;
7165
7166                 if (ret > 0) {
7167                         ret = 0;
7168                         if (path->slots[0] == 0)
7169                                 break;
7170                         path->slots[0]--;
7171                 }
7172                 ret = 0;
7173
7174                 leaf = path->nodes[0];
7175                 slot = path->slots[0];
7176
7177                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7178                 if (found_key.objectid != bytenr)
7179                         break;
7180
7181                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7182                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
7183                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7184                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7185                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7186                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7187                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7188                         btrfs_release_path(path);
7189                         if (found_key.type == 0) {
7190                                 if (found_key.offset == 0)
7191                                         break;
7192                                 key.offset = found_key.offset - 1;
7193                                 key.type = found_key.type;
7194                         }
7195                         key.type = found_key.type - 1;
7196                         key.offset = (u64)-1;
7197                         continue;
7198                 }
7199
7200                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7201                         found_key.objectid, found_key.type, found_key.offset);
7202
7203                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7204                 if (ret)
7205                         break;
7206                 btrfs_release_path(path);
7207
7208                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7209                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
7210                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7211                                 found_key.offset : root->nodesize;
7212
7213                         ret = btrfs_update_block_group(trans, root, bytenr,
7214                                                        bytes, 0, 0);
7215                         if (ret)
7216                                 break;
7217                 }
7218         }
7219
7220         btrfs_release_path(path);
7221         return ret;
7222 }
7223
7224 /*
7225  * for a single backref, this will allocate a new extent
7226  * and add the backref to it.
7227  */
7228 static int record_extent(struct btrfs_trans_handle *trans,
7229                          struct btrfs_fs_info *info,
7230                          struct btrfs_path *path,
7231                          struct extent_record *rec,
7232                          struct extent_backref *back,
7233                          int allocated, u64 flags)
7234 {
7235         int ret;
7236         struct btrfs_root *extent_root = info->extent_root;
7237         struct extent_buffer *leaf;
7238         struct btrfs_key ins_key;
7239         struct btrfs_extent_item *ei;
7240         struct data_backref *dback;
7241         struct btrfs_tree_block_info *bi;
7242
7243         if (!back->is_data)
7244                 rec->max_size = max_t(u64, rec->max_size,
7245                                     info->extent_root->nodesize);
7246
7247         if (!allocated) {
7248                 u32 item_size = sizeof(*ei);
7249
7250                 if (!back->is_data)
7251                         item_size += sizeof(*bi);
7252
7253                 ins_key.objectid = rec->start;
7254                 ins_key.offset = rec->max_size;
7255                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
7256
7257                 ret = btrfs_insert_empty_item(trans, extent_root, path,
7258                                         &ins_key, item_size);
7259                 if (ret)
7260                         goto fail;
7261
7262                 leaf = path->nodes[0];
7263                 ei = btrfs_item_ptr(leaf, path->slots[0],
7264                                     struct btrfs_extent_item);
7265
7266                 btrfs_set_extent_refs(leaf, ei, 0);
7267                 btrfs_set_extent_generation(leaf, ei, rec->generation);
7268
7269                 if (back->is_data) {
7270                         btrfs_set_extent_flags(leaf, ei,
7271                                                BTRFS_EXTENT_FLAG_DATA);
7272                 } else {
7273                         struct btrfs_disk_key copy_key;;
7274
7275                         bi = (struct btrfs_tree_block_info *)(ei + 1);
7276                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
7277                                              sizeof(*bi));
7278
7279                         btrfs_set_disk_key_objectid(&copy_key,
7280                                                     rec->info_objectid);
7281                         btrfs_set_disk_key_type(&copy_key, 0);
7282                         btrfs_set_disk_key_offset(&copy_key, 0);
7283
7284                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7285                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
7286
7287                         btrfs_set_extent_flags(leaf, ei,
7288                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7289                 }
7290
7291                 btrfs_mark_buffer_dirty(leaf);
7292                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7293                                                rec->max_size, 1, 0);
7294                 if (ret)
7295                         goto fail;
7296                 btrfs_release_path(path);
7297         }
7298
7299         if (back->is_data) {
7300                 u64 parent;
7301                 int i;
7302
7303                 dback = to_data_backref(back);
7304                 if (back->full_backref)
7305                         parent = dback->parent;
7306                 else
7307                         parent = 0;
7308
7309                 for (i = 0; i < dback->found_ref; i++) {
7310                         /* if parent != 0, we're doing a full backref
7311                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7312                          * just makes the backref allocator create a data
7313                          * backref
7314                          */
7315                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
7316                                                    rec->start, rec->max_size,
7317                                                    parent,
7318                                                    dback->root,
7319                                                    parent ?
7320                                                    BTRFS_FIRST_FREE_OBJECTID :
7321                                                    dback->owner,
7322                                                    dback->offset);
7323                         if (ret)
7324                                 break;
7325                 }
7326                 fprintf(stderr, "adding new data backref"
7327                                 " on %llu %s %llu owner %llu"
7328                                 " offset %llu found %d\n",
7329                                 (unsigned long long)rec->start,
7330                                 back->full_backref ?
7331                                 "parent" : "root",
7332                                 back->full_backref ?
7333                                 (unsigned long long)parent :
7334                                 (unsigned long long)dback->root,
7335                                 (unsigned long long)dback->owner,
7336                                 (unsigned long long)dback->offset,
7337                                 dback->found_ref);
7338         } else {
7339                 u64 parent;
7340                 struct tree_backref *tback;
7341
7342                 tback = to_tree_backref(back);
7343                 if (back->full_backref)
7344                         parent = tback->parent;
7345                 else
7346                         parent = 0;
7347
7348                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7349                                            rec->start, rec->max_size,
7350                                            parent, tback->root, 0, 0);
7351                 fprintf(stderr, "adding new tree backref on "
7352                         "start %llu len %llu parent %llu root %llu\n",
7353                         rec->start, rec->max_size, parent, tback->root);
7354         }
7355 fail:
7356         btrfs_release_path(path);
7357         return ret;
7358 }
7359
7360 static struct extent_entry *find_entry(struct list_head *entries,
7361                                        u64 bytenr, u64 bytes)
7362 {
7363         struct extent_entry *entry = NULL;
7364
7365         list_for_each_entry(entry, entries, list) {
7366                 if (entry->bytenr == bytenr && entry->bytes == bytes)
7367                         return entry;
7368         }
7369
7370         return NULL;
7371 }
7372
7373 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7374 {
7375         struct extent_entry *entry, *best = NULL, *prev = NULL;
7376
7377         list_for_each_entry(entry, entries, list) {
7378                 /*
7379                  * If there are as many broken entries as entries then we know
7380                  * not to trust this particular entry.
7381                  */
7382                 if (entry->broken == entry->count)
7383                         continue;
7384
7385                 /*
7386                  * Special case, when there are only two entries and 'best' is
7387                  * the first one
7388                  */
7389                 if (!prev) {
7390                         best = entry;
7391                         prev = entry;
7392                         continue;
7393                 }
7394
7395                 /*
7396                  * If our current entry == best then we can't be sure our best
7397                  * is really the best, so we need to keep searching.
7398                  */
7399                 if (best && best->count == entry->count) {
7400                         prev = entry;
7401                         best = NULL;
7402                         continue;
7403                 }
7404
7405                 /* Prev == entry, not good enough, have to keep searching */
7406                 if (!prev->broken && prev->count == entry->count)
7407                         continue;
7408
7409                 if (!best)
7410                         best = (prev->count > entry->count) ? prev : entry;
7411                 else if (best->count < entry->count)
7412                         best = entry;
7413                 prev = entry;
7414         }
7415
7416         return best;
7417 }
7418
7419 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7420                       struct data_backref *dback, struct extent_entry *entry)
7421 {
7422         struct btrfs_trans_handle *trans;
7423         struct btrfs_root *root;
7424         struct btrfs_file_extent_item *fi;
7425         struct extent_buffer *leaf;
7426         struct btrfs_key key;
7427         u64 bytenr, bytes;
7428         int ret, err;
7429
7430         key.objectid = dback->root;
7431         key.type = BTRFS_ROOT_ITEM_KEY;
7432         key.offset = (u64)-1;
7433         root = btrfs_read_fs_root(info, &key);
7434         if (IS_ERR(root)) {
7435                 fprintf(stderr, "Couldn't find root for our ref\n");
7436                 return -EINVAL;
7437         }
7438
7439         /*
7440          * The backref points to the original offset of the extent if it was
7441          * split, so we need to search down to the offset we have and then walk
7442          * forward until we find the backref we're looking for.
7443          */
7444         key.objectid = dback->owner;
7445         key.type = BTRFS_EXTENT_DATA_KEY;
7446         key.offset = dback->offset;
7447         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7448         if (ret < 0) {
7449                 fprintf(stderr, "Error looking up ref %d\n", ret);
7450                 return ret;
7451         }
7452
7453         while (1) {
7454                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7455                         ret = btrfs_next_leaf(root, path);
7456                         if (ret) {
7457                                 fprintf(stderr, "Couldn't find our ref, next\n");
7458                                 return -EINVAL;
7459                         }
7460                 }
7461                 leaf = path->nodes[0];
7462                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7463                 if (key.objectid != dback->owner ||
7464                     key.type != BTRFS_EXTENT_DATA_KEY) {
7465                         fprintf(stderr, "Couldn't find our ref, search\n");
7466                         return -EINVAL;
7467                 }
7468                 fi = btrfs_item_ptr(leaf, path->slots[0],
7469                                     struct btrfs_file_extent_item);
7470                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7471                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7472
7473                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7474                         break;
7475                 path->slots[0]++;
7476         }
7477
7478         btrfs_release_path(path);
7479
7480         trans = btrfs_start_transaction(root, 1);
7481         if (IS_ERR(trans))
7482                 return PTR_ERR(trans);
7483
7484         /*
7485          * Ok we have the key of the file extent we want to fix, now we can cow
7486          * down to the thing and fix it.
7487          */
7488         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7489         if (ret < 0) {
7490                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7491                         key.objectid, key.type, key.offset, ret);
7492                 goto out;
7493         }
7494         if (ret > 0) {
7495                 fprintf(stderr, "Well that's odd, we just found this key "
7496                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7497                         key.offset);
7498                 ret = -EINVAL;
7499                 goto out;
7500         }
7501         leaf = path->nodes[0];
7502         fi = btrfs_item_ptr(leaf, path->slots[0],
7503                             struct btrfs_file_extent_item);
7504
7505         if (btrfs_file_extent_compression(leaf, fi) &&
7506             dback->disk_bytenr != entry->bytenr) {
7507                 fprintf(stderr, "Ref doesn't match the record start and is "
7508                         "compressed, please take a btrfs-image of this file "
7509                         "system and send it to a btrfs developer so they can "
7510                         "complete this functionality for bytenr %Lu\n",
7511                         dback->disk_bytenr);
7512                 ret = -EINVAL;
7513                 goto out;
7514         }
7515
7516         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7517                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7518         } else if (dback->disk_bytenr > entry->bytenr) {
7519                 u64 off_diff, offset;
7520
7521                 off_diff = dback->disk_bytenr - entry->bytenr;
7522                 offset = btrfs_file_extent_offset(leaf, fi);
7523                 if (dback->disk_bytenr + offset +
7524                     btrfs_file_extent_num_bytes(leaf, fi) >
7525                     entry->bytenr + entry->bytes) {
7526                         fprintf(stderr, "Ref is past the entry end, please "
7527                                 "take a btrfs-image of this file system and "
7528                                 "send it to a btrfs developer, ref %Lu\n",
7529                                 dback->disk_bytenr);
7530                         ret = -EINVAL;
7531                         goto out;
7532                 }
7533                 offset += off_diff;
7534                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7535                 btrfs_set_file_extent_offset(leaf, fi, offset);
7536         } else if (dback->disk_bytenr < entry->bytenr) {
7537                 u64 offset;
7538
7539                 offset = btrfs_file_extent_offset(leaf, fi);
7540                 if (dback->disk_bytenr + offset < entry->bytenr) {
7541                         fprintf(stderr, "Ref is before the entry start, please"
7542                                 " take a btrfs-image of this file system and "
7543                                 "send it to a btrfs developer, ref %Lu\n",
7544                                 dback->disk_bytenr);
7545                         ret = -EINVAL;
7546                         goto out;
7547                 }
7548
7549                 offset += dback->disk_bytenr;
7550                 offset -= entry->bytenr;
7551                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7552                 btrfs_set_file_extent_offset(leaf, fi, offset);
7553         }
7554
7555         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7556
7557         /*
7558          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7559          * only do this if we aren't using compression, otherwise it's a
7560          * trickier case.
7561          */
7562         if (!btrfs_file_extent_compression(leaf, fi))
7563                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7564         else
7565                 printf("ram bytes may be wrong?\n");
7566         btrfs_mark_buffer_dirty(leaf);
7567 out:
7568         err = btrfs_commit_transaction(trans, root);
7569         btrfs_release_path(path);
7570         return ret ? ret : err;
7571 }
7572
7573 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7574                            struct extent_record *rec)
7575 {
7576         struct extent_backref *back;
7577         struct data_backref *dback;
7578         struct extent_entry *entry, *best = NULL;
7579         LIST_HEAD(entries);
7580         int nr_entries = 0;
7581         int broken_entries = 0;
7582         int ret = 0;
7583         short mismatch = 0;
7584
7585         /*
7586          * Metadata is easy and the backrefs should always agree on bytenr and
7587          * size, if not we've got bigger issues.
7588          */
7589         if (rec->metadata)
7590                 return 0;
7591
7592         list_for_each_entry(back, &rec->backrefs, list) {
7593                 if (back->full_backref || !back->is_data)
7594                         continue;
7595
7596                 dback = to_data_backref(back);
7597
7598                 /*
7599                  * We only pay attention to backrefs that we found a real
7600                  * backref for.
7601                  */
7602                 if (dback->found_ref == 0)
7603                         continue;
7604
7605                 /*
7606                  * For now we only catch when the bytes don't match, not the
7607                  * bytenr.  We can easily do this at the same time, but I want
7608                  * to have a fs image to test on before we just add repair
7609                  * functionality willy-nilly so we know we won't screw up the
7610                  * repair.
7611                  */
7612
7613                 entry = find_entry(&entries, dback->disk_bytenr,
7614                                    dback->bytes);
7615                 if (!entry) {
7616                         entry = malloc(sizeof(struct extent_entry));
7617                         if (!entry) {
7618                                 ret = -ENOMEM;
7619                                 goto out;
7620                         }
7621                         memset(entry, 0, sizeof(*entry));
7622                         entry->bytenr = dback->disk_bytenr;
7623                         entry->bytes = dback->bytes;
7624                         list_add_tail(&entry->list, &entries);
7625                         nr_entries++;
7626                 }
7627
7628                 /*
7629                  * If we only have on entry we may think the entries agree when
7630                  * in reality they don't so we have to do some extra checking.
7631                  */
7632                 if (dback->disk_bytenr != rec->start ||
7633                     dback->bytes != rec->nr || back->broken)
7634                         mismatch = 1;
7635
7636                 if (back->broken) {
7637                         entry->broken++;
7638                         broken_entries++;
7639                 }
7640
7641                 entry->count++;
7642         }
7643
7644         /* Yay all the backrefs agree, carry on good sir */
7645         if (nr_entries <= 1 && !mismatch)
7646                 goto out;
7647
7648         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7649                 "%Lu\n", rec->start);
7650
7651         /*
7652          * First we want to see if the backrefs can agree amongst themselves who
7653          * is right, so figure out which one of the entries has the highest
7654          * count.
7655          */
7656         best = find_most_right_entry(&entries);
7657
7658         /*
7659          * Ok so we may have an even split between what the backrefs think, so
7660          * this is where we use the extent ref to see what it thinks.
7661          */
7662         if (!best) {
7663                 entry = find_entry(&entries, rec->start, rec->nr);
7664                 if (!entry && (!broken_entries || !rec->found_rec)) {
7665                         fprintf(stderr, "Backrefs don't agree with each other "
7666                                 "and extent record doesn't agree with anybody,"
7667                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7668                                 rec->start, rec->nr);
7669                         ret = -EINVAL;
7670                         goto out;
7671                 } else if (!entry) {
7672                         /*
7673                          * Ok our backrefs were broken, we'll assume this is the
7674                          * correct value and add an entry for this range.
7675                          */
7676                         entry = malloc(sizeof(struct extent_entry));
7677                         if (!entry) {
7678                                 ret = -ENOMEM;
7679                                 goto out;
7680                         }
7681                         memset(entry, 0, sizeof(*entry));
7682                         entry->bytenr = rec->start;
7683                         entry->bytes = rec->nr;
7684                         list_add_tail(&entry->list, &entries);
7685                         nr_entries++;
7686                 }
7687                 entry->count++;
7688                 best = find_most_right_entry(&entries);
7689                 if (!best) {
7690                         fprintf(stderr, "Backrefs and extent record evenly "
7691                                 "split on who is right, this is going to "
7692                                 "require user input to fix bytenr %Lu bytes "
7693                                 "%Lu\n", rec->start, rec->nr);
7694                         ret = -EINVAL;
7695                         goto out;
7696                 }
7697         }
7698
7699         /*
7700          * I don't think this can happen currently as we'll abort() if we catch
7701          * this case higher up, but in case somebody removes that we still can't
7702          * deal with it properly here yet, so just bail out of that's the case.
7703          */
7704         if (best->bytenr != rec->start) {
7705                 fprintf(stderr, "Extent start and backref starts don't match, "
7706                         "please use btrfs-image on this file system and send "
7707                         "it to a btrfs developer so they can make fsck fix "
7708                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7709                         rec->start, rec->nr);
7710                 ret = -EINVAL;
7711                 goto out;
7712         }
7713
7714         /*
7715          * Ok great we all agreed on an extent record, let's go find the real
7716          * references and fix up the ones that don't match.
7717          */
7718         list_for_each_entry(back, &rec->backrefs, list) {
7719                 if (back->full_backref || !back->is_data)
7720                         continue;
7721
7722                 dback = to_data_backref(back);
7723
7724                 /*
7725                  * Still ignoring backrefs that don't have a real ref attached
7726                  * to them.
7727                  */
7728                 if (dback->found_ref == 0)
7729                         continue;
7730
7731                 if (dback->bytes == best->bytes &&
7732                     dback->disk_bytenr == best->bytenr)
7733                         continue;
7734
7735                 ret = repair_ref(info, path, dback, best);
7736                 if (ret)
7737                         goto out;
7738         }
7739
7740         /*
7741          * Ok we messed with the actual refs, which means we need to drop our
7742          * entire cache and go back and rescan.  I know this is a huge pain and
7743          * adds a lot of extra work, but it's the only way to be safe.  Once all
7744          * the backrefs agree we may not need to do anything to the extent
7745          * record itself.
7746          */
7747         ret = -EAGAIN;
7748 out:
7749         while (!list_empty(&entries)) {
7750                 entry = list_entry(entries.next, struct extent_entry, list);
7751                 list_del_init(&entry->list);
7752                 free(entry);
7753         }
7754         return ret;
7755 }
7756
7757 static int process_duplicates(struct btrfs_root *root,
7758                               struct cache_tree *extent_cache,
7759                               struct extent_record *rec)
7760 {
7761         struct extent_record *good, *tmp;
7762         struct cache_extent *cache;
7763         int ret;
7764
7765         /*
7766          * If we found a extent record for this extent then return, or if we
7767          * have more than one duplicate we are likely going to need to delete
7768          * something.
7769          */
7770         if (rec->found_rec || rec->num_duplicates > 1)
7771                 return 0;
7772
7773         /* Shouldn't happen but just in case */
7774         BUG_ON(!rec->num_duplicates);
7775
7776         /*
7777          * So this happens if we end up with a backref that doesn't match the
7778          * actual extent entry.  So either the backref is bad or the extent
7779          * entry is bad.  Either way we want to have the extent_record actually
7780          * reflect what we found in the extent_tree, so we need to take the
7781          * duplicate out and use that as the extent_record since the only way we
7782          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7783          */
7784         remove_cache_extent(extent_cache, &rec->cache);
7785
7786         good = to_extent_record(rec->dups.next);
7787         list_del_init(&good->list);
7788         INIT_LIST_HEAD(&good->backrefs);
7789         INIT_LIST_HEAD(&good->dups);
7790         good->cache.start = good->start;
7791         good->cache.size = good->nr;
7792         good->content_checked = 0;
7793         good->owner_ref_checked = 0;
7794         good->num_duplicates = 0;
7795         good->refs = rec->refs;
7796         list_splice_init(&rec->backrefs, &good->backrefs);
7797         while (1) {
7798                 cache = lookup_cache_extent(extent_cache, good->start,
7799                                             good->nr);
7800                 if (!cache)
7801                         break;
7802                 tmp = container_of(cache, struct extent_record, cache);
7803
7804                 /*
7805                  * If we find another overlapping extent and it's found_rec is
7806                  * set then it's a duplicate and we need to try and delete
7807                  * something.
7808                  */
7809                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7810                         if (list_empty(&good->list))
7811                                 list_add_tail(&good->list,
7812                                               &duplicate_extents);
7813                         good->num_duplicates += tmp->num_duplicates + 1;
7814                         list_splice_init(&tmp->dups, &good->dups);
7815                         list_del_init(&tmp->list);
7816                         list_add_tail(&tmp->list, &good->dups);
7817                         remove_cache_extent(extent_cache, &tmp->cache);
7818                         continue;
7819                 }
7820
7821                 /*
7822                  * Ok we have another non extent item backed extent rec, so lets
7823                  * just add it to this extent and carry on like we did above.
7824                  */
7825                 good->refs += tmp->refs;
7826                 list_splice_init(&tmp->backrefs, &good->backrefs);
7827                 remove_cache_extent(extent_cache, &tmp->cache);
7828                 free(tmp);
7829         }
7830         ret = insert_cache_extent(extent_cache, &good->cache);
7831         BUG_ON(ret);
7832         free(rec);
7833         return good->num_duplicates ? 0 : 1;
7834 }
7835
7836 static int delete_duplicate_records(struct btrfs_root *root,
7837                                     struct extent_record *rec)
7838 {
7839         struct btrfs_trans_handle *trans;
7840         LIST_HEAD(delete_list);
7841         struct btrfs_path path;
7842         struct extent_record *tmp, *good, *n;
7843         int nr_del = 0;
7844         int ret = 0, err;
7845         struct btrfs_key key;
7846
7847         btrfs_init_path(&path);
7848
7849         good = rec;
7850         /* Find the record that covers all of the duplicates. */
7851         list_for_each_entry(tmp, &rec->dups, list) {
7852                 if (good->start < tmp->start)
7853                         continue;
7854                 if (good->nr > tmp->nr)
7855                         continue;
7856
7857                 if (tmp->start + tmp->nr < good->start + good->nr) {
7858                         fprintf(stderr, "Ok we have overlapping extents that "
7859                                 "aren't completely covered by each other, this "
7860                                 "is going to require more careful thought.  "
7861                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7862                                 tmp->start, tmp->nr, good->start, good->nr);
7863                         abort();
7864                 }
7865                 good = tmp;
7866         }
7867
7868         if (good != rec)
7869                 list_add_tail(&rec->list, &delete_list);
7870
7871         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7872                 if (tmp == good)
7873                         continue;
7874                 list_move_tail(&tmp->list, &delete_list);
7875         }
7876
7877         root = root->fs_info->extent_root;
7878         trans = btrfs_start_transaction(root, 1);
7879         if (IS_ERR(trans)) {
7880                 ret = PTR_ERR(trans);
7881                 goto out;
7882         }
7883
7884         list_for_each_entry(tmp, &delete_list, list) {
7885                 if (tmp->found_rec == 0)
7886                         continue;
7887                 key.objectid = tmp->start;
7888                 key.type = BTRFS_EXTENT_ITEM_KEY;
7889                 key.offset = tmp->nr;
7890
7891                 /* Shouldn't happen but just in case */
7892                 if (tmp->metadata) {
7893                         fprintf(stderr, "Well this shouldn't happen, extent "
7894                                 "record overlaps but is metadata? "
7895                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7896                         abort();
7897                 }
7898
7899                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7900                 if (ret) {
7901                         if (ret > 0)
7902                                 ret = -EINVAL;
7903                         break;
7904                 }
7905                 ret = btrfs_del_item(trans, root, &path);
7906                 if (ret)
7907                         break;
7908                 btrfs_release_path(&path);
7909                 nr_del++;
7910         }
7911         err = btrfs_commit_transaction(trans, root);
7912         if (err && !ret)
7913                 ret = err;
7914 out:
7915         while (!list_empty(&delete_list)) {
7916                 tmp = to_extent_record(delete_list.next);
7917                 list_del_init(&tmp->list);
7918                 if (tmp == rec)
7919                         continue;
7920                 free(tmp);
7921         }
7922
7923         while (!list_empty(&rec->dups)) {
7924                 tmp = to_extent_record(rec->dups.next);
7925                 list_del_init(&tmp->list);
7926                 free(tmp);
7927         }
7928
7929         btrfs_release_path(&path);
7930
7931         if (!ret && !nr_del)
7932                 rec->num_duplicates = 0;
7933
7934         return ret ? ret : nr_del;
7935 }
7936
7937 static int find_possible_backrefs(struct btrfs_fs_info *info,
7938                                   struct btrfs_path *path,
7939                                   struct cache_tree *extent_cache,
7940                                   struct extent_record *rec)
7941 {
7942         struct btrfs_root *root;
7943         struct extent_backref *back;
7944         struct data_backref *dback;
7945         struct cache_extent *cache;
7946         struct btrfs_file_extent_item *fi;
7947         struct btrfs_key key;
7948         u64 bytenr, bytes;
7949         int ret;
7950
7951         list_for_each_entry(back, &rec->backrefs, list) {
7952                 /* Don't care about full backrefs (poor unloved backrefs) */
7953                 if (back->full_backref || !back->is_data)
7954                         continue;
7955
7956                 dback = to_data_backref(back);
7957
7958                 /* We found this one, we don't need to do a lookup */
7959                 if (dback->found_ref)
7960                         continue;
7961
7962                 key.objectid = dback->root;
7963                 key.type = BTRFS_ROOT_ITEM_KEY;
7964                 key.offset = (u64)-1;
7965
7966                 root = btrfs_read_fs_root(info, &key);
7967
7968                 /* No root, definitely a bad ref, skip */
7969                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7970                         continue;
7971                 /* Other err, exit */
7972                 if (IS_ERR(root))
7973                         return PTR_ERR(root);
7974
7975                 key.objectid = dback->owner;
7976                 key.type = BTRFS_EXTENT_DATA_KEY;
7977                 key.offset = dback->offset;
7978                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7979                 if (ret) {
7980                         btrfs_release_path(path);
7981                         if (ret < 0)
7982                                 return ret;
7983                         /* Didn't find it, we can carry on */
7984                         ret = 0;
7985                         continue;
7986                 }
7987
7988                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7989                                     struct btrfs_file_extent_item);
7990                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7991                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7992                 btrfs_release_path(path);
7993                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7994                 if (cache) {
7995                         struct extent_record *tmp;
7996                         tmp = container_of(cache, struct extent_record, cache);
7997
7998                         /*
7999                          * If we found an extent record for the bytenr for this
8000                          * particular backref then we can't add it to our
8001                          * current extent record.  We only want to add backrefs
8002                          * that don't have a corresponding extent item in the
8003                          * extent tree since they likely belong to this record
8004                          * and we need to fix it if it doesn't match bytenrs.
8005                          */
8006                         if  (tmp->found_rec)
8007                                 continue;
8008                 }
8009
8010                 dback->found_ref += 1;
8011                 dback->disk_bytenr = bytenr;
8012                 dback->bytes = bytes;
8013
8014                 /*
8015                  * Set this so the verify backref code knows not to trust the
8016                  * values in this backref.
8017                  */
8018                 back->broken = 1;
8019         }
8020
8021         return 0;
8022 }
8023
8024 /*
8025  * Record orphan data ref into corresponding root.
8026  *
8027  * Return 0 if the extent item contains data ref and recorded.
8028  * Return 1 if the extent item contains no useful data ref
8029  *   On that case, it may contains only shared_dataref or metadata backref
8030  *   or the file extent exists(this should be handled by the extent bytenr
8031  *   recovery routine)
8032  * Return <0 if something goes wrong.
8033  */
8034 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8035                                       struct extent_record *rec)
8036 {
8037         struct btrfs_key key;
8038         struct btrfs_root *dest_root;
8039         struct extent_backref *back;
8040         struct data_backref *dback;
8041         struct orphan_data_extent *orphan;
8042         struct btrfs_path path;
8043         int recorded_data_ref = 0;
8044         int ret = 0;
8045
8046         if (rec->metadata)
8047                 return 1;
8048         btrfs_init_path(&path);
8049         list_for_each_entry(back, &rec->backrefs, list) {
8050                 if (back->full_backref || !back->is_data ||
8051                     !back->found_extent_tree)
8052                         continue;
8053                 dback = to_data_backref(back);
8054                 if (dback->found_ref)
8055                         continue;
8056                 key.objectid = dback->root;
8057                 key.type = BTRFS_ROOT_ITEM_KEY;
8058                 key.offset = (u64)-1;
8059
8060                 dest_root = btrfs_read_fs_root(fs_info, &key);
8061
8062                 /* For non-exist root we just skip it */
8063                 if (IS_ERR(dest_root) || !dest_root)
8064                         continue;
8065
8066                 key.objectid = dback->owner;
8067                 key.type = BTRFS_EXTENT_DATA_KEY;
8068                 key.offset = dback->offset;
8069
8070                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8071                 btrfs_release_path(&path);
8072                 /*
8073                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8074                  * we need to record it for inode/file extent rebuild.
8075                  * For ret > 0, we record it only for file extent rebuild.
8076                  * For ret == 0, the file extent exists but only bytenr
8077                  * mismatch, let the original bytenr fix routine to handle,
8078                  * don't record it.
8079                  */
8080                 if (ret == 0)
8081                         continue;
8082                 ret = 0;
8083                 orphan = malloc(sizeof(*orphan));
8084                 if (!orphan) {
8085                         ret = -ENOMEM;
8086                         goto out;
8087                 }
8088                 INIT_LIST_HEAD(&orphan->list);
8089                 orphan->root = dback->root;
8090                 orphan->objectid = dback->owner;
8091                 orphan->offset = dback->offset;
8092                 orphan->disk_bytenr = rec->cache.start;
8093                 orphan->disk_len = rec->cache.size;
8094                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8095                 recorded_data_ref = 1;
8096         }
8097 out:
8098         btrfs_release_path(&path);
8099         if (!ret)
8100                 return !recorded_data_ref;
8101         else
8102                 return ret;
8103 }
8104
8105 /*
8106  * when an incorrect extent item is found, this will delete
8107  * all of the existing entries for it and recreate them
8108  * based on what the tree scan found.
8109  */
8110 static int fixup_extent_refs(struct btrfs_fs_info *info,
8111                              struct cache_tree *extent_cache,
8112                              struct extent_record *rec)
8113 {
8114         struct btrfs_trans_handle *trans = NULL;
8115         int ret;
8116         struct btrfs_path path;
8117         struct list_head *cur = rec->backrefs.next;
8118         struct cache_extent *cache;
8119         struct extent_backref *back;
8120         int allocated = 0;
8121         u64 flags = 0;
8122
8123         if (rec->flag_block_full_backref)
8124                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8125
8126         btrfs_init_path(&path);
8127         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8128                 /*
8129                  * Sometimes the backrefs themselves are so broken they don't
8130                  * get attached to any meaningful rec, so first go back and
8131                  * check any of our backrefs that we couldn't find and throw
8132                  * them into the list if we find the backref so that
8133                  * verify_backrefs can figure out what to do.
8134                  */
8135                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8136                 if (ret < 0)
8137                         goto out;
8138         }
8139
8140         /* step one, make sure all of the backrefs agree */
8141         ret = verify_backrefs(info, &path, rec);
8142         if (ret < 0)
8143                 goto out;
8144
8145         trans = btrfs_start_transaction(info->extent_root, 1);
8146         if (IS_ERR(trans)) {
8147                 ret = PTR_ERR(trans);
8148                 goto out;
8149         }
8150
8151         /* step two, delete all the existing records */
8152         ret = delete_extent_records(trans, info->extent_root, &path,
8153                                     rec->start, rec->max_size);
8154
8155         if (ret < 0)
8156                 goto out;
8157
8158         /* was this block corrupt?  If so, don't add references to it */
8159         cache = lookup_cache_extent(info->corrupt_blocks,
8160                                     rec->start, rec->max_size);
8161         if (cache) {
8162                 ret = 0;
8163                 goto out;
8164         }
8165
8166         /* step three, recreate all the refs we did find */
8167         while(cur != &rec->backrefs) {
8168                 back = to_extent_backref(cur);
8169                 cur = cur->next;
8170
8171                 /*
8172                  * if we didn't find any references, don't create a
8173                  * new extent record
8174                  */
8175                 if (!back->found_ref)
8176                         continue;
8177
8178                 rec->bad_full_backref = 0;
8179                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8180                 allocated = 1;
8181
8182                 if (ret)
8183                         goto out;
8184         }
8185 out:
8186         if (trans) {
8187                 int err = btrfs_commit_transaction(trans, info->extent_root);
8188                 if (!ret)
8189                         ret = err;
8190         }
8191
8192         btrfs_release_path(&path);
8193         return ret;
8194 }
8195
8196 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8197                               struct extent_record *rec)
8198 {
8199         struct btrfs_trans_handle *trans;
8200         struct btrfs_root *root = fs_info->extent_root;
8201         struct btrfs_path path;
8202         struct btrfs_extent_item *ei;
8203         struct btrfs_key key;
8204         u64 flags;
8205         int ret = 0;
8206
8207         key.objectid = rec->start;
8208         if (rec->metadata) {
8209                 key.type = BTRFS_METADATA_ITEM_KEY;
8210                 key.offset = rec->info_level;
8211         } else {
8212                 key.type = BTRFS_EXTENT_ITEM_KEY;
8213                 key.offset = rec->max_size;
8214         }
8215
8216         trans = btrfs_start_transaction(root, 0);
8217         if (IS_ERR(trans))
8218                 return PTR_ERR(trans);
8219
8220         btrfs_init_path(&path);
8221         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8222         if (ret < 0) {
8223                 btrfs_release_path(&path);
8224                 btrfs_commit_transaction(trans, root);
8225                 return ret;
8226         } else if (ret) {
8227                 fprintf(stderr, "Didn't find extent for %llu\n",
8228                         (unsigned long long)rec->start);
8229                 btrfs_release_path(&path);
8230                 btrfs_commit_transaction(trans, root);
8231                 return -ENOENT;
8232         }
8233
8234         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8235                             struct btrfs_extent_item);
8236         flags = btrfs_extent_flags(path.nodes[0], ei);
8237         if (rec->flag_block_full_backref) {
8238                 fprintf(stderr, "setting full backref on %llu\n",
8239                         (unsigned long long)key.objectid);
8240                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8241         } else {
8242                 fprintf(stderr, "clearing full backref on %llu\n",
8243                         (unsigned long long)key.objectid);
8244                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8245         }
8246         btrfs_set_extent_flags(path.nodes[0], ei, flags);
8247         btrfs_mark_buffer_dirty(path.nodes[0]);
8248         btrfs_release_path(&path);
8249         return btrfs_commit_transaction(trans, root);
8250 }
8251
8252 /* right now we only prune from the extent allocation tree */
8253 static int prune_one_block(struct btrfs_trans_handle *trans,
8254                            struct btrfs_fs_info *info,
8255                            struct btrfs_corrupt_block *corrupt)
8256 {
8257         int ret;
8258         struct btrfs_path path;
8259         struct extent_buffer *eb;
8260         u64 found;
8261         int slot;
8262         int nritems;
8263         int level = corrupt->level + 1;
8264
8265         btrfs_init_path(&path);
8266 again:
8267         /* we want to stop at the parent to our busted block */
8268         path.lowest_level = level;
8269
8270         ret = btrfs_search_slot(trans, info->extent_root,
8271                                 &corrupt->key, &path, -1, 1);
8272
8273         if (ret < 0)
8274                 goto out;
8275
8276         eb = path.nodes[level];
8277         if (!eb) {
8278                 ret = -ENOENT;
8279                 goto out;
8280         }
8281
8282         /*
8283          * hopefully the search gave us the block we want to prune,
8284          * lets try that first
8285          */
8286         slot = path.slots[level];
8287         found =  btrfs_node_blockptr(eb, slot);
8288         if (found == corrupt->cache.start)
8289                 goto del_ptr;
8290
8291         nritems = btrfs_header_nritems(eb);
8292
8293         /* the search failed, lets scan this node and hope we find it */
8294         for (slot = 0; slot < nritems; slot++) {
8295                 found =  btrfs_node_blockptr(eb, slot);
8296                 if (found == corrupt->cache.start)
8297                         goto del_ptr;
8298         }
8299         /*
8300          * we couldn't find the bad block.  TODO, search all the nodes for pointers
8301          * to this block
8302          */
8303         if (eb == info->extent_root->node) {
8304                 ret = -ENOENT;
8305                 goto out;
8306         } else {
8307                 level++;
8308                 btrfs_release_path(&path);
8309                 goto again;
8310         }
8311
8312 del_ptr:
8313         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8314         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8315
8316 out:
8317         btrfs_release_path(&path);
8318         return ret;
8319 }
8320
8321 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8322 {
8323         struct btrfs_trans_handle *trans = NULL;
8324         struct cache_extent *cache;
8325         struct btrfs_corrupt_block *corrupt;
8326
8327         while (1) {
8328                 cache = search_cache_extent(info->corrupt_blocks, 0);
8329                 if (!cache)
8330                         break;
8331                 if (!trans) {
8332                         trans = btrfs_start_transaction(info->extent_root, 1);
8333                         if (IS_ERR(trans))
8334                                 return PTR_ERR(trans);
8335                 }
8336                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8337                 prune_one_block(trans, info, corrupt);
8338                 remove_cache_extent(info->corrupt_blocks, cache);
8339         }
8340         if (trans)
8341                 return btrfs_commit_transaction(trans, info->extent_root);
8342         return 0;
8343 }
8344
8345 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8346 {
8347         struct btrfs_block_group_cache *cache;
8348         u64 start, end;
8349         int ret;
8350
8351         while (1) {
8352                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8353                                             &start, &end, EXTENT_DIRTY);
8354                 if (ret)
8355                         break;
8356                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8357                                    GFP_NOFS);
8358         }
8359
8360         start = 0;
8361         while (1) {
8362                 cache = btrfs_lookup_first_block_group(fs_info, start);
8363                 if (!cache)
8364                         break;
8365                 if (cache->cached)
8366                         cache->cached = 0;
8367                 start = cache->key.objectid + cache->key.offset;
8368         }
8369 }
8370
8371 static int check_extent_refs(struct btrfs_root *root,
8372                              struct cache_tree *extent_cache)
8373 {
8374         struct extent_record *rec;
8375         struct cache_extent *cache;
8376         int err = 0;
8377         int ret = 0;
8378         int fixed = 0;
8379         int had_dups = 0;
8380         int recorded = 0;
8381
8382         if (repair) {
8383                 /*
8384                  * if we're doing a repair, we have to make sure
8385                  * we don't allocate from the problem extents.
8386                  * In the worst case, this will be all the
8387                  * extents in the FS
8388                  */
8389                 cache = search_cache_extent(extent_cache, 0);
8390                 while(cache) {
8391                         rec = container_of(cache, struct extent_record, cache);
8392                         set_extent_dirty(root->fs_info->excluded_extents,
8393                                          rec->start,
8394                                          rec->start + rec->max_size - 1,
8395                                          GFP_NOFS);
8396                         cache = next_cache_extent(cache);
8397                 }
8398
8399                 /* pin down all the corrupted blocks too */
8400                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8401                 while(cache) {
8402                         set_extent_dirty(root->fs_info->excluded_extents,
8403                                          cache->start,
8404                                          cache->start + cache->size - 1,
8405                                          GFP_NOFS);
8406                         cache = next_cache_extent(cache);
8407                 }
8408                 prune_corrupt_blocks(root->fs_info);
8409                 reset_cached_block_groups(root->fs_info);
8410         }
8411
8412         reset_cached_block_groups(root->fs_info);
8413
8414         /*
8415          * We need to delete any duplicate entries we find first otherwise we
8416          * could mess up the extent tree when we have backrefs that actually
8417          * belong to a different extent item and not the weird duplicate one.
8418          */
8419         while (repair && !list_empty(&duplicate_extents)) {
8420                 rec = to_extent_record(duplicate_extents.next);
8421                 list_del_init(&rec->list);
8422
8423                 /* Sometimes we can find a backref before we find an actual
8424                  * extent, so we need to process it a little bit to see if there
8425                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8426                  * if this is a backref screwup.  If we need to delete stuff
8427                  * process_duplicates() will return 0, otherwise it will return
8428                  * 1 and we
8429                  */
8430                 if (process_duplicates(root, extent_cache, rec))
8431                         continue;
8432                 ret = delete_duplicate_records(root, rec);
8433                 if (ret < 0)
8434                         return ret;
8435                 /*
8436                  * delete_duplicate_records will return the number of entries
8437                  * deleted, so if it's greater than 0 then we know we actually
8438                  * did something and we need to remove.
8439                  */
8440                 if (ret)
8441                         had_dups = 1;
8442         }
8443
8444         if (had_dups)
8445                 return -EAGAIN;
8446
8447         while(1) {
8448                 int cur_err = 0;
8449
8450                 fixed = 0;
8451                 recorded = 0;
8452                 cache = search_cache_extent(extent_cache, 0);
8453                 if (!cache)
8454                         break;
8455                 rec = container_of(cache, struct extent_record, cache);
8456                 if (rec->num_duplicates) {
8457                         fprintf(stderr, "extent item %llu has multiple extent "
8458                                 "items\n", (unsigned long long)rec->start);
8459                         err = 1;
8460                         cur_err = 1;
8461                 }
8462
8463                 if (rec->refs != rec->extent_item_refs) {
8464                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
8465                                 (unsigned long long)rec->start,
8466                                 (unsigned long long)rec->nr);
8467                         fprintf(stderr, "extent item %llu, found %llu\n",
8468                                 (unsigned long long)rec->extent_item_refs,
8469                                 (unsigned long long)rec->refs);
8470                         ret = record_orphan_data_extents(root->fs_info, rec);
8471                         if (ret < 0)
8472                                 goto repair_abort;
8473                         if (ret == 0) {
8474                                 recorded = 1;
8475                         } else {
8476                                 /*
8477                                  * we can't use the extent to repair file
8478                                  * extent, let the fallback method handle it.
8479                                  */
8480                                 if (!fixed && repair) {
8481                                         ret = fixup_extent_refs(
8482                                                         root->fs_info,
8483                                                         extent_cache, rec);
8484                                         if (ret)
8485                                                 goto repair_abort;
8486                                         fixed = 1;
8487                                 }
8488                         }
8489                         err = 1;
8490                         cur_err = 1;
8491                 }
8492                 if (all_backpointers_checked(rec, 1)) {
8493                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8494                                 (unsigned long long)rec->start,
8495                                 (unsigned long long)rec->nr);
8496
8497                         if (!fixed && !recorded && repair) {
8498                                 ret = fixup_extent_refs(root->fs_info,
8499                                                         extent_cache, rec);
8500                                 if (ret)
8501                                         goto repair_abort;
8502                                 fixed = 1;
8503                         }
8504                         cur_err = 1;
8505                         err = 1;
8506                 }
8507                 if (!rec->owner_ref_checked) {
8508                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8509                                 (unsigned long long)rec->start,
8510                                 (unsigned long long)rec->nr);
8511                         if (!fixed && !recorded && repair) {
8512                                 ret = fixup_extent_refs(root->fs_info,
8513                                                         extent_cache, rec);
8514                                 if (ret)
8515                                         goto repair_abort;
8516                                 fixed = 1;
8517                         }
8518                         err = 1;
8519                         cur_err = 1;
8520                 }
8521                 if (rec->bad_full_backref) {
8522                         fprintf(stderr, "bad full backref, on [%llu]\n",
8523                                 (unsigned long long)rec->start);
8524                         if (repair) {
8525                                 ret = fixup_extent_flags(root->fs_info, rec);
8526                                 if (ret)
8527                                         goto repair_abort;
8528                                 fixed = 1;
8529                         }
8530                         err = 1;
8531                         cur_err = 1;
8532                 }
8533                 /*
8534                  * Although it's not a extent ref's problem, we reuse this
8535                  * routine for error reporting.
8536                  * No repair function yet.
8537                  */
8538                 if (rec->crossing_stripes) {
8539                         fprintf(stderr,
8540                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8541                                 rec->start, rec->start + rec->max_size);
8542                         err = 1;
8543                         cur_err = 1;
8544                 }
8545
8546                 if (rec->wrong_chunk_type) {
8547                         fprintf(stderr,
8548                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8549                                 rec->start, rec->start + rec->max_size);
8550                         err = 1;
8551                         cur_err = 1;
8552                 }
8553
8554                 remove_cache_extent(extent_cache, cache);
8555                 free_all_extent_backrefs(rec);
8556                 if (!init_extent_tree && repair && (!cur_err || fixed))
8557                         clear_extent_dirty(root->fs_info->excluded_extents,
8558                                            rec->start,
8559                                            rec->start + rec->max_size - 1,
8560                                            GFP_NOFS);
8561                 free(rec);
8562         }
8563 repair_abort:
8564         if (repair) {
8565                 if (ret && ret != -EAGAIN) {
8566                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8567                         exit(1);
8568                 } else if (!ret) {
8569                         struct btrfs_trans_handle *trans;
8570
8571                         root = root->fs_info->extent_root;
8572                         trans = btrfs_start_transaction(root, 1);
8573                         if (IS_ERR(trans)) {
8574                                 ret = PTR_ERR(trans);
8575                                 goto repair_abort;
8576                         }
8577
8578                         btrfs_fix_block_accounting(trans, root);
8579                         ret = btrfs_commit_transaction(trans, root);
8580                         if (ret)
8581                                 goto repair_abort;
8582                 }
8583                 if (err)
8584                         fprintf(stderr, "repaired damaged extent references\n");
8585                 return ret;
8586         }
8587         return err;
8588 }
8589
8590 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8591 {
8592         u64 stripe_size;
8593
8594         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8595                 stripe_size = length;
8596                 stripe_size /= num_stripes;
8597         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8598                 stripe_size = length * 2;
8599                 stripe_size /= num_stripes;
8600         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8601                 stripe_size = length;
8602                 stripe_size /= (num_stripes - 1);
8603         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8604                 stripe_size = length;
8605                 stripe_size /= (num_stripes - 2);
8606         } else {
8607                 stripe_size = length;
8608         }
8609         return stripe_size;
8610 }
8611
8612 /*
8613  * Check the chunk with its block group/dev list ref:
8614  * Return 0 if all refs seems valid.
8615  * Return 1 if part of refs seems valid, need later check for rebuild ref
8616  * like missing block group and needs to search extent tree to rebuild them.
8617  * Return -1 if essential refs are missing and unable to rebuild.
8618  */
8619 static int check_chunk_refs(struct chunk_record *chunk_rec,
8620                             struct block_group_tree *block_group_cache,
8621                             struct device_extent_tree *dev_extent_cache,
8622                             int silent)
8623 {
8624         struct cache_extent *block_group_item;
8625         struct block_group_record *block_group_rec;
8626         struct cache_extent *dev_extent_item;
8627         struct device_extent_record *dev_extent_rec;
8628         u64 devid;
8629         u64 offset;
8630         u64 length;
8631         int metadump_v2 = 0;
8632         int i;
8633         int ret = 0;
8634
8635         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8636                                                chunk_rec->offset,
8637                                                chunk_rec->length);
8638         if (block_group_item) {
8639                 block_group_rec = container_of(block_group_item,
8640                                                struct block_group_record,
8641                                                cache);
8642                 if (chunk_rec->length != block_group_rec->offset ||
8643                     chunk_rec->offset != block_group_rec->objectid ||
8644                     (!metadump_v2 &&
8645                      chunk_rec->type_flags != block_group_rec->flags)) {
8646                         if (!silent)
8647                                 fprintf(stderr,
8648                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8649                                         chunk_rec->objectid,
8650                                         chunk_rec->type,
8651                                         chunk_rec->offset,
8652                                         chunk_rec->length,
8653                                         chunk_rec->offset,
8654                                         chunk_rec->type_flags,
8655                                         block_group_rec->objectid,
8656                                         block_group_rec->type,
8657                                         block_group_rec->offset,
8658                                         block_group_rec->offset,
8659                                         block_group_rec->objectid,
8660                                         block_group_rec->flags);
8661                         ret = -1;
8662                 } else {
8663                         list_del_init(&block_group_rec->list);
8664                         chunk_rec->bg_rec = block_group_rec;
8665                 }
8666         } else {
8667                 if (!silent)
8668                         fprintf(stderr,
8669                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8670                                 chunk_rec->objectid,
8671                                 chunk_rec->type,
8672                                 chunk_rec->offset,
8673                                 chunk_rec->length,
8674                                 chunk_rec->offset,
8675                                 chunk_rec->type_flags);
8676                 ret = 1;
8677         }
8678
8679         if (metadump_v2)
8680                 return ret;
8681
8682         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8683                                     chunk_rec->num_stripes);
8684         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8685                 devid = chunk_rec->stripes[i].devid;
8686                 offset = chunk_rec->stripes[i].offset;
8687                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8688                                                        devid, offset, length);
8689                 if (dev_extent_item) {
8690                         dev_extent_rec = container_of(dev_extent_item,
8691                                                 struct device_extent_record,
8692                                                 cache);
8693                         if (dev_extent_rec->objectid != devid ||
8694                             dev_extent_rec->offset != offset ||
8695                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8696                             dev_extent_rec->length != length) {
8697                                 if (!silent)
8698                                         fprintf(stderr,
8699                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8700                                                 chunk_rec->objectid,
8701                                                 chunk_rec->type,
8702                                                 chunk_rec->offset,
8703                                                 chunk_rec->stripes[i].devid,
8704                                                 chunk_rec->stripes[i].offset,
8705                                                 dev_extent_rec->objectid,
8706                                                 dev_extent_rec->offset,
8707                                                 dev_extent_rec->length);
8708                                 ret = -1;
8709                         } else {
8710                                 list_move(&dev_extent_rec->chunk_list,
8711                                           &chunk_rec->dextents);
8712                         }
8713                 } else {
8714                         if (!silent)
8715                                 fprintf(stderr,
8716                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8717                                         chunk_rec->objectid,
8718                                         chunk_rec->type,
8719                                         chunk_rec->offset,
8720                                         chunk_rec->stripes[i].devid,
8721                                         chunk_rec->stripes[i].offset);
8722                         ret = -1;
8723                 }
8724         }
8725         return ret;
8726 }
8727
8728 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8729 int check_chunks(struct cache_tree *chunk_cache,
8730                  struct block_group_tree *block_group_cache,
8731                  struct device_extent_tree *dev_extent_cache,
8732                  struct list_head *good, struct list_head *bad,
8733                  struct list_head *rebuild, int silent)
8734 {
8735         struct cache_extent *chunk_item;
8736         struct chunk_record *chunk_rec;
8737         struct block_group_record *bg_rec;
8738         struct device_extent_record *dext_rec;
8739         int err;
8740         int ret = 0;
8741
8742         chunk_item = first_cache_extent(chunk_cache);
8743         while (chunk_item) {
8744                 chunk_rec = container_of(chunk_item, struct chunk_record,
8745                                          cache);
8746                 err = check_chunk_refs(chunk_rec, block_group_cache,
8747                                        dev_extent_cache, silent);
8748                 if (err < 0)
8749                         ret = err;
8750                 if (err == 0 && good)
8751                         list_add_tail(&chunk_rec->list, good);
8752                 if (err > 0 && rebuild)
8753                         list_add_tail(&chunk_rec->list, rebuild);
8754                 if (err < 0 && bad)
8755                         list_add_tail(&chunk_rec->list, bad);
8756                 chunk_item = next_cache_extent(chunk_item);
8757         }
8758
8759         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8760                 if (!silent)
8761                         fprintf(stderr,
8762                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8763                                 bg_rec->objectid,
8764                                 bg_rec->offset,
8765                                 bg_rec->flags);
8766                 if (!ret)
8767                         ret = 1;
8768         }
8769
8770         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8771                             chunk_list) {
8772                 if (!silent)
8773                         fprintf(stderr,
8774                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8775                                 dext_rec->objectid,
8776                                 dext_rec->offset,
8777                                 dext_rec->length);
8778                 if (!ret)
8779                         ret = 1;
8780         }
8781         return ret;
8782 }
8783
8784
8785 static int check_device_used(struct device_record *dev_rec,
8786                              struct device_extent_tree *dext_cache)
8787 {
8788         struct cache_extent *cache;
8789         struct device_extent_record *dev_extent_rec;
8790         u64 total_byte = 0;
8791
8792         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8793         while (cache) {
8794                 dev_extent_rec = container_of(cache,
8795                                               struct device_extent_record,
8796                                               cache);
8797                 if (dev_extent_rec->objectid != dev_rec->devid)
8798                         break;
8799
8800                 list_del_init(&dev_extent_rec->device_list);
8801                 total_byte += dev_extent_rec->length;
8802                 cache = next_cache_extent(cache);
8803         }
8804
8805         if (total_byte != dev_rec->byte_used) {
8806                 fprintf(stderr,
8807                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8808                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8809                         dev_rec->type, dev_rec->offset);
8810                 return -1;
8811         } else {
8812                 return 0;
8813         }
8814 }
8815
8816 /* check btrfs_dev_item -> btrfs_dev_extent */
8817 static int check_devices(struct rb_root *dev_cache,
8818                          struct device_extent_tree *dev_extent_cache)
8819 {
8820         struct rb_node *dev_node;
8821         struct device_record *dev_rec;
8822         struct device_extent_record *dext_rec;
8823         int err;
8824         int ret = 0;
8825
8826         dev_node = rb_first(dev_cache);
8827         while (dev_node) {
8828                 dev_rec = container_of(dev_node, struct device_record, node);
8829                 err = check_device_used(dev_rec, dev_extent_cache);
8830                 if (err)
8831                         ret = err;
8832
8833                 dev_node = rb_next(dev_node);
8834         }
8835         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8836                             device_list) {
8837                 fprintf(stderr,
8838                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8839                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8840                 if (!ret)
8841                         ret = 1;
8842         }
8843         return ret;
8844 }
8845
8846 static int add_root_item_to_list(struct list_head *head,
8847                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8848                                   u8 level, u8 drop_level,
8849                                   int level_size, struct btrfs_key *drop_key)
8850 {
8851
8852         struct root_item_record *ri_rec;
8853         ri_rec = malloc(sizeof(*ri_rec));
8854         if (!ri_rec)
8855                 return -ENOMEM;
8856         ri_rec->bytenr = bytenr;
8857         ri_rec->objectid = objectid;
8858         ri_rec->level = level;
8859         ri_rec->level_size = level_size;
8860         ri_rec->drop_level = drop_level;
8861         ri_rec->last_snapshot = last_snapshot;
8862         if (drop_key)
8863                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8864         list_add_tail(&ri_rec->list, head);
8865
8866         return 0;
8867 }
8868
8869 static void free_root_item_list(struct list_head *list)
8870 {
8871         struct root_item_record *ri_rec;
8872
8873         while (!list_empty(list)) {
8874                 ri_rec = list_first_entry(list, struct root_item_record,
8875                                           list);
8876                 list_del_init(&ri_rec->list);
8877                 free(ri_rec);
8878         }
8879 }
8880
8881 static int deal_root_from_list(struct list_head *list,
8882                                struct btrfs_root *root,
8883                                struct block_info *bits,
8884                                int bits_nr,
8885                                struct cache_tree *pending,
8886                                struct cache_tree *seen,
8887                                struct cache_tree *reada,
8888                                struct cache_tree *nodes,
8889                                struct cache_tree *extent_cache,
8890                                struct cache_tree *chunk_cache,
8891                                struct rb_root *dev_cache,
8892                                struct block_group_tree *block_group_cache,
8893                                struct device_extent_tree *dev_extent_cache)
8894 {
8895         int ret = 0;
8896         u64 last;
8897
8898         while (!list_empty(list)) {
8899                 struct root_item_record *rec;
8900                 struct extent_buffer *buf;
8901                 rec = list_entry(list->next,
8902                                  struct root_item_record, list);
8903                 last = 0;
8904                 buf = read_tree_block(root->fs_info->tree_root,
8905                                       rec->bytenr, rec->level_size, 0);
8906                 if (!extent_buffer_uptodate(buf)) {
8907                         free_extent_buffer(buf);
8908                         ret = -EIO;
8909                         break;
8910                 }
8911                 ret = add_root_to_pending(buf, extent_cache, pending,
8912                                     seen, nodes, rec->objectid);
8913                 if (ret < 0)
8914                         break;
8915                 /*
8916                  * To rebuild extent tree, we need deal with snapshot
8917                  * one by one, otherwise we deal with node firstly which
8918                  * can maximize readahead.
8919                  */
8920                 while (1) {
8921                         ret = run_next_block(root, bits, bits_nr, &last,
8922                                              pending, seen, reada, nodes,
8923                                              extent_cache, chunk_cache,
8924                                              dev_cache, block_group_cache,
8925                                              dev_extent_cache, rec);
8926                         if (ret != 0)
8927                                 break;
8928                 }
8929                 free_extent_buffer(buf);
8930                 list_del(&rec->list);
8931                 free(rec);
8932                 if (ret < 0)
8933                         break;
8934         }
8935         while (ret >= 0) {
8936                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8937                                      reada, nodes, extent_cache, chunk_cache,
8938                                      dev_cache, block_group_cache,
8939                                      dev_extent_cache, NULL);
8940                 if (ret != 0) {
8941                         if (ret > 0)
8942                                 ret = 0;
8943                         break;
8944                 }
8945         }
8946         return ret;
8947 }
8948
8949 static int check_chunks_and_extents(struct btrfs_root *root)
8950 {
8951         struct rb_root dev_cache;
8952         struct cache_tree chunk_cache;
8953         struct block_group_tree block_group_cache;
8954         struct device_extent_tree dev_extent_cache;
8955         struct cache_tree extent_cache;
8956         struct cache_tree seen;
8957         struct cache_tree pending;
8958         struct cache_tree reada;
8959         struct cache_tree nodes;
8960         struct extent_io_tree excluded_extents;
8961         struct cache_tree corrupt_blocks;
8962         struct btrfs_path path;
8963         struct btrfs_key key;
8964         struct btrfs_key found_key;
8965         int ret, err = 0;
8966         struct block_info *bits;
8967         int bits_nr;
8968         struct extent_buffer *leaf;
8969         int slot;
8970         struct btrfs_root_item ri;
8971         struct list_head dropping_trees;
8972         struct list_head normal_trees;
8973         struct btrfs_root *root1;
8974         u64 objectid;
8975         u32 level_size;
8976         u8 level;
8977
8978         dev_cache = RB_ROOT;
8979         cache_tree_init(&chunk_cache);
8980         block_group_tree_init(&block_group_cache);
8981         device_extent_tree_init(&dev_extent_cache);
8982
8983         cache_tree_init(&extent_cache);
8984         cache_tree_init(&seen);
8985         cache_tree_init(&pending);
8986         cache_tree_init(&nodes);
8987         cache_tree_init(&reada);
8988         cache_tree_init(&corrupt_blocks);
8989         extent_io_tree_init(&excluded_extents);
8990         INIT_LIST_HEAD(&dropping_trees);
8991         INIT_LIST_HEAD(&normal_trees);
8992
8993         if (repair) {
8994                 root->fs_info->excluded_extents = &excluded_extents;
8995                 root->fs_info->fsck_extent_cache = &extent_cache;
8996                 root->fs_info->free_extent_hook = free_extent_hook;
8997                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8998         }
8999
9000         bits_nr = 1024;
9001         bits = malloc(bits_nr * sizeof(struct block_info));
9002         if (!bits) {
9003                 perror("malloc");
9004                 exit(1);
9005         }
9006
9007         if (ctx.progress_enabled) {
9008                 ctx.tp = TASK_EXTENTS;
9009                 task_start(ctx.info);
9010         }
9011
9012 again:
9013         root1 = root->fs_info->tree_root;
9014         level = btrfs_header_level(root1->node);
9015         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9016                                     root1->node->start, 0, level, 0,
9017                                     root1->nodesize, NULL);
9018         if (ret < 0)
9019                 goto out;
9020         root1 = root->fs_info->chunk_root;
9021         level = btrfs_header_level(root1->node);
9022         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9023                                     root1->node->start, 0, level, 0,
9024                                     root1->nodesize, NULL);
9025         if (ret < 0)
9026                 goto out;
9027         btrfs_init_path(&path);
9028         key.offset = 0;
9029         key.objectid = 0;
9030         key.type = BTRFS_ROOT_ITEM_KEY;
9031         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9032                                         &key, &path, 0, 0);
9033         if (ret < 0)
9034                 goto out;
9035         while(1) {
9036                 leaf = path.nodes[0];
9037                 slot = path.slots[0];
9038                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9039                         ret = btrfs_next_leaf(root, &path);
9040                         if (ret != 0)
9041                                 break;
9042                         leaf = path.nodes[0];
9043                         slot = path.slots[0];
9044                 }
9045                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9046                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9047                         unsigned long offset;
9048                         u64 last_snapshot;
9049
9050                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9051                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9052                         last_snapshot = btrfs_root_last_snapshot(&ri);
9053                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9054                                 level = btrfs_root_level(&ri);
9055                                 level_size = root->nodesize;
9056                                 ret = add_root_item_to_list(&normal_trees,
9057                                                 found_key.objectid,
9058                                                 btrfs_root_bytenr(&ri),
9059                                                 last_snapshot, level,
9060                                                 0, level_size, NULL);
9061                                 if (ret < 0)
9062                                         goto out;
9063                         } else {
9064                                 level = btrfs_root_level(&ri);
9065                                 level_size = root->nodesize;
9066                                 objectid = found_key.objectid;
9067                                 btrfs_disk_key_to_cpu(&found_key,
9068                                                       &ri.drop_progress);
9069                                 ret = add_root_item_to_list(&dropping_trees,
9070                                                 objectid,
9071                                                 btrfs_root_bytenr(&ri),
9072                                                 last_snapshot, level,
9073                                                 ri.drop_level,
9074                                                 level_size, &found_key);
9075                                 if (ret < 0)
9076                                         goto out;
9077                         }
9078                 }
9079                 path.slots[0]++;
9080         }
9081         btrfs_release_path(&path);
9082
9083         /*
9084          * check_block can return -EAGAIN if it fixes something, please keep
9085          * this in mind when dealing with return values from these functions, if
9086          * we get -EAGAIN we want to fall through and restart the loop.
9087          */
9088         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9089                                   &seen, &reada, &nodes, &extent_cache,
9090                                   &chunk_cache, &dev_cache, &block_group_cache,
9091                                   &dev_extent_cache);
9092         if (ret < 0) {
9093                 if (ret == -EAGAIN)
9094                         goto loop;
9095                 goto out;
9096         }
9097         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9098                                   &pending, &seen, &reada, &nodes,
9099                                   &extent_cache, &chunk_cache, &dev_cache,
9100                                   &block_group_cache, &dev_extent_cache);
9101         if (ret < 0) {
9102                 if (ret == -EAGAIN)
9103                         goto loop;
9104                 goto out;
9105         }
9106
9107         ret = check_chunks(&chunk_cache, &block_group_cache,
9108                            &dev_extent_cache, NULL, NULL, NULL, 0);
9109         if (ret) {
9110                 if (ret == -EAGAIN)
9111                         goto loop;
9112                 err = ret;
9113         }
9114
9115         ret = check_extent_refs(root, &extent_cache);
9116         if (ret < 0) {
9117                 if (ret == -EAGAIN)
9118                         goto loop;
9119                 goto out;
9120         }
9121
9122         ret = check_devices(&dev_cache, &dev_extent_cache);
9123         if (ret && err)
9124                 ret = err;
9125
9126 out:
9127         task_stop(ctx.info);
9128         if (repair) {
9129                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9130                 extent_io_tree_cleanup(&excluded_extents);
9131                 root->fs_info->fsck_extent_cache = NULL;
9132                 root->fs_info->free_extent_hook = NULL;
9133                 root->fs_info->corrupt_blocks = NULL;
9134                 root->fs_info->excluded_extents = NULL;
9135         }
9136         free(bits);
9137         free_chunk_cache_tree(&chunk_cache);
9138         free_device_cache_tree(&dev_cache);
9139         free_block_group_tree(&block_group_cache);
9140         free_device_extent_tree(&dev_extent_cache);
9141         free_extent_cache_tree(&seen);
9142         free_extent_cache_tree(&pending);
9143         free_extent_cache_tree(&reada);
9144         free_extent_cache_tree(&nodes);
9145         return ret;
9146 loop:
9147         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9148         free_extent_cache_tree(&seen);
9149         free_extent_cache_tree(&pending);
9150         free_extent_cache_tree(&reada);
9151         free_extent_cache_tree(&nodes);
9152         free_chunk_cache_tree(&chunk_cache);
9153         free_block_group_tree(&block_group_cache);
9154         free_device_cache_tree(&dev_cache);
9155         free_device_extent_tree(&dev_extent_cache);
9156         free_extent_record_cache(root->fs_info, &extent_cache);
9157         free_root_item_list(&normal_trees);
9158         free_root_item_list(&dropping_trees);
9159         extent_io_tree_cleanup(&excluded_extents);
9160         goto again;
9161 }
9162
9163 /*
9164  * Check backrefs of a tree block given by @bytenr or @eb.
9165  *
9166  * @root:       the root containing the @bytenr or @eb
9167  * @eb:         tree block extent buffer, can be NULL
9168  * @bytenr:     bytenr of the tree block to search
9169  * @level:      tree level of the tree block
9170  * @owner:      owner of the tree block
9171  *
9172  * Return >0 for any error found and output error message
9173  * Return 0 for no error found
9174  */
9175 static int check_tree_block_ref(struct btrfs_root *root,
9176                                 struct extent_buffer *eb, u64 bytenr,
9177                                 int level, u64 owner)
9178 {
9179         struct btrfs_key key;
9180         struct btrfs_root *extent_root = root->fs_info->extent_root;
9181         struct btrfs_path path;
9182         struct btrfs_extent_item *ei;
9183         struct btrfs_extent_inline_ref *iref;
9184         struct extent_buffer *leaf;
9185         unsigned long end;
9186         unsigned long ptr;
9187         int slot;
9188         int skinny_level;
9189         int type;
9190         u32 nodesize = root->nodesize;
9191         u32 item_size;
9192         u64 offset;
9193         int found_ref = 0;
9194         int err = 0;
9195         int ret;
9196
9197         btrfs_init_path(&path);
9198         key.objectid = bytenr;
9199         if (btrfs_fs_incompat(root->fs_info,
9200                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9201                 key.type = BTRFS_METADATA_ITEM_KEY;
9202         else
9203                 key.type = BTRFS_EXTENT_ITEM_KEY;
9204         key.offset = (u64)-1;
9205
9206         /* Search for the backref in extent tree */
9207         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9208         if (ret < 0) {
9209                 err |= BACKREF_MISSING;
9210                 goto out;
9211         }
9212         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9213         if (ret) {
9214                 err |= BACKREF_MISSING;
9215                 goto out;
9216         }
9217
9218         leaf = path.nodes[0];
9219         slot = path.slots[0];
9220         btrfs_item_key_to_cpu(leaf, &key, slot);
9221
9222         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9223
9224         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9225                 skinny_level = (int)key.offset;
9226                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9227         } else {
9228                 struct btrfs_tree_block_info *info;
9229
9230                 info = (struct btrfs_tree_block_info *)(ei + 1);
9231                 skinny_level = btrfs_tree_block_level(leaf, info);
9232                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9233         }
9234
9235         if (eb) {
9236                 u64 header_gen;
9237                 u64 extent_gen;
9238
9239                 if (!(btrfs_extent_flags(leaf, ei) &
9240                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9241                         error(
9242                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
9243                                 key.objectid, nodesize,
9244                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
9245                         err = BACKREF_MISMATCH;
9246                 }
9247                 header_gen = btrfs_header_generation(eb);
9248                 extent_gen = btrfs_extent_generation(leaf, ei);
9249                 if (header_gen != extent_gen) {
9250                         error(
9251         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
9252                                 key.objectid, nodesize, header_gen,
9253                                 extent_gen);
9254                         err = BACKREF_MISMATCH;
9255                 }
9256                 if (level != skinny_level) {
9257                         error(
9258                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
9259                                 key.objectid, nodesize, level, skinny_level);
9260                         err = BACKREF_MISMATCH;
9261                 }
9262                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
9263                         error(
9264                         "extent[%llu %u] is referred by other roots than %llu",
9265                                 key.objectid, nodesize, root->objectid);
9266                         err = BACKREF_MISMATCH;
9267                 }
9268         }
9269
9270         /*
9271          * Iterate the extent/metadata item to find the exact backref
9272          */
9273         item_size = btrfs_item_size_nr(leaf, slot);
9274         ptr = (unsigned long)iref;
9275         end = (unsigned long)ei + item_size;
9276         while (ptr < end) {
9277                 iref = (struct btrfs_extent_inline_ref *)ptr;
9278                 type = btrfs_extent_inline_ref_type(leaf, iref);
9279                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9280
9281                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9282                         (offset == root->objectid || offset == owner)) {
9283                         found_ref = 1;
9284                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9285                         /* Check if the backref points to valid referencer */
9286                         found_ref = !check_tree_block_ref(root, NULL, offset,
9287                                                           level + 1, owner);
9288                 }
9289
9290                 if (found_ref)
9291                         break;
9292                 ptr += btrfs_extent_inline_ref_size(type);
9293         }
9294
9295         /*
9296          * Inlined extent item doesn't have what we need, check
9297          * TREE_BLOCK_REF_KEY
9298          */
9299         if (!found_ref) {
9300                 btrfs_release_path(&path);
9301                 key.objectid = bytenr;
9302                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9303                 key.offset = root->objectid;
9304
9305                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9306                 if (!ret)
9307                         found_ref = 1;
9308         }
9309         if (!found_ref)
9310                 err |= BACKREF_MISSING;
9311 out:
9312         btrfs_release_path(&path);
9313         if (eb && (err & BACKREF_MISSING))
9314                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9315                         bytenr, nodesize, owner, level);
9316         return err;
9317 }
9318
9319 /*
9320  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9321  *
9322  * Return >0 any error found and output error message
9323  * Return 0 for no error found
9324  */
9325 static int check_extent_data_item(struct btrfs_root *root,
9326                                   struct extent_buffer *eb, int slot)
9327 {
9328         struct btrfs_file_extent_item *fi;
9329         struct btrfs_path path;
9330         struct btrfs_root *extent_root = root->fs_info->extent_root;
9331         struct btrfs_key fi_key;
9332         struct btrfs_key dbref_key;
9333         struct extent_buffer *leaf;
9334         struct btrfs_extent_item *ei;
9335         struct btrfs_extent_inline_ref *iref;
9336         struct btrfs_extent_data_ref *dref;
9337         u64 owner;
9338         u64 file_extent_gen;
9339         u64 disk_bytenr;
9340         u64 disk_num_bytes;
9341         u64 extent_num_bytes;
9342         u64 extent_flags;
9343         u64 extent_gen;
9344         u32 item_size;
9345         unsigned long end;
9346         unsigned long ptr;
9347         int type;
9348         u64 ref_root;
9349         int found_dbackref = 0;
9350         int err = 0;
9351         int ret;
9352
9353         btrfs_item_key_to_cpu(eb, &fi_key, slot);
9354         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9355         file_extent_gen = btrfs_file_extent_generation(eb, fi);
9356
9357         /* Nothing to check for hole and inline data extents */
9358         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9359             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9360                 return 0;
9361
9362         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9363         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9364         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9365
9366         /* Check unaligned disk_num_bytes and num_bytes */
9367         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9368                 error(
9369 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9370                         fi_key.objectid, fi_key.offset, disk_num_bytes,
9371                         root->sectorsize);
9372                 err |= BYTES_UNALIGNED;
9373         } else {
9374                 data_bytes_allocated += disk_num_bytes;
9375         }
9376         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9377                 error(
9378 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9379                         fi_key.objectid, fi_key.offset, extent_num_bytes,
9380                         root->sectorsize);
9381                 err |= BYTES_UNALIGNED;
9382         } else {
9383                 data_bytes_referenced += extent_num_bytes;
9384         }
9385         owner = btrfs_header_owner(eb);
9386
9387         /* Check the extent item of the file extent in extent tree */
9388         btrfs_init_path(&path);
9389         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9390         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9391         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9392
9393         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9394         if (ret) {
9395                 err |= BACKREF_MISSING;
9396                 goto error;
9397         }
9398
9399         leaf = path.nodes[0];
9400         slot = path.slots[0];
9401         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9402
9403         extent_flags = btrfs_extent_flags(leaf, ei);
9404         extent_gen = btrfs_extent_generation(leaf, ei);
9405
9406         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9407                 error(
9408                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9409                     disk_bytenr, disk_num_bytes,
9410                     BTRFS_EXTENT_FLAG_DATA);
9411                 err |= BACKREF_MISMATCH;
9412         }
9413
9414         if (file_extent_gen < extent_gen) {
9415                 error(
9416 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9417                         disk_bytenr, disk_num_bytes, file_extent_gen,
9418                         extent_gen);
9419                 err |= BACKREF_MISMATCH;
9420         }
9421
9422         /* Check data backref inside that extent item */
9423         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9424         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9425         ptr = (unsigned long)iref;
9426         end = (unsigned long)ei + item_size;
9427         while (ptr < end) {
9428                 iref = (struct btrfs_extent_inline_ref *)ptr;
9429                 type = btrfs_extent_inline_ref_type(leaf, iref);
9430                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9431
9432                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9433                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
9434                         if (ref_root == owner || ref_root == root->objectid)
9435                                 found_dbackref = 1;
9436                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9437                         found_dbackref = !check_tree_block_ref(root, NULL,
9438                                 btrfs_extent_inline_ref_offset(leaf, iref),
9439                                 0, owner);
9440                 }
9441
9442                 if (found_dbackref)
9443                         break;
9444                 ptr += btrfs_extent_inline_ref_size(type);
9445         }
9446
9447         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9448         if (!found_dbackref) {
9449                 btrfs_release_path(&path);
9450
9451                 btrfs_init_path(&path);
9452                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9453                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9454                 dbref_key.offset = hash_extent_data_ref(root->objectid,
9455                                 fi_key.objectid, fi_key.offset);
9456
9457                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9458                                         &dbref_key, &path, 0, 0);
9459                 if (!ret)
9460                         found_dbackref = 1;
9461         }
9462
9463         if (!found_dbackref)
9464                 err |= BACKREF_MISSING;
9465 error:
9466         btrfs_release_path(&path);
9467         if (err & BACKREF_MISSING) {
9468                 error("data extent[%llu %llu] backref lost",
9469                       disk_bytenr, disk_num_bytes);
9470         }
9471         return err;
9472 }
9473
9474 /*
9475  * Get real tree block level for the case like shared block
9476  * Return >= 0 as tree level
9477  * Return <0 for error
9478  */
9479 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9480 {
9481         struct extent_buffer *eb;
9482         struct btrfs_path path;
9483         struct btrfs_key key;
9484         struct btrfs_extent_item *ei;
9485         u64 flags;
9486         u64 transid;
9487         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9488         u8 backref_level;
9489         u8 header_level;
9490         int ret;
9491
9492         /* Search extent tree for extent generation and level */
9493         key.objectid = bytenr;
9494         key.type = BTRFS_METADATA_ITEM_KEY;
9495         key.offset = (u64)-1;
9496
9497         btrfs_init_path(&path);
9498         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9499         if (ret < 0)
9500                 goto release_out;
9501         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9502         if (ret < 0)
9503                 goto release_out;
9504         if (ret > 0) {
9505                 ret = -ENOENT;
9506                 goto release_out;
9507         }
9508
9509         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9510         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9511                             struct btrfs_extent_item);
9512         flags = btrfs_extent_flags(path.nodes[0], ei);
9513         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9514                 ret = -ENOENT;
9515                 goto release_out;
9516         }
9517
9518         /* Get transid for later read_tree_block() check */
9519         transid = btrfs_extent_generation(path.nodes[0], ei);
9520
9521         /* Get backref level as one source */
9522         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9523                 backref_level = key.offset;
9524         } else {
9525                 struct btrfs_tree_block_info *info;
9526
9527                 info = (struct btrfs_tree_block_info *)(ei + 1);
9528                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9529         }
9530         btrfs_release_path(&path);
9531
9532         /* Get level from tree block as an alternative source */
9533         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9534         if (!extent_buffer_uptodate(eb)) {
9535                 free_extent_buffer(eb);
9536                 return -EIO;
9537         }
9538         header_level = btrfs_header_level(eb);
9539         free_extent_buffer(eb);
9540
9541         if (header_level != backref_level)
9542                 return -EIO;
9543         return header_level;
9544
9545 release_out:
9546         btrfs_release_path(&path);
9547         return ret;
9548 }
9549
9550 /*
9551  * Check if a tree block backref is valid (points to a valid tree block)
9552  * if level == -1, level will be resolved
9553  * Return >0 for any error found and print error message
9554  */
9555 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9556                                     u64 bytenr, int level)
9557 {
9558         struct btrfs_root *root;
9559         struct btrfs_key key;
9560         struct btrfs_path path;
9561         struct extent_buffer *eb;
9562         struct extent_buffer *node;
9563         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9564         int err = 0;
9565         int ret;
9566
9567         /* Query level for level == -1 special case */
9568         if (level == -1)
9569                 level = query_tree_block_level(fs_info, bytenr);
9570         if (level < 0) {
9571                 err |= REFERENCER_MISSING;
9572                 goto out;
9573         }
9574
9575         key.objectid = root_id;
9576         key.type = BTRFS_ROOT_ITEM_KEY;
9577         key.offset = (u64)-1;
9578
9579         root = btrfs_read_fs_root(fs_info, &key);
9580         if (IS_ERR(root)) {
9581                 err |= REFERENCER_MISSING;
9582                 goto out;
9583         }
9584
9585         /* Read out the tree block to get item/node key */
9586         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9587         if (!extent_buffer_uptodate(eb)) {
9588                 err |= REFERENCER_MISSING;
9589                 free_extent_buffer(eb);
9590                 goto out;
9591         }
9592
9593         /* Empty tree, no need to check key */
9594         if (!btrfs_header_nritems(eb) && !level) {
9595                 free_extent_buffer(eb);
9596                 goto out;
9597         }
9598
9599         if (level)
9600                 btrfs_node_key_to_cpu(eb, &key, 0);
9601         else
9602                 btrfs_item_key_to_cpu(eb, &key, 0);
9603
9604         free_extent_buffer(eb);
9605
9606         btrfs_init_path(&path);
9607         path.lowest_level = level;
9608         /* Search with the first key, to ensure we can reach it */
9609         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9610         if (ret < 0) {
9611                 err |= REFERENCER_MISSING;
9612                 goto release_out;
9613         }
9614
9615         node = path.nodes[level];
9616         if (btrfs_header_bytenr(node) != bytenr) {
9617                 error(
9618         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9619                         bytenr, nodesize, bytenr,
9620                         btrfs_header_bytenr(node));
9621                 err |= REFERENCER_MISMATCH;
9622         }
9623         if (btrfs_header_level(node) != level) {
9624                 error(
9625         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9626                         bytenr, nodesize, level,
9627                         btrfs_header_level(node));
9628                 err |= REFERENCER_MISMATCH;
9629         }
9630
9631 release_out:
9632         btrfs_release_path(&path);
9633 out:
9634         if (err & REFERENCER_MISSING) {
9635                 if (level < 0)
9636                         error("extent [%llu %d] lost referencer (owner: %llu)",
9637                                 bytenr, nodesize, root_id);
9638                 else
9639                         error(
9640                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9641                                 bytenr, nodesize, root_id, level);
9642         }
9643
9644         return err;
9645 }
9646
9647 /*
9648  * Check referencer for shared block backref
9649  * If level == -1, this function will resolve the level.
9650  */
9651 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9652                                      u64 parent, u64 bytenr, int level)
9653 {
9654         struct extent_buffer *eb;
9655         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9656         u32 nr;
9657         int found_parent = 0;
9658         int i;
9659
9660         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9661         if (!extent_buffer_uptodate(eb))
9662                 goto out;
9663
9664         if (level == -1)
9665                 level = query_tree_block_level(fs_info, bytenr);
9666         if (level < 0)
9667                 goto out;
9668
9669         if (level + 1 != btrfs_header_level(eb))
9670                 goto out;
9671
9672         nr = btrfs_header_nritems(eb);
9673         for (i = 0; i < nr; i++) {
9674                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9675                         found_parent = 1;
9676                         break;
9677                 }
9678         }
9679 out:
9680         free_extent_buffer(eb);
9681         if (!found_parent) {
9682                 error(
9683         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9684                         bytenr, nodesize, parent, level);
9685                 return REFERENCER_MISSING;
9686         }
9687         return 0;
9688 }
9689
9690 /*
9691  * Check referencer for normal (inlined) data ref
9692  * If len == 0, it will be resolved by searching in extent tree
9693  */
9694 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9695                                      u64 root_id, u64 objectid, u64 offset,
9696                                      u64 bytenr, u64 len, u32 count)
9697 {
9698         struct btrfs_root *root;
9699         struct btrfs_root *extent_root = fs_info->extent_root;
9700         struct btrfs_key key;
9701         struct btrfs_path path;
9702         struct extent_buffer *leaf;
9703         struct btrfs_file_extent_item *fi;
9704         u32 found_count = 0;
9705         int slot;
9706         int ret = 0;
9707
9708         if (!len) {
9709                 key.objectid = bytenr;
9710                 key.type = BTRFS_EXTENT_ITEM_KEY;
9711                 key.offset = (u64)-1;
9712
9713                 btrfs_init_path(&path);
9714                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9715                 if (ret < 0)
9716                         goto out;
9717                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9718                 if (ret)
9719                         goto out;
9720                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9721                 if (key.objectid != bytenr ||
9722                     key.type != BTRFS_EXTENT_ITEM_KEY)
9723                         goto out;
9724                 len = key.offset;
9725                 btrfs_release_path(&path);
9726         }
9727         key.objectid = root_id;
9728         key.type = BTRFS_ROOT_ITEM_KEY;
9729         key.offset = (u64)-1;
9730         btrfs_init_path(&path);
9731
9732         root = btrfs_read_fs_root(fs_info, &key);
9733         if (IS_ERR(root))
9734                 goto out;
9735
9736         key.objectid = objectid;
9737         key.type = BTRFS_EXTENT_DATA_KEY;
9738         /*
9739          * It can be nasty as data backref offset is
9740          * file offset - file extent offset, which is smaller or
9741          * equal to original backref offset.  The only special case is
9742          * overflow.  So we need to special check and do further search.
9743          */
9744         key.offset = offset & (1ULL << 63) ? 0 : offset;
9745
9746         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9747         if (ret < 0)
9748                 goto out;
9749
9750         /*
9751          * Search afterwards to get correct one
9752          * NOTE: As we must do a comprehensive check on the data backref to
9753          * make sure the dref count also matches, we must iterate all file
9754          * extents for that inode.
9755          */
9756         while (1) {
9757                 leaf = path.nodes[0];
9758                 slot = path.slots[0];
9759
9760                 btrfs_item_key_to_cpu(leaf, &key, slot);
9761                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9762                         break;
9763                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9764                 /*
9765                  * Except normal disk bytenr and disk num bytes, we still
9766                  * need to do extra check on dbackref offset as
9767                  * dbackref offset = file_offset - file_extent_offset
9768                  */
9769                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9770                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9771                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9772                     offset)
9773                         found_count++;
9774
9775                 ret = btrfs_next_item(root, &path);
9776                 if (ret)
9777                         break;
9778         }
9779 out:
9780         btrfs_release_path(&path);
9781         if (found_count != count) {
9782                 error(
9783 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9784                         bytenr, len, root_id, objectid, offset, count, found_count);
9785                 return REFERENCER_MISSING;
9786         }
9787         return 0;
9788 }
9789
9790 /*
9791  * Check if the referencer of a shared data backref exists
9792  */
9793 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9794                                      u64 parent, u64 bytenr)
9795 {
9796         struct extent_buffer *eb;
9797         struct btrfs_key key;
9798         struct btrfs_file_extent_item *fi;
9799         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9800         u32 nr;
9801         int found_parent = 0;
9802         int i;
9803
9804         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9805         if (!extent_buffer_uptodate(eb))
9806                 goto out;
9807
9808         nr = btrfs_header_nritems(eb);
9809         for (i = 0; i < nr; i++) {
9810                 btrfs_item_key_to_cpu(eb, &key, i);
9811                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9812                         continue;
9813
9814                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9815                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9816                         continue;
9817
9818                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9819                         found_parent = 1;
9820                         break;
9821                 }
9822         }
9823
9824 out:
9825         free_extent_buffer(eb);
9826         if (!found_parent) {
9827                 error("shared extent %llu referencer lost (parent: %llu)",
9828                         bytenr, parent);
9829                 return REFERENCER_MISSING;
9830         }
9831         return 0;
9832 }
9833
9834 /*
9835  * This function will check a given extent item, including its backref and
9836  * itself (like crossing stripe boundary and type)
9837  *
9838  * Since we don't use extent_record anymore, introduce new error bit
9839  */
9840 static int check_extent_item(struct btrfs_fs_info *fs_info,
9841                              struct extent_buffer *eb, int slot)
9842 {
9843         struct btrfs_extent_item *ei;
9844         struct btrfs_extent_inline_ref *iref;
9845         struct btrfs_extent_data_ref *dref;
9846         unsigned long end;
9847         unsigned long ptr;
9848         int type;
9849         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9850         u32 item_size = btrfs_item_size_nr(eb, slot);
9851         u64 flags;
9852         u64 offset;
9853         int metadata = 0;
9854         int level;
9855         struct btrfs_key key;
9856         int ret;
9857         int err = 0;
9858
9859         btrfs_item_key_to_cpu(eb, &key, slot);
9860         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9861                 bytes_used += key.offset;
9862         else
9863                 bytes_used += nodesize;
9864
9865         if (item_size < sizeof(*ei)) {
9866                 /*
9867                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9868                  * old thing when on disk format is still un-determined.
9869                  * No need to care about it anymore
9870                  */
9871                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9872                 return -ENOTTY;
9873         }
9874
9875         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9876         flags = btrfs_extent_flags(eb, ei);
9877
9878         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9879                 metadata = 1;
9880         if (metadata && check_crossing_stripes(global_info, key.objectid,
9881                                                eb->len)) {
9882                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9883                       key.objectid, key.objectid + nodesize);
9884                 err |= CROSSING_STRIPE_BOUNDARY;
9885         }
9886
9887         ptr = (unsigned long)(ei + 1);
9888
9889         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9890                 /* Old EXTENT_ITEM metadata */
9891                 struct btrfs_tree_block_info *info;
9892
9893                 info = (struct btrfs_tree_block_info *)ptr;
9894                 level = btrfs_tree_block_level(eb, info);
9895                 ptr += sizeof(struct btrfs_tree_block_info);
9896         } else {
9897                 /* New METADATA_ITEM */
9898                 level = key.offset;
9899         }
9900         end = (unsigned long)ei + item_size;
9901
9902         if (ptr >= end) {
9903                 err |= ITEM_SIZE_MISMATCH;
9904                 goto out;
9905         }
9906
9907         /* Now check every backref in this extent item */
9908 next:
9909         iref = (struct btrfs_extent_inline_ref *)ptr;
9910         type = btrfs_extent_inline_ref_type(eb, iref);
9911         offset = btrfs_extent_inline_ref_offset(eb, iref);
9912         switch (type) {
9913         case BTRFS_TREE_BLOCK_REF_KEY:
9914                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9915                                                level);
9916                 err |= ret;
9917                 break;
9918         case BTRFS_SHARED_BLOCK_REF_KEY:
9919                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9920                                                  level);
9921                 err |= ret;
9922                 break;
9923         case BTRFS_EXTENT_DATA_REF_KEY:
9924                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9925                 ret = check_extent_data_backref(fs_info,
9926                                 btrfs_extent_data_ref_root(eb, dref),
9927                                 btrfs_extent_data_ref_objectid(eb, dref),
9928                                 btrfs_extent_data_ref_offset(eb, dref),
9929                                 key.objectid, key.offset,
9930                                 btrfs_extent_data_ref_count(eb, dref));
9931                 err |= ret;
9932                 break;
9933         case BTRFS_SHARED_DATA_REF_KEY:
9934                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9935                 err |= ret;
9936                 break;
9937         default:
9938                 error("extent[%llu %d %llu] has unknown ref type: %d",
9939                         key.objectid, key.type, key.offset, type);
9940                 err |= UNKNOWN_TYPE;
9941                 goto out;
9942         }
9943
9944         ptr += btrfs_extent_inline_ref_size(type);
9945         if (ptr < end)
9946                 goto next;
9947
9948 out:
9949         return err;
9950 }
9951
9952 /*
9953  * Check if a dev extent item is referred correctly by its chunk
9954  */
9955 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9956                                  struct extent_buffer *eb, int slot)
9957 {
9958         struct btrfs_root *chunk_root = fs_info->chunk_root;
9959         struct btrfs_dev_extent *ptr;
9960         struct btrfs_path path;
9961         struct btrfs_key chunk_key;
9962         struct btrfs_key devext_key;
9963         struct btrfs_chunk *chunk;
9964         struct extent_buffer *l;
9965         int num_stripes;
9966         u64 length;
9967         int i;
9968         int found_chunk = 0;
9969         int ret;
9970
9971         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9972         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9973         length = btrfs_dev_extent_length(eb, ptr);
9974
9975         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9976         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9977         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9978
9979         btrfs_init_path(&path);
9980         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9981         if (ret)
9982                 goto out;
9983
9984         l = path.nodes[0];
9985         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9986         if (btrfs_chunk_length(l, chunk) != length)
9987                 goto out;
9988
9989         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9990         for (i = 0; i < num_stripes; i++) {
9991                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9992                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9993
9994                 if (devid == devext_key.objectid &&
9995                     offset == devext_key.offset) {
9996                         found_chunk = 1;
9997                         break;
9998                 }
9999         }
10000 out:
10001         btrfs_release_path(&path);
10002         if (!found_chunk) {
10003                 error(
10004                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10005                         devext_key.objectid, devext_key.offset, length);
10006                 return REFERENCER_MISSING;
10007         }
10008         return 0;
10009 }
10010
10011 /*
10012  * Check if the used space is correct with the dev item
10013  */
10014 static int check_dev_item(struct btrfs_fs_info *fs_info,
10015                           struct extent_buffer *eb, int slot)
10016 {
10017         struct btrfs_root *dev_root = fs_info->dev_root;
10018         struct btrfs_dev_item *dev_item;
10019         struct btrfs_path path;
10020         struct btrfs_key key;
10021         struct btrfs_dev_extent *ptr;
10022         u64 dev_id;
10023         u64 used;
10024         u64 total = 0;
10025         int ret;
10026
10027         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10028         dev_id = btrfs_device_id(eb, dev_item);
10029         used = btrfs_device_bytes_used(eb, dev_item);
10030
10031         key.objectid = dev_id;
10032         key.type = BTRFS_DEV_EXTENT_KEY;
10033         key.offset = 0;
10034
10035         btrfs_init_path(&path);
10036         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10037         if (ret < 0) {
10038                 btrfs_item_key_to_cpu(eb, &key, slot);
10039                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10040                         key.objectid, key.type, key.offset);
10041                 btrfs_release_path(&path);
10042                 return REFERENCER_MISSING;
10043         }
10044
10045         /* Iterate dev_extents to calculate the used space of a device */
10046         while (1) {
10047                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10048
10049                 if (key.objectid > dev_id)
10050                         break;
10051                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10052                         goto next;
10053
10054                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10055                                      struct btrfs_dev_extent);
10056                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10057 next:
10058                 ret = btrfs_next_item(dev_root, &path);
10059                 if (ret)
10060                         break;
10061         }
10062         btrfs_release_path(&path);
10063
10064         if (used != total) {
10065                 btrfs_item_key_to_cpu(eb, &key, slot);
10066                 error(
10067 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10068                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10069                         BTRFS_DEV_EXTENT_KEY, dev_id);
10070                 return ACCOUNTING_MISMATCH;
10071         }
10072         return 0;
10073 }
10074
10075 /*
10076  * Check a block group item with its referener (chunk) and its used space
10077  * with extent/metadata item
10078  */
10079 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10080                                   struct extent_buffer *eb, int slot)
10081 {
10082         struct btrfs_root *extent_root = fs_info->extent_root;
10083         struct btrfs_root *chunk_root = fs_info->chunk_root;
10084         struct btrfs_block_group_item *bi;
10085         struct btrfs_block_group_item bg_item;
10086         struct btrfs_path path;
10087         struct btrfs_key bg_key;
10088         struct btrfs_key chunk_key;
10089         struct btrfs_key extent_key;
10090         struct btrfs_chunk *chunk;
10091         struct extent_buffer *leaf;
10092         struct btrfs_extent_item *ei;
10093         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10094         u64 flags;
10095         u64 bg_flags;
10096         u64 used;
10097         u64 total = 0;
10098         int ret;
10099         int err = 0;
10100
10101         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10102         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10103         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10104         used = btrfs_block_group_used(&bg_item);
10105         bg_flags = btrfs_block_group_flags(&bg_item);
10106
10107         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10108         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10109         chunk_key.offset = bg_key.objectid;
10110
10111         btrfs_init_path(&path);
10112         /* Search for the referencer chunk */
10113         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10114         if (ret) {
10115                 error(
10116                 "block group[%llu %llu] did not find the related chunk item",
10117                         bg_key.objectid, bg_key.offset);
10118                 err |= REFERENCER_MISSING;
10119         } else {
10120                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10121                                         struct btrfs_chunk);
10122                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10123                                                 bg_key.offset) {
10124                         error(
10125         "block group[%llu %llu] related chunk item length does not match",
10126                                 bg_key.objectid, bg_key.offset);
10127                         err |= REFERENCER_MISMATCH;
10128                 }
10129         }
10130         btrfs_release_path(&path);
10131
10132         /* Search from the block group bytenr */
10133         extent_key.objectid = bg_key.objectid;
10134         extent_key.type = 0;
10135         extent_key.offset = 0;
10136
10137         btrfs_init_path(&path);
10138         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10139         if (ret < 0)
10140                 goto out;
10141
10142         /* Iterate extent tree to account used space */
10143         while (1) {
10144                 leaf = path.nodes[0];
10145                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10146                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10147                         break;
10148
10149                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10150                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10151                         goto next;
10152                 if (extent_key.objectid < bg_key.objectid)
10153                         goto next;
10154
10155                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10156                         total += nodesize;
10157                 else
10158                         total += extent_key.offset;
10159
10160                 ei = btrfs_item_ptr(leaf, path.slots[0],
10161                                     struct btrfs_extent_item);
10162                 flags = btrfs_extent_flags(leaf, ei);
10163                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10164                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10165                                 error(
10166                         "bad extent[%llu, %llu) type mismatch with chunk",
10167                                         extent_key.objectid,
10168                                         extent_key.objectid + extent_key.offset);
10169                                 err |= CHUNK_TYPE_MISMATCH;
10170                         }
10171                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10172                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10173                                     BTRFS_BLOCK_GROUP_METADATA))) {
10174                                 error(
10175                         "bad extent[%llu, %llu) type mismatch with chunk",
10176                                         extent_key.objectid,
10177                                         extent_key.objectid + nodesize);
10178                                 err |= CHUNK_TYPE_MISMATCH;
10179                         }
10180                 }
10181 next:
10182                 ret = btrfs_next_item(extent_root, &path);
10183                 if (ret)
10184                         break;
10185         }
10186
10187 out:
10188         btrfs_release_path(&path);
10189
10190         if (total != used) {
10191                 error(
10192                 "block group[%llu %llu] used %llu but extent items used %llu",
10193                         bg_key.objectid, bg_key.offset, used, total);
10194                 err |= ACCOUNTING_MISMATCH;
10195         }
10196         return err;
10197 }
10198
10199 /*
10200  * Check a chunk item.
10201  * Including checking all referred dev_extents and block group
10202  */
10203 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10204                             struct extent_buffer *eb, int slot)
10205 {
10206         struct btrfs_root *extent_root = fs_info->extent_root;
10207         struct btrfs_root *dev_root = fs_info->dev_root;
10208         struct btrfs_path path;
10209         struct btrfs_key chunk_key;
10210         struct btrfs_key bg_key;
10211         struct btrfs_key devext_key;
10212         struct btrfs_chunk *chunk;
10213         struct extent_buffer *leaf;
10214         struct btrfs_block_group_item *bi;
10215         struct btrfs_block_group_item bg_item;
10216         struct btrfs_dev_extent *ptr;
10217         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10218         u64 length;
10219         u64 chunk_end;
10220         u64 type;
10221         u64 profile;
10222         int num_stripes;
10223         u64 offset;
10224         u64 objectid;
10225         int i;
10226         int ret;
10227         int err = 0;
10228
10229         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
10230         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
10231         length = btrfs_chunk_length(eb, chunk);
10232         chunk_end = chunk_key.offset + length;
10233         if (!IS_ALIGNED(length, sectorsize)) {
10234                 error("chunk[%llu %llu) not aligned to %u",
10235                         chunk_key.offset, chunk_end, sectorsize);
10236                 err |= BYTES_UNALIGNED;
10237                 goto out;
10238         }
10239
10240         type = btrfs_chunk_type(eb, chunk);
10241         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
10242         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10243                 error("chunk[%llu %llu) has no chunk type",
10244                         chunk_key.offset, chunk_end);
10245                 err |= UNKNOWN_TYPE;
10246         }
10247         if (profile && (profile & (profile - 1))) {
10248                 error("chunk[%llu %llu) multiple profiles detected: %llx",
10249                         chunk_key.offset, chunk_end, profile);
10250                 err |= UNKNOWN_TYPE;
10251         }
10252
10253         bg_key.objectid = chunk_key.offset;
10254         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10255         bg_key.offset = length;
10256
10257         btrfs_init_path(&path);
10258         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
10259         if (ret) {
10260                 error(
10261                 "chunk[%llu %llu) did not find the related block group item",
10262                         chunk_key.offset, chunk_end);
10263                 err |= REFERENCER_MISSING;
10264         } else{
10265                 leaf = path.nodes[0];
10266                 bi = btrfs_item_ptr(leaf, path.slots[0],
10267                                     struct btrfs_block_group_item);
10268                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
10269                                    sizeof(bg_item));
10270                 if (btrfs_block_group_flags(&bg_item) != type) {
10271                         error(
10272 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
10273                                 chunk_key.offset, chunk_end, type,
10274                                 btrfs_block_group_flags(&bg_item));
10275                         err |= REFERENCER_MISSING;
10276                 }
10277         }
10278
10279         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
10280         for (i = 0; i < num_stripes; i++) {
10281                 btrfs_release_path(&path);
10282                 btrfs_init_path(&path);
10283                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10284                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10285                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10286
10287                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10288                                         0, 0);
10289                 if (ret)
10290                         goto not_match_dev;
10291
10292                 leaf = path.nodes[0];
10293                 ptr = btrfs_item_ptr(leaf, path.slots[0],
10294                                      struct btrfs_dev_extent);
10295                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10296                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
10297                 if (objectid != chunk_key.objectid ||
10298                     offset != chunk_key.offset ||
10299                     btrfs_dev_extent_length(leaf, ptr) != length)
10300                         goto not_match_dev;
10301                 continue;
10302 not_match_dev:
10303                 err |= BACKREF_MISSING;
10304                 error(
10305                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10306                         chunk_key.objectid, chunk_end, i);
10307                 continue;
10308         }
10309         btrfs_release_path(&path);
10310 out:
10311         return err;
10312 }
10313
10314 /*
10315  * Main entry function to check known items and update related accounting info
10316  */
10317 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10318 {
10319         struct btrfs_fs_info *fs_info = root->fs_info;
10320         struct btrfs_key key;
10321         int slot = 0;
10322         int type;
10323         struct btrfs_extent_data_ref *dref;
10324         int ret;
10325         int err = 0;
10326
10327 next:
10328         btrfs_item_key_to_cpu(eb, &key, slot);
10329         type = key.type;
10330
10331         switch (type) {
10332         case BTRFS_EXTENT_DATA_KEY:
10333                 ret = check_extent_data_item(root, eb, slot);
10334                 err |= ret;
10335                 break;
10336         case BTRFS_BLOCK_GROUP_ITEM_KEY:
10337                 ret = check_block_group_item(fs_info, eb, slot);
10338                 err |= ret;
10339                 break;
10340         case BTRFS_DEV_ITEM_KEY:
10341                 ret = check_dev_item(fs_info, eb, slot);
10342                 err |= ret;
10343                 break;
10344         case BTRFS_CHUNK_ITEM_KEY:
10345                 ret = check_chunk_item(fs_info, eb, slot);
10346                 err |= ret;
10347                 break;
10348         case BTRFS_DEV_EXTENT_KEY:
10349                 ret = check_dev_extent_item(fs_info, eb, slot);
10350                 err |= ret;
10351                 break;
10352         case BTRFS_EXTENT_ITEM_KEY:
10353         case BTRFS_METADATA_ITEM_KEY:
10354                 ret = check_extent_item(fs_info, eb, slot);
10355                 err |= ret;
10356                 break;
10357         case BTRFS_EXTENT_CSUM_KEY:
10358                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
10359                 break;
10360         case BTRFS_TREE_BLOCK_REF_KEY:
10361                 ret = check_tree_block_backref(fs_info, key.offset,
10362                                                key.objectid, -1);
10363                 err |= ret;
10364                 break;
10365         case BTRFS_EXTENT_DATA_REF_KEY:
10366                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10367                 ret = check_extent_data_backref(fs_info,
10368                                 btrfs_extent_data_ref_root(eb, dref),
10369                                 btrfs_extent_data_ref_objectid(eb, dref),
10370                                 btrfs_extent_data_ref_offset(eb, dref),
10371                                 key.objectid, 0,
10372                                 btrfs_extent_data_ref_count(eb, dref));
10373                 err |= ret;
10374                 break;
10375         case BTRFS_SHARED_BLOCK_REF_KEY:
10376                 ret = check_shared_block_backref(fs_info, key.offset,
10377                                                  key.objectid, -1);
10378                 err |= ret;
10379                 break;
10380         case BTRFS_SHARED_DATA_REF_KEY:
10381                 ret = check_shared_data_backref(fs_info, key.offset,
10382                                                 key.objectid);
10383                 err |= ret;
10384                 break;
10385         default:
10386                 break;
10387         }
10388
10389         if (++slot < btrfs_header_nritems(eb))
10390                 goto next;
10391
10392         return err;
10393 }
10394
10395 /*
10396  * Helper function for later fs/subvol tree check.  To determine if a tree
10397  * block should be checked.
10398  * This function will ensure only the direct referencer with lowest rootid to
10399  * check a fs/subvolume tree block.
10400  *
10401  * Backref check at extent tree would detect errors like missing subvolume
10402  * tree, so we can do aggressive check to reduce duplicated checks.
10403  */
10404 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10405 {
10406         struct btrfs_root *extent_root = root->fs_info->extent_root;
10407         struct btrfs_key key;
10408         struct btrfs_path path;
10409         struct extent_buffer *leaf;
10410         int slot;
10411         struct btrfs_extent_item *ei;
10412         unsigned long ptr;
10413         unsigned long end;
10414         int type;
10415         u32 item_size;
10416         u64 offset;
10417         struct btrfs_extent_inline_ref *iref;
10418         int ret;
10419
10420         btrfs_init_path(&path);
10421         key.objectid = btrfs_header_bytenr(eb);
10422         key.type = BTRFS_METADATA_ITEM_KEY;
10423         key.offset = (u64)-1;
10424
10425         /*
10426          * Any failure in backref resolving means we can't determine
10427          * whom the tree block belongs to.
10428          * So in that case, we need to check that tree block
10429          */
10430         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10431         if (ret < 0)
10432                 goto need_check;
10433
10434         ret = btrfs_previous_extent_item(extent_root, &path,
10435                                          btrfs_header_bytenr(eb));
10436         if (ret)
10437                 goto need_check;
10438
10439         leaf = path.nodes[0];
10440         slot = path.slots[0];
10441         btrfs_item_key_to_cpu(leaf, &key, slot);
10442         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10443
10444         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10445                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10446         } else {
10447                 struct btrfs_tree_block_info *info;
10448
10449                 info = (struct btrfs_tree_block_info *)(ei + 1);
10450                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10451         }
10452
10453         item_size = btrfs_item_size_nr(leaf, slot);
10454         ptr = (unsigned long)iref;
10455         end = (unsigned long)ei + item_size;
10456         while (ptr < end) {
10457                 iref = (struct btrfs_extent_inline_ref *)ptr;
10458                 type = btrfs_extent_inline_ref_type(leaf, iref);
10459                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10460
10461                 /*
10462                  * We only check the tree block if current root is
10463                  * the lowest referencer of it.
10464                  */
10465                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10466                     offset < root->objectid) {
10467                         btrfs_release_path(&path);
10468                         return 0;
10469                 }
10470
10471                 ptr += btrfs_extent_inline_ref_size(type);
10472         }
10473         /*
10474          * Normally we should also check keyed tree block ref, but that may be
10475          * very time consuming.  Inlined ref should already make us skip a lot
10476          * of refs now.  So skip search keyed tree block ref.
10477          */
10478
10479 need_check:
10480         btrfs_release_path(&path);
10481         return 1;
10482 }
10483
10484 /*
10485  * Traversal function for tree block. We will do:
10486  * 1) Skip shared fs/subvolume tree blocks
10487  * 2) Update related bytes accounting
10488  * 3) Pre-order traversal
10489  */
10490 static int traverse_tree_block(struct btrfs_root *root,
10491                                 struct extent_buffer *node)
10492 {
10493         struct extent_buffer *eb;
10494         struct btrfs_key key;
10495         struct btrfs_key drop_key;
10496         int level;
10497         u64 nr;
10498         int i;
10499         int err = 0;
10500         int ret;
10501
10502         /*
10503          * Skip shared fs/subvolume tree block, in that case they will
10504          * be checked by referencer with lowest rootid
10505          */
10506         if (is_fstree(root->objectid) && !should_check(root, node))
10507                 return 0;
10508
10509         /* Update bytes accounting */
10510         total_btree_bytes += node->len;
10511         if (fs_root_objectid(btrfs_header_owner(node)))
10512                 total_fs_tree_bytes += node->len;
10513         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10514                 total_extent_tree_bytes += node->len;
10515         if (!found_old_backref &&
10516             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10517             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10518             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10519                 found_old_backref = 1;
10520
10521         /* pre-order tranversal, check itself first */
10522         level = btrfs_header_level(node);
10523         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10524                                    btrfs_header_level(node),
10525                                    btrfs_header_owner(node));
10526         err |= ret;
10527         if (err)
10528                 error(
10529         "check %s failed root %llu bytenr %llu level %d, force continue check",
10530                         level ? "node":"leaf", root->objectid,
10531                         btrfs_header_bytenr(node), btrfs_header_level(node));
10532
10533         if (!level) {
10534                 btree_space_waste += btrfs_leaf_free_space(root, node);
10535                 ret = check_leaf_items(root, node);
10536                 err |= ret;
10537                 return err;
10538         }
10539
10540         nr = btrfs_header_nritems(node);
10541         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10542         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10543                 sizeof(struct btrfs_key_ptr);
10544
10545         /* Then check all its children */
10546         for (i = 0; i < nr; i++) {
10547                 u64 blocknr = btrfs_node_blockptr(node, i);
10548
10549                 btrfs_node_key_to_cpu(node, &key, i);
10550                 if (level == root->root_item.drop_level &&
10551                     is_dropped_key(&key, &drop_key))
10552                         continue;
10553
10554                 /*
10555                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10556                  * to call the function itself.
10557                  */
10558                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10559                 if (extent_buffer_uptodate(eb)) {
10560                         ret = traverse_tree_block(root, eb);
10561                         err |= ret;
10562                 }
10563                 free_extent_buffer(eb);
10564         }
10565
10566         return err;
10567 }
10568
10569 /*
10570  * Low memory usage version check_chunks_and_extents.
10571  */
10572 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10573 {
10574         struct btrfs_path path;
10575         struct btrfs_key key;
10576         struct btrfs_root *root1;
10577         struct btrfs_root *cur_root;
10578         int err = 0;
10579         int ret;
10580
10581         root1 = root->fs_info->chunk_root;
10582         ret = traverse_tree_block(root1, root1->node);
10583         err |= ret;
10584
10585         root1 = root->fs_info->tree_root;
10586         ret = traverse_tree_block(root1, root1->node);
10587         err |= ret;
10588
10589         btrfs_init_path(&path);
10590         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10591         key.offset = 0;
10592         key.type = BTRFS_ROOT_ITEM_KEY;
10593
10594         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10595         if (ret) {
10596                 error("cannot find extent treet in tree_root");
10597                 goto out;
10598         }
10599
10600         while (1) {
10601                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10602                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10603                         goto next;
10604                 key.offset = (u64)-1;
10605
10606                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10607                 if (IS_ERR(cur_root) || !cur_root) {
10608                         error("failed to read tree: %lld", key.objectid);
10609                         goto next;
10610                 }
10611
10612                 ret = traverse_tree_block(cur_root, cur_root->node);
10613                 err |= ret;
10614
10615 next:
10616                 ret = btrfs_next_item(root1, &path);
10617                 if (ret)
10618                         goto out;
10619         }
10620
10621 out:
10622         btrfs_release_path(&path);
10623         return err;
10624 }
10625
10626 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10627                            struct btrfs_root *root, int overwrite)
10628 {
10629         struct extent_buffer *c;
10630         struct extent_buffer *old = root->node;
10631         int level;
10632         int ret;
10633         struct btrfs_disk_key disk_key = {0,0,0};
10634
10635         level = 0;
10636
10637         if (overwrite) {
10638                 c = old;
10639                 extent_buffer_get(c);
10640                 goto init;
10641         }
10642         c = btrfs_alloc_free_block(trans, root,
10643                                    root->nodesize,
10644                                    root->root_key.objectid,
10645                                    &disk_key, level, 0, 0);
10646         if (IS_ERR(c)) {
10647                 c = old;
10648                 extent_buffer_get(c);
10649                 overwrite = 1;
10650         }
10651 init:
10652         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10653         btrfs_set_header_level(c, level);
10654         btrfs_set_header_bytenr(c, c->start);
10655         btrfs_set_header_generation(c, trans->transid);
10656         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10657         btrfs_set_header_owner(c, root->root_key.objectid);
10658
10659         write_extent_buffer(c, root->fs_info->fsid,
10660                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10661
10662         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10663                             btrfs_header_chunk_tree_uuid(c),
10664                             BTRFS_UUID_SIZE);
10665
10666         btrfs_mark_buffer_dirty(c);
10667         /*
10668          * this case can happen in the following case:
10669          *
10670          * 1.overwrite previous root.
10671          *
10672          * 2.reinit reloc data root, this is because we skip pin
10673          * down reloc data tree before which means we can allocate
10674          * same block bytenr here.
10675          */
10676         if (old->start == c->start) {
10677                 btrfs_set_root_generation(&root->root_item,
10678                                           trans->transid);
10679                 root->root_item.level = btrfs_header_level(root->node);
10680                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10681                                         &root->root_key, &root->root_item);
10682                 if (ret) {
10683                         free_extent_buffer(c);
10684                         return ret;
10685                 }
10686         }
10687         free_extent_buffer(old);
10688         root->node = c;
10689         add_root_to_dirty_list(root);
10690         return 0;
10691 }
10692
10693 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10694                                 struct extent_buffer *eb, int tree_root)
10695 {
10696         struct extent_buffer *tmp;
10697         struct btrfs_root_item *ri;
10698         struct btrfs_key key;
10699         u64 bytenr;
10700         u32 nodesize;
10701         int level = btrfs_header_level(eb);
10702         int nritems;
10703         int ret;
10704         int i;
10705
10706         /*
10707          * If we have pinned this block before, don't pin it again.
10708          * This can not only avoid forever loop with broken filesystem
10709          * but also give us some speedups.
10710          */
10711         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10712                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10713                 return 0;
10714
10715         btrfs_pin_extent(fs_info, eb->start, eb->len);
10716
10717         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10718         nritems = btrfs_header_nritems(eb);
10719         for (i = 0; i < nritems; i++) {
10720                 if (level == 0) {
10721                         btrfs_item_key_to_cpu(eb, &key, i);
10722                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10723                                 continue;
10724                         /* Skip the extent root and reloc roots */
10725                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10726                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10727                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10728                                 continue;
10729                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10730                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10731
10732                         /*
10733                          * If at any point we start needing the real root we
10734                          * will have to build a stump root for the root we are
10735                          * in, but for now this doesn't actually use the root so
10736                          * just pass in extent_root.
10737                          */
10738                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10739                                               nodesize, 0);
10740                         if (!extent_buffer_uptodate(tmp)) {
10741                                 fprintf(stderr, "Error reading root block\n");
10742                                 return -EIO;
10743                         }
10744                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10745                         free_extent_buffer(tmp);
10746                         if (ret)
10747                                 return ret;
10748                 } else {
10749                         bytenr = btrfs_node_blockptr(eb, i);
10750
10751                         /* If we aren't the tree root don't read the block */
10752                         if (level == 1 && !tree_root) {
10753                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10754                                 continue;
10755                         }
10756
10757                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10758                                               nodesize, 0);
10759                         if (!extent_buffer_uptodate(tmp)) {
10760                                 fprintf(stderr, "Error reading tree block\n");
10761                                 return -EIO;
10762                         }
10763                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10764                         free_extent_buffer(tmp);
10765                         if (ret)
10766                                 return ret;
10767                 }
10768         }
10769
10770         return 0;
10771 }
10772
10773 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10774 {
10775         int ret;
10776
10777         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10778         if (ret)
10779                 return ret;
10780
10781         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10782 }
10783
10784 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10785 {
10786         struct btrfs_block_group_cache *cache;
10787         struct btrfs_path path;
10788         struct extent_buffer *leaf;
10789         struct btrfs_chunk *chunk;
10790         struct btrfs_key key;
10791         int ret;
10792         u64 start;
10793
10794         btrfs_init_path(&path);
10795         key.objectid = 0;
10796         key.type = BTRFS_CHUNK_ITEM_KEY;
10797         key.offset = 0;
10798         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
10799         if (ret < 0) {
10800                 btrfs_release_path(&path);
10801                 return ret;
10802         }
10803
10804         /*
10805          * We do this in case the block groups were screwed up and had alloc
10806          * bits that aren't actually set on the chunks.  This happens with
10807          * restored images every time and could happen in real life I guess.
10808          */
10809         fs_info->avail_data_alloc_bits = 0;
10810         fs_info->avail_metadata_alloc_bits = 0;
10811         fs_info->avail_system_alloc_bits = 0;
10812
10813         /* First we need to create the in-memory block groups */
10814         while (1) {
10815                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10816                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
10817                         if (ret < 0) {
10818                                 btrfs_release_path(&path);
10819                                 return ret;
10820                         }
10821                         if (ret) {
10822                                 ret = 0;
10823                                 break;
10824                         }
10825                 }
10826                 leaf = path.nodes[0];
10827                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10828                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10829                         path.slots[0]++;
10830                         continue;
10831                 }
10832
10833                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
10834                 btrfs_add_block_group(fs_info, 0,
10835                                       btrfs_chunk_type(leaf, chunk),
10836                                       key.objectid, key.offset,
10837                                       btrfs_chunk_length(leaf, chunk));
10838                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10839                                  key.offset + btrfs_chunk_length(leaf, chunk),
10840                                  GFP_NOFS);
10841                 path.slots[0]++;
10842         }
10843         start = 0;
10844         while (1) {
10845                 cache = btrfs_lookup_first_block_group(fs_info, start);
10846                 if (!cache)
10847                         break;
10848                 cache->cached = 1;
10849                 start = cache->key.objectid + cache->key.offset;
10850         }
10851
10852         btrfs_release_path(&path);
10853         return 0;
10854 }
10855
/*
 * Drop the state of a pending balance from the tree root and give the data
 * reloc tree a fresh, empty root.
 *
 * Steps:
 * 1) delete the balance item; if there is none, skip straight to step 3
 * 2) delete every item in the tree root whose objectid is
 *    BTRFS_TREE_RELOC_OBJECTID
 * 3) re-initialize the data reloc tree and create its root directory
 *
 * Returns 0 on success or the first error encountered.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
                         struct btrfs_fs_info *fs_info)
{
        struct btrfs_root *root = fs_info->tree_root;
        struct btrfs_path path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        /* del_slot is only meaningful while del_nr is non-zero */
        int del_slot, del_nr = 0;
        int ret;
        /* set once the current leaf yielded at least one item to look at */
        int found = 0;

        btrfs_init_path(&path);
        key.objectid = BTRFS_BALANCE_OBJECTID;
        key.type = BTRFS_BALANCE_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
        if (ret) {
                /* ret > 0: no balance item exists, which is not an error */
                if (ret > 0)
                        ret = 0;
                if (!ret)
                        goto reinit_data_reloc;
                else
                        goto out;
        }

        ret = btrfs_del_item(trans, root, &path);
        if (ret)
                goto out;
        btrfs_release_path(&path);

        key.objectid = BTRFS_TREE_RELOC_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
        if (ret < 0)
                goto out;
        /*
         * Scan leaf by leaf.  Matching items of one leaf are collected as a
         * contiguous run (del_slot, del_nr) and deleted in one call when the
         * end of the leaf is reached.
         */
        while (1) {
                if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
                        /* Nothing was examined in this leaf: we are done */
                        if (!found)
                                break;

                        if (del_nr) {
                                ret = btrfs_del_items(trans, root, &path,
                                                      del_slot, del_nr);
                                del_nr = 0;
                                if (ret)
                                        goto out;
                        }
                        /*
                         * key holds the last key read from the leaf; search
                         * again from just past it.
                         */
                        key.offset++;
                        btrfs_release_path(&path);

                        found = 0;
                        ret = btrfs_search_slot(trans, root, &key, &path,
                                                -1, 1);
                        if (ret < 0)
                                goto out;
                        continue;
                }
                found = 1;
                leaf = path.nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
                        break;
                if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
                        path.slots[0]++;
                        continue;
                }
                /* Start a new run or extend the current one */
                if (!del_nr) {
                        del_slot = path.slots[0];
                        del_nr = 1;
                } else {
                        del_nr++;
                }
                path.slots[0]++;
        }

        /* Delete the run still pending when the loop was left via break */
        if (del_nr) {
                ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
                if (ret)
                        goto out;
        }
        btrfs_release_path(&path);

reinit_data_reloc:
        key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = (u64)-1;
        root = btrfs_read_fs_root(fs_info, &key);
        if (IS_ERR(root)) {
                fprintf(stderr, "Error reading data reloc tree\n");
                ret = PTR_ERR(root);
                goto out;
        }
        record_root_in_trans(trans, root);
        /* overwrite == 0: try to allocate a new block for the root */
        ret = btrfs_fsck_reinit_root(trans, root, 0);
        if (ret)
                goto out;
        ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
out:
        btrfs_release_path(&path);
        return ret;
}
10958
10959 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10960                               struct btrfs_fs_info *fs_info)
10961 {
10962         u64 start = 0;
10963         int ret;
10964
10965         /*
10966          * The only reason we don't do this is because right now we're just
10967          * walking the trees we find and pinning down their bytes, we don't look
10968          * at any of the leaves.  In order to do mixed groups we'd have to check
10969          * the leaves of any fs roots and pin down the bytes for any file
10970          * extents we find.  Not hard but why do it if we don't have to?
10971          */
10972         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10973                 fprintf(stderr, "We don't support re-initing the extent tree "
10974                         "for mixed block groups yet, please notify a btrfs "
10975                         "developer you want to do this so they can add this "
10976                         "functionality.\n");
10977                 return -EINVAL;
10978         }
10979
10980         /*
10981          * first we need to walk all of the trees except the extent tree and pin
10982          * down the bytes that are in use so we don't overwrite any existing
10983          * metadata.
10984          */
10985         ret = pin_metadata_blocks(fs_info);
10986         if (ret) {
10987                 fprintf(stderr, "error pinning down used bytes\n");
10988                 return ret;
10989         }
10990
10991         /*
10992          * Need to drop all the block groups since we're going to recreate all
10993          * of them again.
10994          */
10995         btrfs_free_block_groups(fs_info);
10996         ret = reset_block_groups(fs_info);
10997         if (ret) {
10998                 fprintf(stderr, "error resetting the block groups\n");
10999                 return ret;
11000         }
11001
11002         /* Ok we can allocate now, reinit the extent root */
11003         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11004         if (ret) {
11005                 fprintf(stderr, "extent root initialization failed\n");
11006                 /*
11007                  * When the transaction code is updated we should end the
11008                  * transaction, but for now progs only knows about commit so
11009                  * just return an error.
11010                  */
11011                 return ret;
11012         }
11013
11014         /*
11015          * Now we have all the in-memory block groups setup so we can make
11016          * allocations properly, and the metadata we care about is safe since we
11017          * pinned all of it above.
11018          */
11019         while (1) {
11020                 struct btrfs_block_group_cache *cache;
11021
11022                 cache = btrfs_lookup_first_block_group(fs_info, start);
11023                 if (!cache)
11024                         break;
11025                 start = cache->key.objectid + cache->key.offset;
11026                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11027                                         &cache->key, &cache->item,
11028                                         sizeof(cache->item));
11029                 if (ret) {
11030                         fprintf(stderr, "Error adding block group\n");
11031                         return ret;
11032                 }
11033                 btrfs_extent_post_op(trans, fs_info->extent_root);
11034         }
11035
11036         ret = reset_balance(trans, fs_info);
11037         if (ret)
11038                 fprintf(stderr, "error resetting the pending balance\n");
11039
11040         return ret;
11041 }
11042
11043 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11044 {
11045         struct btrfs_path path;
11046         struct btrfs_trans_handle *trans;
11047         struct btrfs_key key;
11048         int ret;
11049
11050         printf("Recowing metadata block %llu\n", eb->start);
11051         key.objectid = btrfs_header_owner(eb);
11052         key.type = BTRFS_ROOT_ITEM_KEY;
11053         key.offset = (u64)-1;
11054
11055         root = btrfs_read_fs_root(root->fs_info, &key);
11056         if (IS_ERR(root)) {
11057                 fprintf(stderr, "Couldn't find owner root %llu\n",
11058                         key.objectid);
11059                 return PTR_ERR(root);
11060         }
11061
11062         trans = btrfs_start_transaction(root, 1);
11063         if (IS_ERR(trans))
11064                 return PTR_ERR(trans);
11065
11066         btrfs_init_path(&path);
11067         path.lowest_level = btrfs_header_level(eb);
11068         if (path.lowest_level)
11069                 btrfs_node_key_to_cpu(eb, &key, 0);
11070         else
11071                 btrfs_item_key_to_cpu(eb, &key, 0);
11072
11073         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11074         btrfs_commit_transaction(trans, root);
11075         btrfs_release_path(&path);
11076         return ret;
11077 }
11078
11079 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11080 {
11081         struct btrfs_path path;
11082         struct btrfs_trans_handle *trans;
11083         struct btrfs_key key;
11084         int ret;
11085
11086         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11087                bad->key.type, bad->key.offset);
11088         key.objectid = bad->root_id;
11089         key.type = BTRFS_ROOT_ITEM_KEY;
11090         key.offset = (u64)-1;
11091
11092         root = btrfs_read_fs_root(root->fs_info, &key);
11093         if (IS_ERR(root)) {
11094                 fprintf(stderr, "Couldn't find owner root %llu\n",
11095                         key.objectid);
11096                 return PTR_ERR(root);
11097         }
11098
11099         trans = btrfs_start_transaction(root, 1);
11100         if (IS_ERR(trans))
11101                 return PTR_ERR(trans);
11102
11103         btrfs_init_path(&path);
11104         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11105         if (ret) {
11106                 if (ret > 0)
11107                         ret = 0;
11108                 goto out;
11109         }
11110         ret = btrfs_del_item(trans, root, &path);
11111 out:
11112         btrfs_commit_transaction(trans, root);
11113         btrfs_release_path(&path);
11114         return ret;
11115 }
11116
11117 static int zero_log_tree(struct btrfs_root *root)
11118 {
11119         struct btrfs_trans_handle *trans;
11120         int ret;
11121
11122         trans = btrfs_start_transaction(root, 1);
11123         if (IS_ERR(trans)) {
11124                 ret = PTR_ERR(trans);
11125                 return ret;
11126         }
11127         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11128         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11129         ret = btrfs_commit_transaction(trans, root);
11130         return ret;
11131 }
11132
11133 static int populate_csum(struct btrfs_trans_handle *trans,
11134                          struct btrfs_root *csum_root, char *buf, u64 start,
11135                          u64 len)
11136 {
11137         u64 offset = 0;
11138         u64 sectorsize;
11139         int ret = 0;
11140
11141         while (offset < len) {
11142                 sectorsize = csum_root->sectorsize;
11143                 ret = read_extent_data(csum_root, buf, start + offset,
11144                                        &sectorsize, 0);
11145                 if (ret)
11146                         break;
11147                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11148                                             start + offset, buf, sectorsize);
11149                 if (ret)
11150                         break;
11151                 offset += sectorsize;
11152         }
11153         return ret;
11154 }
11155
11156 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11157                                       struct btrfs_root *csum_root,
11158                                       struct btrfs_root *cur_root)
11159 {
11160         struct btrfs_path path;
11161         struct btrfs_key key;
11162         struct extent_buffer *node;
11163         struct btrfs_file_extent_item *fi;
11164         char *buf = NULL;
11165         u64 start = 0;
11166         u64 len = 0;
11167         int slot = 0;
11168         int ret = 0;
11169
11170         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11171         if (!buf)
11172                 return -ENOMEM;
11173
11174         btrfs_init_path(&path);
11175         key.objectid = 0;
11176         key.offset = 0;
11177         key.type = 0;
11178         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11179         if (ret < 0)
11180                 goto out;
11181         /* Iterate all regular file extents and fill its csum */
11182         while (1) {
11183                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11184
11185                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11186                         goto next;
11187                 node = path.nodes[0];
11188                 slot = path.slots[0];
11189                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11190                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
11191                         goto next;
11192                 start = btrfs_file_extent_disk_bytenr(node, fi);
11193                 len = btrfs_file_extent_disk_num_bytes(node, fi);
11194
11195                 ret = populate_csum(trans, csum_root, buf, start, len);
11196                 if (ret == -EEXIST)
11197                         ret = 0;
11198                 if (ret < 0)
11199                         goto out;
11200 next:
11201                 /*
11202                  * TODO: if next leaf is corrupted, jump to nearest next valid
11203                  * leaf.
11204                  */
11205                 ret = btrfs_next_item(cur_root, &path);
11206                 if (ret < 0)
11207                         goto out;
11208                 if (ret > 0) {
11209                         ret = 0;
11210                         goto out;
11211                 }
11212         }
11213
11214 out:
11215         btrfs_release_path(&path);
11216         free(buf);
11217         return ret;
11218 }
11219
11220 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11221                                   struct btrfs_root *csum_root)
11222 {
11223         struct btrfs_fs_info *fs_info = csum_root->fs_info;
11224         struct btrfs_path path;
11225         struct btrfs_root *tree_root = fs_info->tree_root;
11226         struct btrfs_root *cur_root;
11227         struct extent_buffer *node;
11228         struct btrfs_key key;
11229         int slot = 0;
11230         int ret = 0;
11231
11232         btrfs_init_path(&path);
11233         key.objectid = BTRFS_FS_TREE_OBJECTID;
11234         key.offset = 0;
11235         key.type = BTRFS_ROOT_ITEM_KEY;
11236         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
11237         if (ret < 0)
11238                 goto out;
11239         if (ret > 0) {
11240                 ret = -ENOENT;
11241                 goto out;
11242         }
11243
11244         while (1) {
11245                 node = path.nodes[0];
11246                 slot = path.slots[0];
11247                 btrfs_item_key_to_cpu(node, &key, slot);
11248                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
11249                         goto out;
11250                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11251                         goto next;
11252                 if (!is_fstree(key.objectid))
11253                         goto next;
11254                 key.offset = (u64)-1;
11255
11256                 cur_root = btrfs_read_fs_root(fs_info, &key);
11257                 if (IS_ERR(cur_root) || !cur_root) {
11258                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
11259                                 key.objectid);
11260                         goto out;
11261                 }
11262                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
11263                                 cur_root);
11264                 if (ret < 0)
11265                         goto out;
11266 next:
11267                 ret = btrfs_next_item(tree_root, &path);
11268                 if (ret > 0) {
11269                         ret = 0;
11270                         goto out;
11271                 }
11272                 if (ret < 0)
11273                         goto out;
11274         }
11275
11276 out:
11277         btrfs_release_path(&path);
11278         return ret;
11279 }
11280
11281 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11282                                       struct btrfs_root *csum_root)
11283 {
11284         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11285         struct btrfs_path path;
11286         struct btrfs_extent_item *ei;
11287         struct extent_buffer *leaf;
11288         char *buf;
11289         struct btrfs_key key;
11290         int ret;
11291
11292         btrfs_init_path(&path);
11293         key.objectid = 0;
11294         key.type = BTRFS_EXTENT_ITEM_KEY;
11295         key.offset = 0;
11296         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11297         if (ret < 0) {
11298                 btrfs_release_path(&path);
11299                 return ret;
11300         }
11301
11302         buf = malloc(csum_root->sectorsize);
11303         if (!buf) {
11304                 btrfs_release_path(&path);
11305                 return -ENOMEM;
11306         }
11307
11308         while (1) {
11309                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11310                         ret = btrfs_next_leaf(extent_root, &path);
11311                         if (ret < 0)
11312                                 break;
11313                         if (ret) {
11314                                 ret = 0;
11315                                 break;
11316                         }
11317                 }
11318                 leaf = path.nodes[0];
11319
11320                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11321                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11322                         path.slots[0]++;
11323                         continue;
11324                 }
11325
11326                 ei = btrfs_item_ptr(leaf, path.slots[0],
11327                                     struct btrfs_extent_item);
11328                 if (!(btrfs_extent_flags(leaf, ei) &
11329                       BTRFS_EXTENT_FLAG_DATA)) {
11330                         path.slots[0]++;
11331                         continue;
11332                 }
11333
11334                 ret = populate_csum(trans, csum_root, buf, key.objectid,
11335                                     key.offset);
11336                 if (ret)
11337                         break;
11338                 path.slots[0]++;
11339         }
11340
11341         btrfs_release_path(&path);
11342         free(buf);
11343         return ret;
11344 }
11345
/*
 * Recompute data checksums and insert them into the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source.  When search_fs_tree is non-zero
 * the fs/subvolume trees are walked instead of the extent tree.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
11362
11363 static void free_roots_info_cache(void)
11364 {
11365         if (!roots_info_cache)
11366                 return;
11367
11368         while (!cache_tree_empty(roots_info_cache)) {
11369                 struct cache_extent *entry;
11370                 struct root_item_info *rii;
11371
11372                 entry = first_cache_extent(roots_info_cache);
11373                 if (!entry)
11374                         break;
11375                 remove_cache_extent(roots_info_cache, entry);
11376                 rii = container_of(entry, struct root_item_info, cache_extent);
11377                 free(rii);
11378         }
11379
11380         free(roots_info_cache);
11381         roots_info_cache = NULL;
11382 }
11383
11384 static int build_roots_info_cache(struct btrfs_fs_info *info)
11385 {
11386         int ret = 0;
11387         struct btrfs_key key;
11388         struct extent_buffer *leaf;
11389         struct btrfs_path path;
11390
11391         if (!roots_info_cache) {
11392                 roots_info_cache = malloc(sizeof(*roots_info_cache));
11393                 if (!roots_info_cache)
11394                         return -ENOMEM;
11395                 cache_tree_init(roots_info_cache);
11396         }
11397
11398         btrfs_init_path(&path);
11399         key.objectid = 0;
11400         key.type = BTRFS_EXTENT_ITEM_KEY;
11401         key.offset = 0;
11402         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11403         if (ret < 0)
11404                 goto out;
11405         leaf = path.nodes[0];
11406
11407         while (1) {
11408                 struct btrfs_key found_key;
11409                 struct btrfs_extent_item *ei;
11410                 struct btrfs_extent_inline_ref *iref;
11411                 int slot = path.slots[0];
11412                 int type;
11413                 u64 flags;
11414                 u64 root_id;
11415                 u8 level;
11416                 struct cache_extent *entry;
11417                 struct root_item_info *rii;
11418
11419                 if (slot >= btrfs_header_nritems(leaf)) {
11420                         ret = btrfs_next_leaf(info->extent_root, &path);
11421                         if (ret < 0) {
11422                                 break;
11423                         } else if (ret) {
11424                                 ret = 0;
11425                                 break;
11426                         }
11427                         leaf = path.nodes[0];
11428                         slot = path.slots[0];
11429                 }
11430
11431                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11432
11433                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11434                     found_key.type != BTRFS_METADATA_ITEM_KEY)
11435                         goto next;
11436
11437                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11438                 flags = btrfs_extent_flags(leaf, ei);
11439
11440                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11441                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11442                         goto next;
11443
11444                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11445                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11446                         level = found_key.offset;
11447                 } else {
11448                         struct btrfs_tree_block_info *binfo;
11449
11450                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
11451                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11452                         level = btrfs_tree_block_level(leaf, binfo);
11453                 }
11454
11455                 /*
11456                  * For a root extent, it must be of the following type and the
11457                  * first (and only one) iref in the item.
11458                  */
11459                 type = btrfs_extent_inline_ref_type(leaf, iref);
11460                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11461                         goto next;
11462
11463                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11464                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11465                 if (!entry) {
11466                         rii = malloc(sizeof(struct root_item_info));
11467                         if (!rii) {
11468                                 ret = -ENOMEM;
11469                                 goto out;
11470                         }
11471                         rii->cache_extent.start = root_id;
11472                         rii->cache_extent.size = 1;
11473                         rii->level = (u8)-1;
11474                         entry = &rii->cache_extent;
11475                         ret = insert_cache_extent(roots_info_cache, entry);
11476                         ASSERT(ret == 0);
11477                 } else {
11478                         rii = container_of(entry, struct root_item_info,
11479                                            cache_extent);
11480                 }
11481
11482                 ASSERT(rii->cache_extent.start == root_id);
11483                 ASSERT(rii->cache_extent.size == 1);
11484
11485                 if (level > rii->level || rii->level == (u8)-1) {
11486                         rii->level = level;
11487                         rii->bytenr = found_key.objectid;
11488                         rii->gen = btrfs_extent_generation(leaf, ei);
11489                         rii->node_count = 1;
11490                 } else if (level == rii->level) {
11491                         rii->node_count++;
11492                 }
11493 next:
11494                 path.slots[0]++;
11495         }
11496
11497 out:
11498         btrfs_release_path(&path);
11499
11500         return ret;
11501 }
11502
11503 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11504                                   struct btrfs_path *path,
11505                                   const struct btrfs_key *root_key,
11506                                   const int read_only_mode)
11507 {
11508         const u64 root_id = root_key->objectid;
11509         struct cache_extent *entry;
11510         struct root_item_info *rii;
11511         struct btrfs_root_item ri;
11512         unsigned long offset;
11513
11514         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11515         if (!entry) {
11516                 fprintf(stderr,
11517                         "Error: could not find extent items for root %llu\n",
11518                         root_key->objectid);
11519                 return -ENOENT;
11520         }
11521
11522         rii = container_of(entry, struct root_item_info, cache_extent);
11523         ASSERT(rii->cache_extent.start == root_id);
11524         ASSERT(rii->cache_extent.size == 1);
11525
11526         if (rii->node_count != 1) {
11527                 fprintf(stderr,
11528                         "Error: could not find btree root extent for root %llu\n",
11529                         root_id);
11530                 return -ENOENT;
11531         }
11532
11533         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11534         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11535
11536         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11537             btrfs_root_level(&ri) != rii->level ||
11538             btrfs_root_generation(&ri) != rii->gen) {
11539
11540                 /*
11541                  * If we're in repair mode but our caller told us to not update
11542                  * the root item, i.e. just check if it needs to be updated, don't
11543                  * print this message, since the caller will call us again shortly
11544                  * for the same root item without read only mode (the caller will
11545                  * open a transaction first).
11546                  */
11547                 if (!(read_only_mode && repair))
11548                         fprintf(stderr,
11549                                 "%sroot item for root %llu,"
11550                                 " current bytenr %llu, current gen %llu, current level %u,"
11551                                 " new bytenr %llu, new gen %llu, new level %u\n",
11552                                 (read_only_mode ? "" : "fixing "),
11553                                 root_id,
11554                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11555                                 btrfs_root_level(&ri),
11556                                 rii->bytenr, rii->gen, rii->level);
11557
11558                 if (btrfs_root_generation(&ri) > rii->gen) {
11559                         fprintf(stderr,
11560                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11561                                 root_id, btrfs_root_generation(&ri), rii->gen);
11562                         return -EINVAL;
11563                 }
11564
11565                 if (!read_only_mode) {
11566                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11567                         btrfs_set_root_level(&ri, rii->level);
11568                         btrfs_set_root_generation(&ri, rii->gen);
11569                         write_extent_buffer(path->nodes[0], &ri,
11570                                             offset, sizeof(ri));
11571                 }
11572
11573                 return 1;
11574         }
11575
11576         return 0;
11577 }
11578
11579 /*
11580  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11581  * caused read-only snapshots to be corrupted if they were created at a moment
11582  * when the source subvolume/snapshot had orphan items. The issue was that the
11583  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11584  * node instead of the post orphan cleanup root node.
11585  * So this function, and its callees, just detects and fixes those cases. Even
11586  * though the regression was for read-only snapshots, this function applies to
11587  * any snapshot/subvolume root.
11588  * This must be run before any other repair code - not doing it so, makes other
11589  * repair code delete or modify backrefs in the extent tree for example, which
11590  * will result in an inconsistent fs after repairing the root items.
11591  */
11592 static int repair_root_items(struct btrfs_fs_info *info)
11593 {
11594         struct btrfs_path path;
11595         struct btrfs_key key;
11596         struct extent_buffer *leaf;
11597         struct btrfs_trans_handle *trans = NULL;
11598         int ret = 0;
11599         int bad_roots = 0;
11600         int need_trans = 0;
11601
11602         btrfs_init_path(&path);
11603
11604         ret = build_roots_info_cache(info);
11605         if (ret)
11606                 goto out;
11607
11608         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11609         key.type = BTRFS_ROOT_ITEM_KEY;
11610         key.offset = 0;
11611
11612 again:
11613         /*
11614          * Avoid opening and committing transactions if a leaf doesn't have
11615          * any root items that need to be fixed, so that we avoid rotating
11616          * backup roots unnecessarily.
11617          */
11618         if (need_trans) {
11619                 trans = btrfs_start_transaction(info->tree_root, 1);
11620                 if (IS_ERR(trans)) {
11621                         ret = PTR_ERR(trans);
11622                         goto out;
11623                 }
11624         }
11625
11626         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11627                                 0, trans ? 1 : 0);
11628         if (ret < 0)
11629                 goto out;
11630         leaf = path.nodes[0];
11631
11632         while (1) {
11633                 struct btrfs_key found_key;
11634
11635                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11636                         int no_more_keys = find_next_key(&path, &key);
11637
11638                         btrfs_release_path(&path);
11639                         if (trans) {
11640                                 ret = btrfs_commit_transaction(trans,
11641                                                                info->tree_root);
11642                                 trans = NULL;
11643                                 if (ret < 0)
11644                                         goto out;
11645                         }
11646                         need_trans = 0;
11647                         if (no_more_keys)
11648                                 break;
11649                         goto again;
11650                 }
11651
11652                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11653
11654                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11655                         goto next;
11656                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11657                         goto next;
11658
11659                 ret = maybe_repair_root_item(info, &path, &found_key,
11660                                              trans ? 0 : 1);
11661                 if (ret < 0)
11662                         goto out;
11663                 if (ret) {
11664                         if (!trans && repair) {
11665                                 need_trans = 1;
11666                                 key = found_key;
11667                                 btrfs_release_path(&path);
11668                                 goto again;
11669                         }
11670                         bad_roots++;
11671                 }
11672 next:
11673                 path.slots[0]++;
11674         }
11675         ret = 0;
11676 out:
11677         free_roots_info_cache();
11678         btrfs_release_path(&path);
11679         if (trans)
11680                 btrfs_commit_transaction(trans, info->tree_root);
11681         if (ret < 0)
11682                 return ret;
11683
11684         return bad_roots;
11685 }
11686
11687 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11688 {
11689         struct btrfs_trans_handle *trans;
11690         struct btrfs_block_group_cache *bg_cache;
11691         u64 current = 0;
11692         int ret = 0;
11693
11694         /* Clear all free space cache inodes and its extent data */
11695         while (1) {
11696                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11697                 if (!bg_cache)
11698                         break;
11699                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11700                 if (ret < 0)
11701                         return ret;
11702                 current = bg_cache->key.objectid + bg_cache->key.offset;
11703         }
11704
11705         /* Don't forget to set cache_generation to -1 */
11706         trans = btrfs_start_transaction(fs_info->tree_root, 0);
11707         if (IS_ERR(trans)) {
11708                 error("failed to update super block cache generation");
11709                 return PTR_ERR(trans);
11710         }
11711         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11712         btrfs_commit_transaction(trans, fs_info->tree_root);
11713
11714         return ret;
11715 }
11716
/*
 * Usage text for "btrfs check", passed to usage() by cmd_check().
 * The array is NULL-terminated; every string is emitted verbatim.
 */
const char * const cmd_check_usage[] = {
	/* Synopsis and short description */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* Long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* Superblock / root selection and repair switches */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",

	/* Checker implementation */
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",

	/* Extra checks and reports */
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",

	NULL
};
11747
11748 int cmd_check(int argc, char **argv)
11749 {
11750         struct cache_tree root_cache;
11751         struct btrfs_root *root;
11752         struct btrfs_fs_info *info;
11753         u64 bytenr = 0;
11754         u64 subvolid = 0;
11755         u64 tree_root_bytenr = 0;
11756         u64 chunk_root_bytenr = 0;
11757         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11758         int ret;
11759         u64 num;
11760         int init_csum_tree = 0;
11761         int readonly = 0;
11762         int clear_space_cache = 0;
11763         int qgroup_report = 0;
11764         int qgroups_repaired = 0;
11765         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11766
11767         while(1) {
11768                 int c;
11769                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11770                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11771                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11772                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11773                 static const struct option long_options[] = {
11774                         { "super", required_argument, NULL, 's' },
11775                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11776                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11777                         { "init-csum-tree", no_argument, NULL,
11778                                 GETOPT_VAL_INIT_CSUM },
11779                         { "init-extent-tree", no_argument, NULL,
11780                                 GETOPT_VAL_INIT_EXTENT },
11781                         { "check-data-csum", no_argument, NULL,
11782                                 GETOPT_VAL_CHECK_CSUM },
11783                         { "backup", no_argument, NULL, 'b' },
11784                         { "subvol-extents", required_argument, NULL, 'E' },
11785                         { "qgroup-report", no_argument, NULL, 'Q' },
11786                         { "tree-root", required_argument, NULL, 'r' },
11787                         { "chunk-root", required_argument, NULL,
11788                                 GETOPT_VAL_CHUNK_TREE },
11789                         { "progress", no_argument, NULL, 'p' },
11790                         { "mode", required_argument, NULL,
11791                                 GETOPT_VAL_MODE },
11792                         { "clear-space-cache", required_argument, NULL,
11793                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
11794                         { NULL, 0, NULL, 0}
11795                 };
11796
11797                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11798                 if (c < 0)
11799                         break;
11800                 switch(c) {
11801                         case 'a': /* ignored */ break;
11802                         case 'b':
11803                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11804                                 break;
11805                         case 's':
11806                                 num = arg_strtou64(optarg);
11807                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11808                                         error(
11809                                         "super mirror should be less than %d",
11810                                                 BTRFS_SUPER_MIRROR_MAX);
11811                                         exit(1);
11812                                 }
11813                                 bytenr = btrfs_sb_offset(((int)num));
11814                                 printf("using SB copy %llu, bytenr %llu\n", num,
11815                                        (unsigned long long)bytenr);
11816                                 break;
11817                         case 'Q':
11818                                 qgroup_report = 1;
11819                                 break;
11820                         case 'E':
11821                                 subvolid = arg_strtou64(optarg);
11822                                 break;
11823                         case 'r':
11824                                 tree_root_bytenr = arg_strtou64(optarg);
11825                                 break;
11826                         case GETOPT_VAL_CHUNK_TREE:
11827                                 chunk_root_bytenr = arg_strtou64(optarg);
11828                                 break;
11829                         case 'p':
11830                                 ctx.progress_enabled = true;
11831                                 break;
11832                         case '?':
11833                         case 'h':
11834                                 usage(cmd_check_usage);
11835                         case GETOPT_VAL_REPAIR:
11836                                 printf("enabling repair mode\n");
11837                                 repair = 1;
11838                                 ctree_flags |= OPEN_CTREE_WRITES;
11839                                 break;
11840                         case GETOPT_VAL_READONLY:
11841                                 readonly = 1;
11842                                 break;
11843                         case GETOPT_VAL_INIT_CSUM:
11844                                 printf("Creating a new CRC tree\n");
11845                                 init_csum_tree = 1;
11846                                 repair = 1;
11847                                 ctree_flags |= OPEN_CTREE_WRITES;
11848                                 break;
11849                         case GETOPT_VAL_INIT_EXTENT:
11850                                 init_extent_tree = 1;
11851                                 ctree_flags |= (OPEN_CTREE_WRITES |
11852                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11853                                 repair = 1;
11854                                 break;
11855                         case GETOPT_VAL_CHECK_CSUM:
11856                                 check_data_csum = 1;
11857                                 break;
11858                         case GETOPT_VAL_MODE:
11859                                 check_mode = parse_check_mode(optarg);
11860                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11861                                         error("unknown mode: %s", optarg);
11862                                         exit(1);
11863                                 }
11864                                 break;
11865                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
11866                                 if (strcmp(optarg, "v1") == 0) {
11867                                         clear_space_cache = 1;
11868                                 } else if (strcmp(optarg, "v2") == 0) {
11869                                         clear_space_cache = 2;
11870                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
11871                                 } else {
11872                                         error(
11873                 "invalid argument to --clear-space-cache, must be v1 or v2");
11874                                         exit(1);
11875                                 }
11876                                 ctree_flags |= OPEN_CTREE_WRITES;
11877                                 break;
11878                 }
11879         }
11880
11881         if (check_argc_exact(argc - optind, 1))
11882                 usage(cmd_check_usage);
11883
11884         if (ctx.progress_enabled) {
11885                 ctx.tp = TASK_NOTHING;
11886                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11887         }
11888
11889         /* This check is the only reason for --readonly to exist */
11890         if (readonly && repair) {
11891                 error("repair options are not compatible with --readonly");
11892                 exit(1);
11893         }
11894
11895         /*
11896          * Not supported yet
11897          */
11898         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11899                 error("low memory mode doesn't support repair yet");
11900                 exit(1);
11901         }
11902
11903         radix_tree_init();
11904         cache_tree_init(&root_cache);
11905
11906         if((ret = check_mounted(argv[optind])) < 0) {
11907                 error("could not check mount status: %s", strerror(-ret));
11908                 goto err_out;
11909         } else if(ret) {
11910                 error("%s is currently mounted, aborting", argv[optind]);
11911                 ret = -EBUSY;
11912                 goto err_out;
11913         }
11914
11915         /* only allow partial opening under repair mode */
11916         if (repair)
11917                 ctree_flags |= OPEN_CTREE_PARTIAL;
11918
11919         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11920                                   chunk_root_bytenr, ctree_flags);
11921         if (!info) {
11922                 error("cannot open file system");
11923                 ret = -EIO;
11924                 goto err_out;
11925         }
11926
11927         global_info = info;
11928         root = info->fs_root;
11929         if (clear_space_cache == 1) {
11930                 if (btrfs_fs_compat_ro(info,
11931                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11932                         error(
11933                 "free space cache v2 detected, use --clear-space-cache v2");
11934                         ret = 1;
11935                         goto close_out;
11936                 }
11937                 printf("Clearing free space cache\n");
11938                 ret = clear_free_space_cache(info);
11939                 if (ret) {
11940                         error("failed to clear free space cache");
11941                         ret = 1;
11942                 } else {
11943                         printf("Free space cache cleared\n");
11944                 }
11945                 goto close_out;
11946         } else if (clear_space_cache == 2) {
11947                 if (!btrfs_fs_compat_ro(info,
11948                                         BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11949                         printf("no free space cache v2 to clear\n");
11950                         ret = 0;
11951                         goto close_out;
11952                 }
11953                 printf("Clear free space cache v2\n");
11954                 ret = btrfs_clear_free_space_tree(info);
11955                 if (ret) {
11956                         error("failed to clear free space cache v2: %d", ret);
11957                         ret = 1;
11958                 } else {
11959                         printf("free space cache v2 cleared\n");
11960                 }
11961                 goto close_out;
11962         }
11963
11964         /*
11965          * repair mode will force us to commit transaction which
11966          * will make us fail to load log tree when mounting.
11967          */
11968         if (repair && btrfs_super_log_root(info->super_copy)) {
11969                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11970                 if (!ret) {
11971                         ret = 1;
11972                         goto close_out;
11973                 }
11974                 ret = zero_log_tree(root);
11975                 if (ret) {
11976                         error("failed to zero log tree: %d", ret);
11977                         goto close_out;
11978                 }
11979         }
11980
11981         uuid_unparse(info->super_copy->fsid, uuidbuf);
11982         if (qgroup_report) {
11983                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11984                        uuidbuf);
11985                 ret = qgroup_verify_all(info);
11986                 if (ret == 0)
11987                         report_qgroups(1);
11988                 goto close_out;
11989         }
11990         if (subvolid) {
11991                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11992                        subvolid, argv[optind], uuidbuf);
11993                 ret = print_extent_state(info, subvolid);
11994                 goto close_out;
11995         }
11996         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11997
11998         if (!extent_buffer_uptodate(info->tree_root->node) ||
11999             !extent_buffer_uptodate(info->dev_root->node) ||
12000             !extent_buffer_uptodate(info->chunk_root->node)) {
12001                 error("critical roots corrupted, unable to check the filesystem");
12002                 ret = -EIO;
12003                 goto close_out;
12004         }
12005
12006         if (init_extent_tree || init_csum_tree) {
12007                 struct btrfs_trans_handle *trans;
12008
12009                 trans = btrfs_start_transaction(info->extent_root, 0);
12010                 if (IS_ERR(trans)) {
12011                         error("error starting transaction");
12012                         ret = PTR_ERR(trans);
12013                         goto close_out;
12014                 }
12015
12016                 if (init_extent_tree) {
12017                         printf("Creating a new extent tree\n");
12018                         ret = reinit_extent_tree(trans, info);
12019                         if (ret)
12020                                 goto close_out;
12021                 }
12022
12023                 if (init_csum_tree) {
12024                         printf("Reinitialize checksum tree\n");
12025                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12026                         if (ret) {
12027                                 error("checksum tree initialization failed: %d",
12028                                                 ret);
12029                                 ret = -EIO;
12030                                 goto close_out;
12031                         }
12032
12033                         ret = fill_csum_tree(trans, info->csum_root,
12034                                              init_extent_tree);
12035                         if (ret) {
12036                                 error("checksum tree refilling failed: %d", ret);
12037                                 return -EIO;
12038                         }
12039                 }
12040                 /*
12041                  * Ok now we commit and run the normal fsck, which will add
12042                  * extent entries for all of the items it finds.
12043                  */
12044                 ret = btrfs_commit_transaction(trans, info->extent_root);
12045                 if (ret)
12046                         goto close_out;
12047         }
12048         if (!extent_buffer_uptodate(info->extent_root->node)) {
12049                 error("critical: extent_root, unable to check the filesystem");
12050                 ret = -EIO;
12051                 goto close_out;
12052         }
12053         if (!extent_buffer_uptodate(info->csum_root->node)) {
12054                 error("critical: csum_root, unable to check the filesystem");
12055                 ret = -EIO;
12056                 goto close_out;
12057         }
12058
12059         if (!ctx.progress_enabled)
12060                 fprintf(stderr, "checking extents\n");
12061         if (check_mode == CHECK_MODE_LOWMEM)
12062                 ret = check_chunks_and_extents_v2(root);
12063         else
12064                 ret = check_chunks_and_extents(root);
12065         if (ret)
12066                 error(
12067                 "errors found in extent allocation tree or chunk allocation");
12068
12069         ret = repair_root_items(info);
12070         if (ret < 0)
12071                 goto close_out;
12072         if (repair) {
12073                 fprintf(stderr, "Fixed %d roots.\n", ret);
12074                 ret = 0;
12075         } else if (ret > 0) {
12076                 fprintf(stderr,
12077                        "Found %d roots with an outdated root item.\n",
12078                        ret);
12079                 fprintf(stderr,
12080                         "Please run a filesystem check with the option --repair to fix them.\n");
12081                 ret = 1;
12082                 goto close_out;
12083         }
12084
12085         if (!ctx.progress_enabled) {
12086                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
12087                         fprintf(stderr, "checking free space tree\n");
12088                 else
12089                         fprintf(stderr, "checking free space cache\n");
12090         }
12091         ret = check_space_cache(root);
12092         if (ret)
12093                 goto out;
12094
12095         /*
12096          * We used to have to have these hole extents in between our real
12097          * extents so if we don't have this flag set we need to make sure there
12098          * are no gaps in the file extents for inodes, otherwise we can just
12099          * ignore it when this happens.
12100          */
12101         no_holes = btrfs_fs_incompat(root->fs_info,
12102                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
12103         if (!ctx.progress_enabled)
12104                 fprintf(stderr, "checking fs roots\n");
12105         ret = check_fs_roots(root, &root_cache);
12106         if (ret)
12107                 goto out;
12108
12109         fprintf(stderr, "checking csums\n");
12110         ret = check_csums(root);
12111         if (ret)
12112                 goto out;
12113
12114         fprintf(stderr, "checking root refs\n");
12115         ret = check_root_refs(root, &root_cache);
12116         if (ret)
12117                 goto out;
12118
12119         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12120                 struct extent_buffer *eb;
12121
12122                 eb = list_first_entry(&root->fs_info->recow_ebs,
12123                                       struct extent_buffer, recow);
12124                 list_del_init(&eb->recow);
12125                 ret = recow_extent_buffer(root, eb);
12126                 if (ret)
12127                         break;
12128         }
12129
12130         while (!list_empty(&delete_items)) {
12131                 struct bad_item *bad;
12132
12133                 bad = list_first_entry(&delete_items, struct bad_item, list);
12134                 list_del_init(&bad->list);
12135                 if (repair)
12136                         ret = delete_bad_item(root, bad);
12137                 free(bad);
12138         }
12139
12140         if (info->quota_enabled) {
12141                 int err;
12142                 fprintf(stderr, "checking quota groups\n");
12143                 err = qgroup_verify_all(info);
12144                 if (err)
12145                         goto out;
12146                 report_qgroups(0);
12147                 err = repair_qgroups(info, &qgroups_repaired);
12148                 if (err)
12149                         goto out;
12150         }
12151
12152         if (!list_empty(&root->fs_info->recow_ebs)) {
12153                 error("transid errors in file system");
12154                 ret = 1;
12155         }
12156 out:
12157         /* Don't override original ret */
12158         if (!ret && qgroups_repaired)
12159                 ret = qgroups_repaired;
12160
12161         if (found_old_backref) { /*
12162                  * there was a disk format change when mixed
12163                  * backref was in testing tree. The old format
12164                  * existed about one week.
12165                  */
12166                 printf("\n * Found old mixed backref format. "
12167                        "The old format is not supported! *"
12168                        "\n * Please mount the FS in readonly mode, "
12169                        "backup data and re-format the FS. *\n\n");
12170                 ret = 1;
12171         }
12172         printf("found %llu bytes used err is %d\n",
12173                (unsigned long long)bytes_used, ret);
12174         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12175         printf("total tree bytes: %llu\n",
12176                (unsigned long long)total_btree_bytes);
12177         printf("total fs tree bytes: %llu\n",
12178                (unsigned long long)total_fs_tree_bytes);
12179         printf("total extent tree bytes: %llu\n",
12180                (unsigned long long)total_extent_tree_bytes);
12181         printf("btree space waste bytes: %llu\n",
12182                (unsigned long long)btree_space_waste);
12183         printf("file data blocks allocated: %llu\n referenced %llu\n",
12184                 (unsigned long long)data_bytes_allocated,
12185                 (unsigned long long)data_bytes_referenced);
12186
12187         free_qgroup_counts();
12188         free_root_recs_tree(&root_cache);
12189 close_out:
12190         close_ctree(root);
12191 err_out:
12192         if (ctx.progress_enabled)
12193                 task_deinit(ctx.info);
12194
12195         return ret;
12196 }