ede612d5505c2cc7bd0a6faa97c543ecbdfaf575
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Phases shown by the progress indicator.  The value is used as an index
 * into task_position_string[] in print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,	/* sentinel: must stay the final enumerator */
};
52
53 struct task_ctx {
54         int progress_enabled;
55         enum task_position tp;
56
57         struct task_info *info;
58 };
59
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/* Check implementations selectable by name (see parse_check_mode()). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM = 1,
	CHECK_MODE_UNKNOWN = 2,		/* returned for an unrecognised name */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct list_head list;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 {
118         return container_of(back, struct data_backref, node);
119 }
120
121 /*
122  * Much like data_backref, just removed the undetermined members
123  * and change it to use list_head.
124  * During extent scan, it is stored in root->orphan_data_extent.
125  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126  */
127 struct orphan_data_extent {
128         struct list_head list;
129         u64 root;
130         u64 objectid;
131         u64 offset;
132         u64 disk_bytenr;
133         u64 disk_len;
134 };
135
136 struct tree_backref {
137         struct extent_backref node;
138         union {
139                 u64 parent;
140                 u64 root;
141         };
142 };
143
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 {
146         return container_of(back, struct tree_backref, node);
147 }
148
/*
 * Value used to explicitly initialize the two-bit field
 * extent_record::flag_block_full_backref.
 */
enum {
	FLAG_UNSET = 2
};
151
152 struct extent_record {
153         struct list_head backrefs;
154         struct list_head dups;
155         struct list_head list;
156         struct cache_extent cache;
157         struct btrfs_disk_key parent_key;
158         u64 start;
159         u64 max_size;
160         u64 nr;
161         u64 refs;
162         u64 extent_item_refs;
163         u64 generation;
164         u64 parent_generation;
165         u64 info_objectid;
166         u32 num_duplicates;
167         u8 info_level;
168         unsigned int flag_block_full_backref:2;
169         unsigned int found_rec:1;
170         unsigned int content_checked:1;
171         unsigned int owner_ref_checked:1;
172         unsigned int is_root:1;
173         unsigned int metadata:1;
174         unsigned int bad_full_backref:1;
175         unsigned int crossing_stripes:1;
176         unsigned int wrong_chunk_type:1;
177 };
178
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 {
181         return container_of(entry, struct extent_record, list);
182 }
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         u8 filetype;
190         u8 ref_type;
191         int errors;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 {
200         return list_entry(entry, struct inode_backref, list);
201 }
202
203 struct root_item_record {
204         struct list_head list;
205         u64 objectid;
206         u64 bytenr;
207         u64 last_snapshot;
208         u8 level;
209         u8 drop_level;
210         int level_size;
211         struct btrfs_key drop_key;
212 };
213
/*
 * Bits stored in the 'errors' member of struct inode_backref and
 * struct root_backref; decoded by print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
227
228 struct file_extent_hole {
229         struct rb_node node;
230         u64 start;
231         u64 len;
232 };
233
234 struct inode_record {
235         struct list_head backrefs;
236         unsigned int checked:1;
237         unsigned int merging:1;
238         unsigned int found_inode_item:1;
239         unsigned int found_dir_item:1;
240         unsigned int found_file_extent:1;
241         unsigned int found_csum_item:1;
242         unsigned int some_csum_missing:1;
243         unsigned int nodatasum:1;
244         int errors;
245
246         u64 ino;
247         u32 nlink;
248         u32 imode;
249         u64 isize;
250         u64 nbytes;
251
252         u32 found_link;
253         u64 found_size;
254         u64 extent_start;
255         u64 extent_end;
256         struct rb_root holes;
257         struct list_head orphan_extents;
258
259         u32 refs;
260 };
261
/* Bits stored in inode_record::errors; decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
277
278 struct root_backref {
279         struct list_head list;
280         unsigned int found_dir_item:1;
281         unsigned int found_dir_index:1;
282         unsigned int found_back_ref:1;
283         unsigned int found_forward_ref:1;
284         unsigned int reachable:1;
285         int errors;
286         u64 ref_root;
287         u64 dir;
288         u64 index;
289         u16 namelen;
290         char name[0];
291 };
292
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 {
295         return list_entry(entry, struct root_backref, list);
296 }
297
298 struct root_record {
299         struct list_head backrefs;
300         struct cache_extent cache;
301         unsigned int found_root_item:1;
302         u64 objectid;
303         u32 found_ref;
304 };
305
306 struct ptr_node {
307         struct cache_extent cache;
308         void *data;
309 };
310
311 struct shared_node {
312         struct cache_extent cache;
313         struct cache_tree root_cache;
314         struct cache_tree inode_cache;
315         struct inode_record *current;
316         u32 refs;
317 };
318
319 struct block_info {
320         u64 start;
321         u32 size;
322 };
323
324 struct walk_control {
325         struct cache_tree shared;
326         struct shared_node *nodes[BTRFS_MAX_LEVEL];
327         int active_node;
328         int root_level;
329 };
330
331 struct bad_item {
332         struct btrfs_key key;
333         u64 root_id;
334         struct list_head list;
335 };
336
337 struct extent_entry {
338         u64 bytenr;
339         u64 bytes;
340         int count;
341         int broken;
342         struct list_head list;
343 };
344
345 struct root_item_info {
346         /* level of the root */
347         u8 level;
348         /* number of nodes at this level, must be 1 for a root */
349         int node_count;
350         u64 bytenr;
351         u64 gen;
352         struct cache_extent cache_extent;
353 };
354
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet.  They are only used internally
 * for error classification.
 *
 * Every flag must own a distinct bit.  CROSSING_STRIPE_BOUNDARY used to
 * share (1 << 4) with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable once OR-ed into an error mask; it now uses the first
 * free bit so that the other values keep their historical positions.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
371
372 static void *print_status_check(void *p)
373 {
374         struct task_ctx *priv = p;
375         const char work_indicator[] = { '.', 'o', 'O', 'o' };
376         uint32_t count = 0;
377         static char *task_position_string[] = {
378                 "checking extents",
379                 "checking free space cache",
380                 "checking fs roots",
381         };
382
383         task_period_start(priv->info, 1000 /* 1s */);
384
385         if (priv->tp == TASK_NOTHING)
386                 return NULL;
387
388         while (1) {
389                 printf("%s [%c]\r", task_position_string[priv->tp],
390                                 work_indicator[count % 4]);
391                 count++;
392                 fflush(stdout);
393                 task_period_wait(priv->info);
394         }
395         return NULL;
396 }
397
/*
 * Companion of print_status_check(): emits a newline so that later output
 * starts on a fresh line, then flushes stdout.  @p is unused.
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);
	return 0;
}
405
406 static enum btrfs_check_mode parse_check_mode(const char *str)
407 {
408         if (strcmp(str, "lowmem") == 0)
409                 return CHECK_MODE_LOWMEM;
410         if (strcmp(str, "orig") == 0)
411                 return CHECK_MODE_ORIGINAL;
412         if (strcmp(str, "original") == 0)
413                 return CHECK_MODE_ORIGINAL;
414
415         return CHECK_MODE_UNKNOWN;
416 }
417
418 /* Compatible function to allow reuse of old codes */
419 static u64 first_extent_gap(struct rb_root *holes)
420 {
421         struct file_extent_hole *hole;
422
423         if (RB_EMPTY_ROOT(holes))
424                 return (u64)-1;
425
426         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
427         return hole->start;
428 }
429
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 {
432         struct file_extent_hole *hole1;
433         struct file_extent_hole *hole2;
434
435         hole1 = rb_entry(node1, struct file_extent_hole, node);
436         hole2 = rb_entry(node2, struct file_extent_hole, node);
437
438         if (hole1->start > hole2->start)
439                 return -1;
440         if (hole1->start < hole2->start)
441                 return 1;
442         /* Now hole1->start == hole2->start */
443         if (hole1->len >= hole2->len)
444                 /*
445                  * Hole 1 will be merge center
446                  * Same hole will be merged later
447                  */
448                 return -1;
449         /* Hole 2 will be merge center */
450         return 1;
451 }
452
453 /*
454  * Add a hole to the record
455  *
456  * This will do hole merge for copy_file_extent_holes(),
457  * which will ensure there won't be continuous holes.
458  */
459 static int add_file_extent_hole(struct rb_root *holes,
460                                 u64 start, u64 len)
461 {
462         struct file_extent_hole *hole;
463         struct file_extent_hole *prev = NULL;
464         struct file_extent_hole *next = NULL;
465
466         hole = malloc(sizeof(*hole));
467         if (!hole)
468                 return -ENOMEM;
469         hole->start = start;
470         hole->len = len;
471         /* Since compare will not return 0, no -EEXIST will happen */
472         rb_insert(holes, &hole->node, compare_hole);
473
474         /* simple merge with previous hole */
475         if (rb_prev(&hole->node))
476                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477                                 node);
478         if (prev && prev->start + prev->len >= hole->start) {
479                 hole->len = hole->start + hole->len - prev->start;
480                 hole->start = prev->start;
481                 rb_erase(&prev->node, holes);
482                 free(prev);
483                 prev = NULL;
484         }
485
486         /* iterate merge with next holes */
487         while (1) {
488                 if (!rb_next(&hole->node))
489                         break;
490                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491                                         node);
492                 if (hole->start + hole->len >= next->start) {
493                         if (hole->start + hole->len <= next->start + next->len)
494                                 hole->len = next->start + next->len -
495                                             hole->start;
496                         rb_erase(&next->node, holes);
497                         free(next);
498                         next = NULL;
499                 } else
500                         break;
501         }
502         return 0;
503 }
504
505 static int compare_hole_range(struct rb_node *node, void *data)
506 {
507         struct file_extent_hole *hole;
508         u64 start;
509
510         hole = (struct file_extent_hole *)data;
511         start = hole->start;
512
513         hole = rb_entry(node, struct file_extent_hole, node);
514         if (start < hole->start)
515                 return -1;
516         if (start >= hole->start && start < hole->start + hole->len)
517                 return 0;
518         return 1;
519 }
520
521 /*
522  * Delete a hole in the record
523  *
524  * This will do the hole split and is much restrict than add.
525  */
526 static int del_file_extent_hole(struct rb_root *holes,
527                                 u64 start, u64 len)
528 {
529         struct file_extent_hole *hole;
530         struct file_extent_hole tmp;
531         u64 prev_start = 0;
532         u64 prev_len = 0;
533         u64 next_start = 0;
534         u64 next_len = 0;
535         struct rb_node *node;
536         int have_prev = 0;
537         int have_next = 0;
538         int ret = 0;
539
540         tmp.start = start;
541         tmp.len = len;
542         node = rb_search(holes, &tmp, compare_hole_range, NULL);
543         if (!node)
544                 return -EEXIST;
545         hole = rb_entry(node, struct file_extent_hole, node);
546         if (start + len > hole->start + hole->len)
547                 return -EEXIST;
548
549         /*
550          * Now there will be no overlap, delete the hole and re-add the
551          * split(s) if they exists.
552          */
553         if (start > hole->start) {
554                 prev_start = hole->start;
555                 prev_len = start - hole->start;
556                 have_prev = 1;
557         }
558         if (hole->start + hole->len > start + len) {
559                 next_start = start + len;
560                 next_len = hole->start + hole->len - start - len;
561                 have_next = 1;
562         }
563         rb_erase(node, holes);
564         free(hole);
565         if (have_prev) {
566                 ret = add_file_extent_hole(holes, prev_start, prev_len);
567                 if (ret < 0)
568                         return ret;
569         }
570         if (have_next) {
571                 ret = add_file_extent_hole(holes, next_start, next_len);
572                 if (ret < 0)
573                         return ret;
574         }
575         return 0;
576 }
577
578 static int copy_file_extent_holes(struct rb_root *dst,
579                                   struct rb_root *src)
580 {
581         struct file_extent_hole *hole;
582         struct rb_node *node;
583         int ret = 0;
584
585         node = rb_first(src);
586         while (node) {
587                 hole = rb_entry(node, struct file_extent_hole, node);
588                 ret = add_file_extent_hole(dst, hole->start, hole->len);
589                 if (ret)
590                         break;
591                 node = rb_next(node);
592         }
593         return ret;
594 }
595
596 static void free_file_extent_holes(struct rb_root *holes)
597 {
598         struct rb_node *node;
599         struct file_extent_hole *hole;
600
601         node = rb_first(holes);
602         while (node) {
603                 hole = rb_entry(node, struct file_extent_hole, node);
604                 rb_erase(node, holes);
605                 free(hole);
606                 node = rb_first(holes);
607         }
608 }
609
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613                                  struct btrfs_root *root)
614 {
615         if (root->last_trans != trans->transid) {
616                 root->track_dirty = 1;
617                 root->last_trans = trans->transid;
618                 root->commit_root = root->node;
619                 extent_buffer_get(root->node);
620         }
621 }
622
623 static u8 imode_to_type(u32 imode)
624 {
625 #define S_SHIFT 12
626         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
628                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
629                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
630                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
631                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
632                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
633                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
634         };
635
636         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
637 #undef S_SHIFT
638 }
639
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 {
642         struct device_record *rec1;
643         struct device_record *rec2;
644
645         rec1 = rb_entry(node1, struct device_record, node);
646         rec2 = rb_entry(node2, struct device_record, node);
647         if (rec1->devid > rec2->devid)
648                 return -1;
649         else if (rec1->devid < rec2->devid)
650                 return 1;
651         else
652                 return 0;
653 }
654
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 {
657         struct inode_record *rec;
658         struct inode_backref *backref;
659         struct inode_backref *orig;
660         struct inode_backref *tmp;
661         struct orphan_data_extent *src_orphan;
662         struct orphan_data_extent *dst_orphan;
663         struct rb_node *rb;
664         size_t size;
665         int ret;
666
667         rec = malloc(sizeof(*rec));
668         if (!rec)
669                 return ERR_PTR(-ENOMEM);
670         memcpy(rec, orig_rec, sizeof(*rec));
671         rec->refs = 1;
672         INIT_LIST_HEAD(&rec->backrefs);
673         INIT_LIST_HEAD(&rec->orphan_extents);
674         rec->holes = RB_ROOT;
675
676         list_for_each_entry(orig, &orig_rec->backrefs, list) {
677                 size = sizeof(*orig) + orig->namelen + 1;
678                 backref = malloc(size);
679                 if (!backref) {
680                         ret = -ENOMEM;
681                         goto cleanup;
682                 }
683                 memcpy(backref, orig, size);
684                 list_add_tail(&backref->list, &rec->backrefs);
685         }
686         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687                 dst_orphan = malloc(sizeof(*dst_orphan));
688                 if (!dst_orphan) {
689                         ret = -ENOMEM;
690                         goto cleanup;
691                 }
692                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694         }
695         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
696         if (ret < 0)
697                 goto cleanup_rb;
698
699         return rec;
700
701 cleanup_rb:
702         rb = rb_first(&rec->holes);
703         while (rb) {
704                 struct file_extent_hole *hole;
705
706                 hole = rb_entry(rb, struct file_extent_hole, node);
707                 rb = rb_next(rb);
708                 free(hole);
709         }
710
711 cleanup:
712         if (!list_empty(&rec->backrefs))
713                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714                         list_del(&orig->list);
715                         free(orig);
716                 }
717
718         if (!list_empty(&rec->orphan_extents))
719                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720                         list_del(&orig->list);
721                         free(orig);
722                 }
723
724         free(rec);
725
726         return ERR_PTR(ret);
727 }
728
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
730                                       u64 objectid)
731 {
732         struct orphan_data_extent *orphan;
733
734         if (list_empty(orphan_extents))
735                 return;
736         printf("The following data extent is lost in tree %llu:\n",
737                objectid);
738         list_for_each_entry(orphan, orphan_extents, list) {
739                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
741                        orphan->disk_len);
742         }
743 }
744
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 {
747         u64 root_objectid = root->root_key.objectid;
748         int errors = rec->errors;
749
750         if (!errors)
751                 return;
752         /* reloc root errors, we print its corresponding fs root objectid*/
753         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754                 root_objectid = root->root_key.offset;
755                 fprintf(stderr, "reloc");
756         }
757         fprintf(stderr, "root %llu inode %llu errors %x",
758                 (unsigned long long) root_objectid,
759                 (unsigned long long) rec->ino, rec->errors);
760
761         if (errors & I_ERR_NO_INODE_ITEM)
762                 fprintf(stderr, ", no inode item");
763         if (errors & I_ERR_NO_ORPHAN_ITEM)
764                 fprintf(stderr, ", no orphan item");
765         if (errors & I_ERR_DUP_INODE_ITEM)
766                 fprintf(stderr, ", dup inode item");
767         if (errors & I_ERR_DUP_DIR_INDEX)
768                 fprintf(stderr, ", dup dir index");
769         if (errors & I_ERR_ODD_DIR_ITEM)
770                 fprintf(stderr, ", odd dir item");
771         if (errors & I_ERR_ODD_FILE_EXTENT)
772                 fprintf(stderr, ", odd file extent");
773         if (errors & I_ERR_BAD_FILE_EXTENT)
774                 fprintf(stderr, ", bad file extent");
775         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776                 fprintf(stderr, ", file extent overlap");
777         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778                 fprintf(stderr, ", file extent discount");
779         if (errors & I_ERR_DIR_ISIZE_WRONG)
780                 fprintf(stderr, ", dir isize wrong");
781         if (errors & I_ERR_FILE_NBYTES_WRONG)
782                 fprintf(stderr, ", nbytes wrong");
783         if (errors & I_ERR_ODD_CSUM_ITEM)
784                 fprintf(stderr, ", odd csum item");
785         if (errors & I_ERR_SOME_CSUM_MISSING)
786                 fprintf(stderr, ", some csum missing");
787         if (errors & I_ERR_LINK_COUNT_WRONG)
788                 fprintf(stderr, ", link count wrong");
789         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790                 fprintf(stderr, ", orphan file extent");
791         fprintf(stderr, "\n");
792         /* Print the orphan extents if needed */
793         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795
796         /* Print the holes if needed */
797         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798                 struct file_extent_hole *hole;
799                 struct rb_node *node;
800                 int found = 0;
801
802                 node = rb_first(&rec->holes);
803                 fprintf(stderr, "Found file extent holes:\n");
804                 while (node) {
805                         found = 1;
806                         hole = rb_entry(node, struct file_extent_hole, node);
807                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
808                                 hole->start, hole->len);
809                         node = rb_next(node);
810                 }
811                 if (!found)
812                         fprintf(stderr, "\tstart: 0, len: %llu\n",
813                                 round_up(rec->isize, root->sectorsize));
814         }
815 }
816
817 static void print_ref_error(int errors)
818 {
819         if (errors & REF_ERR_NO_DIR_ITEM)
820                 fprintf(stderr, ", no dir item");
821         if (errors & REF_ERR_NO_DIR_INDEX)
822                 fprintf(stderr, ", no dir index");
823         if (errors & REF_ERR_NO_INODE_REF)
824                 fprintf(stderr, ", no inode ref");
825         if (errors & REF_ERR_DUP_DIR_ITEM)
826                 fprintf(stderr, ", dup dir item");
827         if (errors & REF_ERR_DUP_DIR_INDEX)
828                 fprintf(stderr, ", dup dir index");
829         if (errors & REF_ERR_DUP_INODE_REF)
830                 fprintf(stderr, ", dup inode ref");
831         if (errors & REF_ERR_INDEX_UNMATCH)
832                 fprintf(stderr, ", index mismatch");
833         if (errors & REF_ERR_FILETYPE_UNMATCH)
834                 fprintf(stderr, ", filetype mismatch");
835         if (errors & REF_ERR_NAME_TOO_LONG)
836                 fprintf(stderr, ", name too long");
837         if (errors & REF_ERR_NO_ROOT_REF)
838                 fprintf(stderr, ", no root ref");
839         if (errors & REF_ERR_NO_ROOT_BACKREF)
840                 fprintf(stderr, ", no root backref");
841         if (errors & REF_ERR_DUP_ROOT_REF)
842                 fprintf(stderr, ", dup root ref");
843         if (errors & REF_ERR_DUP_ROOT_BACKREF)
844                 fprintf(stderr, ", dup root backref");
845         fprintf(stderr, "\n");
846 }
847
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
849                                           u64 ino, int mod)
850 {
851         struct ptr_node *node;
852         struct cache_extent *cache;
853         struct inode_record *rec = NULL;
854         int ret;
855
856         cache = lookup_cache_extent(inode_cache, ino, 1);
857         if (cache) {
858                 node = container_of(cache, struct ptr_node, cache);
859                 rec = node->data;
860                 if (mod && rec->refs > 1) {
861                         node->data = clone_inode_rec(rec);
862                         if (IS_ERR(node->data))
863                                 return node->data;
864                         rec->refs--;
865                         rec = node->data;
866                 }
867         } else if (mod) {
868                 rec = calloc(1, sizeof(*rec));
869                 if (!rec)
870                         return ERR_PTR(-ENOMEM);
871                 rec->ino = ino;
872                 rec->extent_start = (u64)-1;
873                 rec->refs = 1;
874                 INIT_LIST_HEAD(&rec->backrefs);
875                 INIT_LIST_HEAD(&rec->orphan_extents);
876                 rec->holes = RB_ROOT;
877
878                 node = malloc(sizeof(*node));
879                 if (!node) {
880                         free(rec);
881                         return ERR_PTR(-ENOMEM);
882                 }
883                 node->cache.start = ino;
884                 node->cache.size = 1;
885                 node->data = rec;
886
887                 if (ino == BTRFS_FREE_INO_OBJECTID)
888                         rec->found_link = 1;
889
890                 ret = insert_cache_extent(inode_cache, &node->cache);
891                 if (ret)
892                         return ERR_PTR(-EEXIST);
893         }
894         return rec;
895 }
896
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 {
899         struct orphan_data_extent *orphan;
900
901         while (!list_empty(orphan_extents)) {
902                 orphan = list_entry(orphan_extents->next,
903                                     struct orphan_data_extent, list);
904                 list_del(&orphan->list);
905                 free(orphan);
906         }
907 }
908
909 static void free_inode_rec(struct inode_record *rec)
910 {
911         struct inode_backref *backref;
912
913         if (--rec->refs > 0)
914                 return;
915
916         while (!list_empty(&rec->backrefs)) {
917                 backref = to_inode_backref(rec->backrefs.next);
918                 list_del(&backref->list);
919                 free(backref);
920         }
921         free_orphan_data_extents(&rec->orphan_extents);
922         free_file_extent_holes(&rec->holes);
923         free(rec);
924 }
925
926 static int can_free_inode_rec(struct inode_record *rec)
927 {
928         if (!rec->errors && rec->checked && rec->found_inode_item &&
929             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
930                 return 1;
931         return 0;
932 }
933
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935                                  struct inode_record *rec)
936 {
937         struct cache_extent *cache;
938         struct inode_backref *tmp, *backref;
939         struct ptr_node *node;
940         u8 filetype;
941
942         if (!rec->found_inode_item)
943                 return;
944
945         filetype = imode_to_type(rec->imode);
946         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947                 if (backref->found_dir_item && backref->found_dir_index) {
948                         if (backref->filetype != filetype)
949                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950                         if (!backref->errors && backref->found_inode_ref &&
951                             rec->nlink == rec->found_link) {
952                                 list_del(&backref->list);
953                                 free(backref);
954                         }
955                 }
956         }
957
958         if (!rec->checked || rec->merging)
959                 return;
960
961         if (S_ISDIR(rec->imode)) {
962                 if (rec->found_size != rec->isize)
963                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964                 if (rec->found_file_extent)
965                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
966         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967                 if (rec->found_dir_item)
968                         rec->errors |= I_ERR_ODD_DIR_ITEM;
969                 if (rec->found_size != rec->nbytes)
970                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
971                 if (rec->nlink > 0 && !no_holes &&
972                     (rec->extent_end < rec->isize ||
973                      first_extent_gap(&rec->holes) < rec->isize))
974                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
975         }
976
977         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978                 if (rec->found_csum_item && rec->nodatasum)
979                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
980                 if (rec->some_csum_missing && !rec->nodatasum)
981                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
982         }
983
984         BUG_ON(rec->refs != 1);
985         if (can_free_inode_rec(rec)) {
986                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987                 node = container_of(cache, struct ptr_node, cache);
988                 BUG_ON(node->data != rec);
989                 remove_cache_extent(inode_cache, &node->cache);
990                 free(node);
991                 free_inode_rec(rec);
992         }
993 }
994
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 {
997         struct btrfs_path path;
998         struct btrfs_key key;
999         int ret;
1000
1001         key.objectid = BTRFS_ORPHAN_OBJECTID;
1002         key.type = BTRFS_ORPHAN_ITEM_KEY;
1003         key.offset = ino;
1004
1005         btrfs_init_path(&path);
1006         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007         btrfs_release_path(&path);
1008         if (ret > 0)
1009                 ret = -ENOENT;
1010         return ret;
1011 }
1012
1013 static int process_inode_item(struct extent_buffer *eb,
1014                               int slot, struct btrfs_key *key,
1015                               struct shared_node *active_node)
1016 {
1017         struct inode_record *rec;
1018         struct btrfs_inode_item *item;
1019
1020         rec = active_node->current;
1021         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022         if (rec->found_inode_item) {
1023                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1024                 return 1;
1025         }
1026         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027         rec->nlink = btrfs_inode_nlink(eb, item);
1028         rec->isize = btrfs_inode_size(eb, item);
1029         rec->nbytes = btrfs_inode_nbytes(eb, item);
1030         rec->imode = btrfs_inode_mode(eb, item);
1031         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032                 rec->nodatasum = 1;
1033         rec->found_inode_item = 1;
1034         if (rec->nlink == 0)
1035                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036         maybe_free_inode_rec(&active_node->inode_cache, rec);
1037         return 0;
1038 }
1039
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041                                                 const char *name,
1042                                                 int namelen, u64 dir)
1043 {
1044         struct inode_backref *backref;
1045
1046         list_for_each_entry(backref, &rec->backrefs, list) {
1047                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048                         break;
1049                 if (backref->dir != dir || backref->namelen != namelen)
1050                         continue;
1051                 if (memcmp(name, backref->name, namelen))
1052                         continue;
1053                 return backref;
1054         }
1055
1056         backref = malloc(sizeof(*backref) + namelen + 1);
1057         if (!backref)
1058                 return NULL;
1059         memset(backref, 0, sizeof(*backref));
1060         backref->dir = dir;
1061         backref->namelen = namelen;
1062         memcpy(backref->name, name, namelen);
1063         backref->name[namelen] = '\0';
1064         list_add_tail(&backref->list, &rec->backrefs);
1065         return backref;
1066 }
1067
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069                              u64 ino, u64 dir, u64 index,
1070                              const char *name, int namelen,
1071                              u8 filetype, u8 itemtype, int errors)
1072 {
1073         struct inode_record *rec;
1074         struct inode_backref *backref;
1075
1076         rec = get_inode_rec(inode_cache, ino, 1);
1077         BUG_ON(IS_ERR(rec));
1078         backref = get_inode_backref(rec, name, namelen, dir);
1079         BUG_ON(!backref);
1080         if (errors)
1081                 backref->errors |= errors;
1082         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083                 if (backref->found_dir_index)
1084                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085                 if (backref->found_inode_ref && backref->index != index)
1086                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1087                 if (backref->found_dir_item && backref->filetype != filetype)
1088                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089
1090                 backref->index = index;
1091                 backref->filetype = filetype;
1092                 backref->found_dir_index = 1;
1093         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094                 rec->found_link++;
1095                 if (backref->found_dir_item)
1096                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097                 if (backref->found_dir_index && backref->filetype != filetype)
1098                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099
1100                 backref->filetype = filetype;
1101                 backref->found_dir_item = 1;
1102         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104                 if (backref->found_inode_ref)
1105                         backref->errors |= REF_ERR_DUP_INODE_REF;
1106                 if (backref->found_dir_index && backref->index != index)
1107                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1108                 else
1109                         backref->index = index;
1110
1111                 backref->ref_type = itemtype;
1112                 backref->found_inode_ref = 1;
1113         } else {
1114                 BUG_ON(1);
1115         }
1116
1117         maybe_free_inode_rec(inode_cache, rec);
1118         return 0;
1119 }
1120
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122                             struct cache_tree *dst_cache)
1123 {
1124         struct inode_backref *backref;
1125         u32 dir_count = 0;
1126         int ret = 0;
1127
1128         dst->merging = 1;
1129         list_for_each_entry(backref, &src->backrefs, list) {
1130                 if (backref->found_dir_index) {
1131                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1132                                         backref->index, backref->name,
1133                                         backref->namelen, backref->filetype,
1134                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1135                 }
1136                 if (backref->found_dir_item) {
1137                         dir_count++;
1138                         add_inode_backref(dst_cache, dst->ino,
1139                                         backref->dir, 0, backref->name,
1140                                         backref->namelen, backref->filetype,
1141                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1142                 }
1143                 if (backref->found_inode_ref) {
1144                         add_inode_backref(dst_cache, dst->ino,
1145                                         backref->dir, backref->index,
1146                                         backref->name, backref->namelen, 0,
1147                                         backref->ref_type, backref->errors);
1148                 }
1149         }
1150
1151         if (src->found_dir_item)
1152                 dst->found_dir_item = 1;
1153         if (src->found_file_extent)
1154                 dst->found_file_extent = 1;
1155         if (src->found_csum_item)
1156                 dst->found_csum_item = 1;
1157         if (src->some_csum_missing)
1158                 dst->some_csum_missing = 1;
1159         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1161                 if (ret < 0)
1162                         return ret;
1163         }
1164
1165         BUG_ON(src->found_link < dir_count);
1166         dst->found_link += src->found_link - dir_count;
1167         dst->found_size += src->found_size;
1168         if (src->extent_start != (u64)-1) {
1169                 if (dst->extent_start == (u64)-1) {
1170                         dst->extent_start = src->extent_start;
1171                         dst->extent_end = src->extent_end;
1172                 } else {
1173                         if (dst->extent_end > src->extent_start)
1174                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175                         else if (dst->extent_end < src->extent_start) {
1176                                 ret = add_file_extent_hole(&dst->holes,
1177                                         dst->extent_end,
1178                                         src->extent_start - dst->extent_end);
1179                         }
1180                         if (dst->extent_end < src->extent_end)
1181                                 dst->extent_end = src->extent_end;
1182                 }
1183         }
1184
1185         dst->errors |= src->errors;
1186         if (src->found_inode_item) {
1187                 if (!dst->found_inode_item) {
1188                         dst->nlink = src->nlink;
1189                         dst->isize = src->isize;
1190                         dst->nbytes = src->nbytes;
1191                         dst->imode = src->imode;
1192                         dst->nodatasum = src->nodatasum;
1193                         dst->found_inode_item = 1;
1194                 } else {
1195                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1196                 }
1197         }
1198         dst->merging = 0;
1199
1200         return 0;
1201 }
1202
/*
 * Merge the inode records held by @src_node into @dst_node.
 *
 * One reference is dropped from @src_node first.  If that was the last one,
 * the ptr_node entries are moved out of the source caches ("splice");
 * otherwise new ptr_node entries are allocated that share the same
 * inode_record with an extra reference.
 *
 * Both caches are processed in turn: root_cache first, then inode_cache
 * (via the "again" label).  When the destination already holds a record for
 * the same inode the two records are merged and the source side is released.
 *
 * Finally, if @src_node was in the middle of an inode that lies beyond the
 * one @dst_node is working on (or @dst_node has none), the destination's
 * current inode is marked checked and replaced.
 *
 * Always returns 0; allocation and lookup failures trip BUG_ON().
 */
static int splice_shared_node(struct shared_node *src_node,
                              struct shared_node *dst_node)
{
        struct cache_extent *cache;
        struct ptr_node *node, *ins;
        struct cache_tree *src, *dst;
        struct inode_record *rec, *conflict;
        u64 current_ino = 0;
        int splice = 0;
        int ret;

        /* Last reference gone: entries can be moved instead of copied. */
        if (--src_node->refs == 0)
                splice = 1;
        if (src_node->current)
                current_ino = src_node->current->ino;

        src = &src_node->root_cache;
        dst = &dst_node->root_cache;
again:
        cache = search_cache_extent(src, 0);
        while (cache) {
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                /* Advance before the node is possibly removed from src. */
                cache = next_cache_extent(cache);

                if (splice) {
                        remove_cache_extent(src, &node->cache);
                        ins = node;
                } else {
                        ins = malloc(sizeof(*ins));
                        BUG_ON(!ins);
                        ins->cache.start = node->cache.start;
                        ins->cache.size = node->cache.size;
                        ins->data = rec;
                        /* The record is now shared by src and dst. */
                        rec->refs++;
                }
                ret = insert_cache_extent(dst, &ins->cache);
                if (ret == -EEXIST) {
                        /*
                         * dst already tracks this inode: fold rec into the
                         * existing record, then drop our reference on rec
                         * and the ptr_node that could not be inserted.
                         */
                        conflict = get_inode_rec(dst, rec->ino, 1);
                        BUG_ON(IS_ERR(conflict));
                        merge_inode_recs(rec, conflict, dst);
                        if (rec->checked) {
                                conflict->checked = 1;
                                if (dst_node->current == conflict)
                                        dst_node->current = NULL;
                        }
                        maybe_free_inode_rec(dst, conflict);
                        free_inode_rec(rec);
                        free(ins);
                } else {
                        BUG_ON(ret);
                }
        }

        /* Second pass handles the inode caches. */
        if (src == &src_node->root_cache) {
                src = &src_node->inode_cache;
                dst = &dst_node->inode_cache;
                goto again;
        }

        /* From here on dst refers to dst_node->inode_cache. */
        if (current_ino > 0 && (!dst_node->current ||
            current_ino > dst_node->current->ino)) {
                if (dst_node->current) {
                        dst_node->current->checked = 1;
                        maybe_free_inode_rec(dst, dst_node->current);
                }
                dst_node->current = get_inode_rec(dst, current_ino, 1);
                BUG_ON(IS_ERR(dst_node->current));
        }
        return 0;
}
1274
1275 static void free_inode_ptr(struct cache_extent *cache)
1276 {
1277         struct ptr_node *node;
1278         struct inode_record *rec;
1279
1280         node = container_of(cache, struct ptr_node, cache);
1281         rec = node->data;
1282         free_inode_rec(rec);
1283         free(node);
1284 }
1285
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1289                                             u64 bytenr)
1290 {
1291         struct cache_extent *cache;
1292         struct shared_node *node;
1293
1294         cache = lookup_cache_extent(shared, bytenr, 1);
1295         if (cache) {
1296                 node = container_of(cache, struct shared_node, cache);
1297                 return node;
1298         }
1299         return NULL;
1300 }
1301
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1303 {
1304         int ret;
1305         struct shared_node *node;
1306
1307         node = calloc(1, sizeof(*node));
1308         if (!node)
1309                 return -ENOMEM;
1310         node->cache.start = bytenr;
1311         node->cache.size = 1;
1312         cache_tree_init(&node->root_cache);
1313         cache_tree_init(&node->inode_cache);
1314         node->refs = refs;
1315
1316         ret = insert_cache_extent(shared, &node->cache);
1317
1318         return ret;
1319 }
1320
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322                              struct walk_control *wc, int level)
1323 {
1324         struct shared_node *node;
1325         struct shared_node *dest;
1326         int ret;
1327
1328         if (level == wc->active_node)
1329                 return 0;
1330
1331         BUG_ON(wc->active_node <= level);
1332         node = find_shared_node(&wc->shared, bytenr);
1333         if (!node) {
1334                 ret = add_shared_node(&wc->shared, bytenr, refs);
1335                 BUG_ON(ret);
1336                 node = find_shared_node(&wc->shared, bytenr);
1337                 wc->nodes[level] = node;
1338                 wc->active_node = level;
1339                 return 0;
1340         }
1341
1342         if (wc->root_level == wc->active_node &&
1343             btrfs_root_refs(&root->root_item) == 0) {
1344                 if (--node->refs == 0) {
1345                         free_inode_recs_tree(&node->root_cache);
1346                         free_inode_recs_tree(&node->inode_cache);
1347                         remove_cache_extent(&wc->shared, &node->cache);
1348                         free(node);
1349                 }
1350                 return 1;
1351         }
1352
1353         dest = wc->nodes[wc->active_node];
1354         splice_shared_node(node, dest);
1355         if (node->refs == 0) {
1356                 remove_cache_extent(&wc->shared, &node->cache);
1357                 free(node);
1358         }
1359         return 1;
1360 }
1361
1362 static int leave_shared_node(struct btrfs_root *root,
1363                              struct walk_control *wc, int level)
1364 {
1365         struct shared_node *node;
1366         struct shared_node *dest;
1367         int i;
1368
1369         if (level == wc->root_level)
1370                 return 0;
1371
1372         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1373                 if (wc->nodes[i])
1374                         break;
1375         }
1376         BUG_ON(i >= BTRFS_MAX_LEVEL);
1377
1378         node = wc->nodes[wc->active_node];
1379         wc->nodes[wc->active_node] = NULL;
1380         wc->active_node = i;
1381
1382         dest = wc->nodes[wc->active_node];
1383         if (wc->active_node < wc->root_level ||
1384             btrfs_root_refs(&root->root_item) > 0) {
1385                 BUG_ON(node->refs <= 1);
1386                 splice_shared_node(node, dest);
1387         } else {
1388                 BUG_ON(node->refs < 2);
1389                 node->refs--;
1390         }
1391         return 0;
1392 }
1393
1394 /*
1395  * Returns:
1396  * < 0 - on error
1397  * 1   - if the root with id child_root_id is a child of root parent_root_id
1398  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1399  *       has other root(s) as parent(s)
1400  * 2   - if the root child_root_id doesn't have any parent roots
1401  */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1403                          u64 child_root_id)
1404 {
1405         struct btrfs_path path;
1406         struct btrfs_key key;
1407         struct extent_buffer *leaf;
1408         int has_parent = 0;
1409         int ret;
1410
1411         btrfs_init_path(&path);
1412
1413         key.objectid = parent_root_id;
1414         key.type = BTRFS_ROOT_REF_KEY;
1415         key.offset = child_root_id;
1416         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1417                                 0, 0);
1418         if (ret < 0)
1419                 return ret;
1420         btrfs_release_path(&path);
1421         if (!ret)
1422                 return 1;
1423
1424         key.objectid = child_root_id;
1425         key.type = BTRFS_ROOT_BACKREF_KEY;
1426         key.offset = 0;
1427         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1428                                 0, 0);
1429         if (ret < 0)
1430                 goto out;
1431
1432         while (1) {
1433                 leaf = path.nodes[0];
1434                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1436                         if (ret)
1437                                 break;
1438                         leaf = path.nodes[0];
1439                 }
1440
1441                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1442                 if (key.objectid != child_root_id ||
1443                     key.type != BTRFS_ROOT_BACKREF_KEY)
1444                         break;
1445
1446                 has_parent = 1;
1447
1448                 if (key.offset == parent_root_id) {
1449                         btrfs_release_path(&path);
1450                         return 1;
1451                 }
1452
1453                 path.slots[0]++;
1454         }
1455 out:
1456         btrfs_release_path(&path);
1457         if (ret < 0)
1458                 return ret;
1459         return has_parent ? 0 : 2;
1460 }
1461
1462 static int process_dir_item(struct btrfs_root *root,
1463                             struct extent_buffer *eb,
1464                             int slot, struct btrfs_key *key,
1465                             struct shared_node *active_node)
1466 {
1467         u32 total;
1468         u32 cur = 0;
1469         u32 len;
1470         u32 name_len;
1471         u32 data_len;
1472         int error;
1473         int nritems = 0;
1474         u8 filetype;
1475         struct btrfs_dir_item *di;
1476         struct inode_record *rec;
1477         struct cache_tree *root_cache;
1478         struct cache_tree *inode_cache;
1479         struct btrfs_key location;
1480         char namebuf[BTRFS_NAME_LEN];
1481
1482         root_cache = &active_node->root_cache;
1483         inode_cache = &active_node->inode_cache;
1484         rec = active_node->current;
1485         rec->found_dir_item = 1;
1486
1487         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488         total = btrfs_item_size_nr(eb, slot);
1489         while (cur < total) {
1490                 nritems++;
1491                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492                 name_len = btrfs_dir_name_len(eb, di);
1493                 data_len = btrfs_dir_data_len(eb, di);
1494                 filetype = btrfs_dir_type(eb, di);
1495
1496                 rec->found_size += name_len;
1497                 if (name_len <= BTRFS_NAME_LEN) {
1498                         len = name_len;
1499                         error = 0;
1500                 } else {
1501                         len = BTRFS_NAME_LEN;
1502                         error = REF_ERR_NAME_TOO_LONG;
1503                 }
1504                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505
1506                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507                         add_inode_backref(inode_cache, location.objectid,
1508                                           key->objectid, key->offset, namebuf,
1509                                           len, filetype, key->type, error);
1510                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511                         add_inode_backref(root_cache, location.objectid,
1512                                           key->objectid, key->offset,
1513                                           namebuf, len, filetype,
1514                                           key->type, error);
1515                 } else {
1516                         fprintf(stderr, "invalid location in dir item %u\n",
1517                                 location.type);
1518                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519                                           key->objectid, key->offset, namebuf,
1520                                           len, filetype, key->type, error);
1521                 }
1522
1523                 len = sizeof(*di) + name_len + data_len;
1524                 di = (struct btrfs_dir_item *)((char *)di + len);
1525                 cur += len;
1526         }
1527         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1529
1530         return 0;
1531 }
1532
1533 static int process_inode_ref(struct extent_buffer *eb,
1534                              int slot, struct btrfs_key *key,
1535                              struct shared_node *active_node)
1536 {
1537         u32 total;
1538         u32 cur = 0;
1539         u32 len;
1540         u32 name_len;
1541         u64 index;
1542         int error;
1543         struct cache_tree *inode_cache;
1544         struct btrfs_inode_ref *ref;
1545         char namebuf[BTRFS_NAME_LEN];
1546
1547         inode_cache = &active_node->inode_cache;
1548
1549         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550         total = btrfs_item_size_nr(eb, slot);
1551         while (cur < total) {
1552                 name_len = btrfs_inode_ref_name_len(eb, ref);
1553                 index = btrfs_inode_ref_index(eb, ref);
1554                 if (name_len <= BTRFS_NAME_LEN) {
1555                         len = name_len;
1556                         error = 0;
1557                 } else {
1558                         len = BTRFS_NAME_LEN;
1559                         error = REF_ERR_NAME_TOO_LONG;
1560                 }
1561                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562                 add_inode_backref(inode_cache, key->objectid, key->offset,
1563                                   index, namebuf, len, 0, key->type, error);
1564
1565                 len = sizeof(*ref) + name_len;
1566                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1567                 cur += len;
1568         }
1569         return 0;
1570 }
1571
1572 static int process_inode_extref(struct extent_buffer *eb,
1573                                 int slot, struct btrfs_key *key,
1574                                 struct shared_node *active_node)
1575 {
1576         u32 total;
1577         u32 cur = 0;
1578         u32 len;
1579         u32 name_len;
1580         u64 index;
1581         u64 parent;
1582         int error;
1583         struct cache_tree *inode_cache;
1584         struct btrfs_inode_extref *extref;
1585         char namebuf[BTRFS_NAME_LEN];
1586
1587         inode_cache = &active_node->inode_cache;
1588
1589         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590         total = btrfs_item_size_nr(eb, slot);
1591         while (cur < total) {
1592                 name_len = btrfs_inode_extref_name_len(eb, extref);
1593                 index = btrfs_inode_extref_index(eb, extref);
1594                 parent = btrfs_inode_extref_parent(eb, extref);
1595                 if (name_len <= BTRFS_NAME_LEN) {
1596                         len = name_len;
1597                         error = 0;
1598                 } else {
1599                         len = BTRFS_NAME_LEN;
1600                         error = REF_ERR_NAME_TOO_LONG;
1601                 }
1602                 read_extent_buffer(eb, namebuf,
1603                                    (unsigned long)(extref + 1), len);
1604                 add_inode_backref(inode_cache, key->objectid, parent,
1605                                   index, namebuf, len, 0, key->type, error);
1606
1607                 len = sizeof(*extref) + name_len;
1608                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1609                 cur += len;
1610         }
1611         return 0;
1612
1613 }
1614
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616                             u64 len, u64 *found)
1617 {
1618         struct btrfs_key key;
1619         struct btrfs_path path;
1620         struct extent_buffer *leaf;
1621         int ret;
1622         size_t size;
1623         *found = 0;
1624         u64 csum_end;
1625         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626
1627         btrfs_init_path(&path);
1628
1629         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630         key.offset = start;
1631         key.type = BTRFS_EXTENT_CSUM_KEY;
1632
1633         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1634                                 &key, &path, 0, 0);
1635         if (ret < 0)
1636                 goto out;
1637         if (ret > 0 && path.slots[0] > 0) {
1638                 leaf = path.nodes[0];
1639                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641                     key.type == BTRFS_EXTENT_CSUM_KEY)
1642                         path.slots[0]--;
1643         }
1644
1645         while (len > 0) {
1646                 leaf = path.nodes[0];
1647                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1649                         if (ret > 0)
1650                                 break;
1651                         else if (ret < 0)
1652                                 goto out;
1653                         leaf = path.nodes[0];
1654                 }
1655
1656                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1657                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658                     key.type != BTRFS_EXTENT_CSUM_KEY)
1659                         break;
1660
1661                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1662                 if (key.offset >= start + len)
1663                         break;
1664
1665                 if (key.offset > start)
1666                         start = key.offset;
1667
1668                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670                 if (csum_end > start) {
1671                         size = min(csum_end - start, len);
1672                         len -= size;
1673                         start += size;
1674                         *found += size;
1675                 }
1676
1677                 path.slots[0]++;
1678         }
1679 out:
1680         btrfs_release_path(&path);
1681         if (ret < 0)
1682                 return ret;
1683         return 0;
1684 }
1685
1686 static int process_file_extent(struct btrfs_root *root,
1687                                 struct extent_buffer *eb,
1688                                 int slot, struct btrfs_key *key,
1689                                 struct shared_node *active_node)
1690 {
1691         struct inode_record *rec;
1692         struct btrfs_file_extent_item *fi;
1693         u64 num_bytes = 0;
1694         u64 disk_bytenr = 0;
1695         u64 extent_offset = 0;
1696         u64 mask = root->sectorsize - 1;
1697         int extent_type;
1698         int ret;
1699
1700         rec = active_node->current;
1701         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702         rec->found_file_extent = 1;
1703
1704         if (rec->extent_start == (u64)-1) {
1705                 rec->extent_start = key->offset;
1706                 rec->extent_end = key->offset;
1707         }
1708
1709         if (rec->extent_end > key->offset)
1710                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711         else if (rec->extent_end < key->offset) {
1712                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713                                            key->offset - rec->extent_end);
1714                 if (ret < 0)
1715                         return ret;
1716         }
1717
1718         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719         extent_type = btrfs_file_extent_type(eb, fi);
1720
1721         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723                 if (num_bytes == 0)
1724                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725                 rec->found_size += num_bytes;
1726                 num_bytes = (num_bytes + mask) & ~mask;
1727         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731                 extent_offset = btrfs_file_extent_offset(eb, fi);
1732                 if (num_bytes == 0 || (num_bytes & mask))
1733                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734                 if (num_bytes + extent_offset >
1735                     btrfs_file_extent_ram_bytes(eb, fi))
1736                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1737                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738                     (btrfs_file_extent_compression(eb, fi) ||
1739                      btrfs_file_extent_encryption(eb, fi) ||
1740                      btrfs_file_extent_other_encoding(eb, fi)))
1741                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1742                 if (disk_bytenr > 0)
1743                         rec->found_size += num_bytes;
1744         } else {
1745                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746         }
1747         rec->extent_end = key->offset + num_bytes;
1748
1749         /*
1750          * The data reloc tree will copy full extents into its inode and then
1751          * copy the corresponding csums.  Because the extent it copied could be
1752          * a preallocated extent that hasn't been written to yet there may be no
1753          * csums to copy, ergo we won't have csums for our file extent.  This is
1754          * ok so just don't bother checking csums if the inode belongs to the
1755          * data reloc tree.
1756          */
1757         if (disk_bytenr > 0 &&
1758             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1759                 u64 found;
1760                 if (btrfs_file_extent_compression(eb, fi))
1761                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762                 else
1763                         disk_bytenr += extent_offset;
1764
1765                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1766                 if (ret < 0)
1767                         return ret;
1768                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769                         if (found > 0)
1770                                 rec->found_csum_item = 1;
1771                         if (found < num_bytes)
1772                                 rec->some_csum_missing = 1;
1773                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774                         if (found > 0)
1775                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1776                 }
1777         }
1778         return 0;
1779 }
1780
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782                             struct walk_control *wc)
1783 {
1784         struct btrfs_key key;
1785         u32 nritems;
1786         int i;
1787         int ret = 0;
1788         struct cache_tree *inode_cache;
1789         struct shared_node *active_node;
1790
1791         if (wc->root_level == wc->active_node &&
1792             btrfs_root_refs(&root->root_item) == 0)
1793                 return 0;
1794
1795         active_node = wc->nodes[wc->active_node];
1796         inode_cache = &active_node->inode_cache;
1797         nritems = btrfs_header_nritems(eb);
1798         for (i = 0; i < nritems; i++) {
1799                 btrfs_item_key_to_cpu(eb, &key, i);
1800
1801                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802                         continue;
1803                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1804                         continue;
1805
1806                 if (active_node->current == NULL ||
1807                     active_node->current->ino < key.objectid) {
1808                         if (active_node->current) {
1809                                 active_node->current->checked = 1;
1810                                 maybe_free_inode_rec(inode_cache,
1811                                                      active_node->current);
1812                         }
1813                         active_node->current = get_inode_rec(inode_cache,
1814                                                              key.objectid, 1);
1815                         BUG_ON(IS_ERR(active_node->current));
1816                 }
1817                 switch (key.type) {
1818                 case BTRFS_DIR_ITEM_KEY:
1819                 case BTRFS_DIR_INDEX_KEY:
1820                         ret = process_dir_item(root, eb, i, &key, active_node);
1821                         break;
1822                 case BTRFS_INODE_REF_KEY:
1823                         ret = process_inode_ref(eb, i, &key, active_node);
1824                         break;
1825                 case BTRFS_INODE_EXTREF_KEY:
1826                         ret = process_inode_extref(eb, i, &key, active_node);
1827                         break;
1828                 case BTRFS_INODE_ITEM_KEY:
1829                         ret = process_inode_item(eb, i, &key, active_node);
1830                         break;
1831                 case BTRFS_EXTENT_DATA_KEY:
1832                         ret = process_file_extent(root, eb, i, &key,
1833                                                   active_node);
1834                         break;
1835                 default:
1836                         break;
1837                 };
1838         }
1839         return ret;
1840 }
1841
1842 static void reada_walk_down(struct btrfs_root *root,
1843                             struct extent_buffer *node, int slot)
1844 {
1845         u64 bytenr;
1846         u64 ptr_gen;
1847         u32 nritems;
1848         u32 blocksize;
1849         int i;
1850         int level;
1851
1852         level = btrfs_header_level(node);
1853         if (level != 1)
1854                 return;
1855
1856         nritems = btrfs_header_nritems(node);
1857         blocksize = root->nodesize;
1858         for (i = slot; i < nritems; i++) {
1859                 bytenr = btrfs_node_blockptr(node, i);
1860                 ptr_gen = btrfs_node_ptr_generation(node, i);
1861                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1862         }
1863 }
1864
1865 /*
1866  * Check the child node/leaf by the following condition:
1867  * 1. the first item key of the node/leaf should be the same with the one
1868  *    in parent.
1869  * 2. block in parent node should match the child node/leaf.
1870  * 3. generation of parent node and child's header should be consistent.
1871  *
1872  * Or the child node/leaf pointed by the key in parent is not valid.
1873  *
1874  * We hope to check leaf owner too, but since subvol may share leaves,
1875  * which makes leaf owner check not so strong, key check should be
1876  * sufficient enough for that case.
1877  */
1878 static int check_child_node(struct btrfs_root *root,
1879                             struct extent_buffer *parent, int slot,
1880                             struct extent_buffer *child)
1881 {
1882         struct btrfs_key parent_key;
1883         struct btrfs_key child_key;
1884         int ret = 0;
1885
1886         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887         if (btrfs_header_level(child) == 0)
1888                 btrfs_item_key_to_cpu(child, &child_key, 0);
1889         else
1890                 btrfs_node_key_to_cpu(child, &child_key, 0);
1891
1892         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1893                 ret = -EINVAL;
1894                 fprintf(stderr,
1895                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896                         parent_key.objectid, parent_key.type, parent_key.offset,
1897                         child_key.objectid, child_key.type, child_key.offset);
1898         }
1899         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900                 ret = -EINVAL;
1901                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902                         btrfs_node_blockptr(parent, slot),
1903                         btrfs_header_bytenr(child));
1904         }
1905         if (btrfs_node_ptr_generation(parent, slot) !=
1906             btrfs_header_generation(child)) {
1907                 ret = -EINVAL;
1908                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909                         btrfs_header_generation(child),
1910                         btrfs_node_ptr_generation(parent, slot));
1911         }
1912         return ret;
1913 }
1914
1915 struct node_refs {
1916         u64 bytenr[BTRFS_MAX_LEVEL];
1917         u64 refs[BTRFS_MAX_LEVEL];
1918 };
1919
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921                           struct walk_control *wc, int *level,
1922                           struct node_refs *nrefs)
1923 {
1924         enum btrfs_tree_block_status status;
1925         u64 bytenr;
1926         u64 ptr_gen;
1927         struct extent_buffer *next;
1928         struct extent_buffer *cur;
1929         u32 blocksize;
1930         int ret, err = 0;
1931         u64 refs;
1932
1933         WARN_ON(*level < 0);
1934         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1935
1936         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937                 refs = nrefs->refs[*level];
1938                 ret = 0;
1939         } else {
1940                 ret = btrfs_lookup_extent_info(NULL, root,
1941                                        path->nodes[*level]->start,
1942                                        *level, 1, &refs, NULL);
1943                 if (ret < 0) {
1944                         err = ret;
1945                         goto out;
1946                 }
1947                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948                 nrefs->refs[*level] = refs;
1949         }
1950
1951         if (refs > 1) {
1952                 ret = enter_shared_node(root, path->nodes[*level]->start,
1953                                         refs, wc, *level);
1954                 if (ret > 0) {
1955                         err = ret;
1956                         goto out;
1957                 }
1958         }
1959
1960         while (*level >= 0) {
1961                 WARN_ON(*level < 0);
1962                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963                 cur = path->nodes[*level];
1964
1965                 if (btrfs_header_level(cur) != *level)
1966                         WARN_ON(1);
1967
1968                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1969                         break;
1970                 if (*level == 0) {
1971                         ret = process_one_leaf(root, cur, wc);
1972                         if (ret < 0)
1973                                 err = ret;
1974                         break;
1975                 }
1976                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978                 blocksize = root->nodesize;
1979
1980                 if (bytenr == nrefs->bytenr[*level - 1]) {
1981                         refs = nrefs->refs[*level - 1];
1982                 } else {
1983                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984                                         *level - 1, 1, &refs, NULL);
1985                         if (ret < 0) {
1986                                 refs = 0;
1987                         } else {
1988                                 nrefs->bytenr[*level - 1] = bytenr;
1989                                 nrefs->refs[*level - 1] = refs;
1990                         }
1991                 }
1992
1993                 if (refs > 1) {
1994                         ret = enter_shared_node(root, bytenr, refs,
1995                                                 wc, *level - 1);
1996                         if (ret > 0) {
1997                                 path->slots[*level]++;
1998                                 continue;
1999                         }
2000                 }
2001
2002                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004                         free_extent_buffer(next);
2005                         reada_walk_down(root, cur, path->slots[*level]);
2006                         next = read_tree_block(root, bytenr, blocksize,
2007                                                ptr_gen);
2008                         if (!extent_buffer_uptodate(next)) {
2009                                 struct btrfs_key node_key;
2010
2011                                 btrfs_node_key_to_cpu(path->nodes[*level],
2012                                                       &node_key,
2013                                                       path->slots[*level]);
2014                                 btrfs_add_corrupt_extent_record(root->fs_info,
2015                                                 &node_key,
2016                                                 path->nodes[*level]->start,
2017                                                 root->nodesize, *level);
2018                                 err = -EIO;
2019                                 goto out;
2020                         }
2021                 }
2022
2023                 ret = check_child_node(root, cur, path->slots[*level], next);
2024                 if (ret) {
2025                         err = ret;
2026                         goto out;
2027                 }
2028
2029                 if (btrfs_is_leaf(next))
2030                         status = btrfs_check_leaf(root, NULL, next);
2031                 else
2032                         status = btrfs_check_node(root, NULL, next);
2033                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034                         free_extent_buffer(next);
2035                         err = -EIO;
2036                         goto out;
2037                 }
2038
2039                 *level = *level - 1;
2040                 free_extent_buffer(path->nodes[*level]);
2041                 path->nodes[*level] = next;
2042                 path->slots[*level] = 0;
2043         }
2044 out:
2045         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2046         return err;
2047 }
2048
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050                         struct walk_control *wc, int *level)
2051 {
2052         int i;
2053         struct extent_buffer *leaf;
2054
2055         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2056                 leaf = path->nodes[i];
2057                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2058                         path->slots[i]++;
2059                         *level = i;
2060                         return 0;
2061                 } else {
2062                         free_extent_buffer(path->nodes[*level]);
2063                         path->nodes[*level] = NULL;
2064                         BUG_ON(*level > wc->active_node);
2065                         if (*level == wc->active_node)
2066                                 leave_shared_node(root, wc, *level);
2067                         *level = i + 1;
2068                 }
2069         }
2070         return 1;
2071 }
2072
2073 static int check_root_dir(struct inode_record *rec)
2074 {
2075         struct inode_backref *backref;
2076         int ret = -1;
2077
2078         if (!rec->found_inode_item || rec->errors)
2079                 goto out;
2080         if (rec->nlink != 1 || rec->found_link != 0)
2081                 goto out;
2082         if (list_empty(&rec->backrefs))
2083                 goto out;
2084         backref = to_inode_backref(rec->backrefs.next);
2085         if (!backref->found_inode_ref)
2086                 goto out;
2087         if (backref->index != 0 || backref->namelen != 2 ||
2088             memcmp(backref->name, "..", 2))
2089                 goto out;
2090         if (backref->found_dir_index || backref->found_dir_item)
2091                 goto out;
2092         ret = 0;
2093 out:
2094         return ret;
2095 }
2096
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098                               struct btrfs_root *root, struct btrfs_path *path,
2099                               struct inode_record *rec)
2100 {
2101         struct btrfs_inode_item *ei;
2102         struct btrfs_key key;
2103         int ret;
2104
2105         key.objectid = rec->ino;
2106         key.type = BTRFS_INODE_ITEM_KEY;
2107         key.offset = (u64)-1;
2108
2109         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2110         if (ret < 0)
2111                 goto out;
2112         if (ret) {
2113                 if (!path->slots[0]) {
2114                         ret = -ENOENT;
2115                         goto out;
2116                 }
2117                 path->slots[0]--;
2118                 ret = 0;
2119         }
2120         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2121         if (key.objectid != rec->ino) {
2122                 ret = -ENOENT;
2123                 goto out;
2124         }
2125
2126         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127                             struct btrfs_inode_item);
2128         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129         btrfs_mark_buffer_dirty(path->nodes[0]);
2130         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132                root->root_key.objectid);
2133 out:
2134         btrfs_release_path(path);
2135         return ret;
2136 }
2137
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139                                     struct btrfs_root *root,
2140                                     struct btrfs_path *path,
2141                                     struct inode_record *rec)
2142 {
2143         int ret;
2144
2145         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146         btrfs_release_path(path);
2147         if (!ret)
2148                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2149         return ret;
2150 }
2151
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153                                struct btrfs_root *root,
2154                                struct btrfs_path *path,
2155                                struct inode_record *rec)
2156 {
2157         struct btrfs_inode_item *ei;
2158         struct btrfs_key key;
2159         int ret = 0;
2160
2161         key.objectid = rec->ino;
2162         key.type = BTRFS_INODE_ITEM_KEY;
2163         key.offset = 0;
2164
2165         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2166         if (ret) {
2167                 if (ret > 0)
2168                         ret = -ENOENT;
2169                 goto out;
2170         }
2171
2172         /* Since ret == 0, no need to check anything */
2173         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174                             struct btrfs_inode_item);
2175         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176         btrfs_mark_buffer_dirty(path->nodes[0]);
2177         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178         printf("reset nbytes for ino %llu root %llu\n",
2179                rec->ino, root->root_key.objectid);
2180 out:
2181         btrfs_release_path(path);
2182         return ret;
2183 }
2184
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186                                  struct cache_tree *inode_cache,
2187                                  struct inode_record *rec,
2188                                  struct inode_backref *backref)
2189 {
2190         struct btrfs_path path;
2191         struct btrfs_trans_handle *trans;
2192         struct btrfs_dir_item *dir_item;
2193         struct extent_buffer *leaf;
2194         struct btrfs_key key;
2195         struct btrfs_disk_key disk_key;
2196         struct inode_record *dir_rec;
2197         unsigned long name_ptr;
2198         u32 data_size = sizeof(*dir_item) + backref->namelen;
2199         int ret;
2200
2201         trans = btrfs_start_transaction(root, 1);
2202         if (IS_ERR(trans))
2203                 return PTR_ERR(trans);
2204
2205         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206                 (unsigned long long)rec->ino);
2207
2208         btrfs_init_path(&path);
2209         key.objectid = backref->dir;
2210         key.type = BTRFS_DIR_INDEX_KEY;
2211         key.offset = backref->index;
2212         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2213         BUG_ON(ret);
2214
2215         leaf = path.nodes[0];
2216         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2217
2218         disk_key.objectid = cpu_to_le64(rec->ino);
2219         disk_key.type = BTRFS_INODE_ITEM_KEY;
2220         disk_key.offset = 0;
2221
2222         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224         btrfs_set_dir_data_len(leaf, dir_item, 0);
2225         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2226         name_ptr = (unsigned long)(dir_item + 1);
2227         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228         btrfs_mark_buffer_dirty(leaf);
2229         btrfs_release_path(&path);
2230         btrfs_commit_transaction(trans, root);
2231
2232         backref->found_dir_index = 1;
2233         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234         BUG_ON(IS_ERR(dir_rec));
2235         if (!dir_rec)
2236                 return 0;
2237         dir_rec->found_size += backref->namelen;
2238         if (dir_rec->found_size == dir_rec->isize &&
2239             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241         if (dir_rec->found_size != dir_rec->isize)
2242                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2243
2244         return 0;
2245 }
2246
2247 static int delete_dir_index(struct btrfs_root *root,
2248                             struct cache_tree *inode_cache,
2249                             struct inode_record *rec,
2250                             struct inode_backref *backref)
2251 {
2252         struct btrfs_trans_handle *trans;
2253         struct btrfs_dir_item *di;
2254         struct btrfs_path path;
2255         int ret = 0;
2256
2257         trans = btrfs_start_transaction(root, 1);
2258         if (IS_ERR(trans))
2259                 return PTR_ERR(trans);
2260
2261         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262                 (unsigned long long)backref->dir,
2263                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264                 (unsigned long long)root->objectid);
2265
2266         btrfs_init_path(&path);
2267         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268                                     backref->name, backref->namelen,
2269                                     backref->index, -1);
2270         if (IS_ERR(di)) {
2271                 ret = PTR_ERR(di);
2272                 btrfs_release_path(&path);
2273                 btrfs_commit_transaction(trans, root);
2274                 if (ret == -ENOENT)
2275                         return 0;
2276                 return ret;
2277         }
2278
2279         if (!di)
2280                 ret = btrfs_del_item(trans, root, &path);
2281         else
2282                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283         BUG_ON(ret);
2284         btrfs_release_path(&path);
2285         btrfs_commit_transaction(trans, root);
2286         return ret;
2287 }
2288
2289 static int create_inode_item(struct btrfs_root *root,
2290                              struct inode_record *rec,
2291                              struct inode_backref *backref, int root_dir)
2292 {
2293         struct btrfs_trans_handle *trans;
2294         struct btrfs_inode_item inode_item;
2295         time_t now = time(NULL);
2296         int ret;
2297
2298         trans = btrfs_start_transaction(root, 1);
2299         if (IS_ERR(trans)) {
2300                 ret = PTR_ERR(trans);
2301                 return ret;
2302         }
2303
2304         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305                 "be incomplete, please check permissions and content after "
2306                 "the fsck completes.\n", (unsigned long long)root->objectid,
2307                 (unsigned long long)rec->ino);
2308
2309         memset(&inode_item, 0, sizeof(inode_item));
2310         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2311         if (root_dir)
2312                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313         else
2314                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316         if (rec->found_dir_item) {
2317                 if (rec->found_file_extent)
2318                         fprintf(stderr, "root %llu inode %llu has both a dir "
2319                                 "item and extents, unsure if it is a dir or a "
2320                                 "regular file so setting it as a directory\n",
2321                                 (unsigned long long)root->objectid,
2322                                 (unsigned long long)rec->ino);
2323                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2325         } else if (!rec->found_dir_item) {
2326                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2328         }
2329         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337
2338         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339         BUG_ON(ret);
2340         btrfs_commit_transaction(trans, root);
2341         return 0;
2342 }
2343
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345                                  struct inode_record *rec,
2346                                  struct cache_tree *inode_cache,
2347                                  int delete)
2348 {
2349         struct inode_backref *tmp, *backref;
2350         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2351         int ret = 0;
2352         int repaired = 0;
2353
2354         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355                 if (!delete && rec->ino == root_dirid) {
2356                         if (!rec->found_inode_item) {
2357                                 ret = create_inode_item(root, rec, backref, 1);
2358                                 if (ret)
2359                                         break;
2360                                 repaired++;
2361                         }
2362                 }
2363
2364                 /* Index 0 for root dir's are special, don't mess with it */
2365                 if (rec->ino == root_dirid && backref->index == 0)
2366                         continue;
2367
2368                 if (delete &&
2369                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2370                      (backref->found_dir_index && backref->found_inode_ref &&
2371                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372                         ret = delete_dir_index(root, inode_cache, rec, backref);
2373                         if (ret)
2374                                 break;
2375                         repaired++;
2376                         list_del(&backref->list);
2377                         free(backref);
2378                 }
2379
2380                 if (!delete && !backref->found_dir_index &&
2381                     backref->found_dir_item && backref->found_inode_ref) {
2382                         ret = add_missing_dir_index(root, inode_cache, rec,
2383                                                     backref);
2384                         if (ret)
2385                                 break;
2386                         repaired++;
2387                         if (backref->found_dir_item &&
2388                             backref->found_dir_index &&
2389                             backref->found_dir_index) {
2390                                 if (!backref->errors &&
2391                                     backref->found_inode_ref) {
2392                                         list_del(&backref->list);
2393                                         free(backref);
2394                                 }
2395                         }
2396                 }
2397
2398                 if (!delete && (!backref->found_dir_index &&
2399                                 !backref->found_dir_item &&
2400                                 backref->found_inode_ref)) {
2401                         struct btrfs_trans_handle *trans;
2402                         struct btrfs_key location;
2403
2404                         ret = check_dir_conflict(root, backref->name,
2405                                                  backref->namelen,
2406                                                  backref->dir,
2407                                                  backref->index);
2408                         if (ret) {
2409                                 /*
2410                                  * let nlink fixing routine to handle it,
2411                                  * which can do it better.
2412                                  */
2413                                 ret = 0;
2414                                 break;
2415                         }
2416                         location.objectid = rec->ino;
2417                         location.type = BTRFS_INODE_ITEM_KEY;
2418                         location.offset = 0;
2419
2420                         trans = btrfs_start_transaction(root, 1);
2421                         if (IS_ERR(trans)) {
2422                                 ret = PTR_ERR(trans);
2423                                 break;
2424                         }
2425                         fprintf(stderr, "adding missing dir index/item pair "
2426                                 "for inode %llu\n",
2427                                 (unsigned long long)rec->ino);
2428                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2429                                                     backref->namelen,
2430                                                     backref->dir, &location,
2431                                                     imode_to_type(rec->imode),
2432                                                     backref->index);
2433                         BUG_ON(ret);
2434                         btrfs_commit_transaction(trans, root);
2435                         repaired++;
2436                 }
2437
2438                 if (!delete && (backref->found_inode_ref &&
2439                                 backref->found_dir_index &&
2440                                 backref->found_dir_item &&
2441                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442                                 !rec->found_inode_item)) {
2443                         ret = create_inode_item(root, rec, backref, 0);
2444                         if (ret)
2445                                 break;
2446                         repaired++;
2447                 }
2448
2449         }
2450         return ret ? ret : repaired;
2451 }
2452
2453 /*
2454  * To determine the file type for nlink/inode_item repair
2455  *
2456  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457  * Return -ENOENT if file type is not found.
2458  */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2460 {
2461         struct inode_backref *backref;
2462
2463         /* For inode item recovered case */
2464         if (rec->found_inode_item) {
2465                 *type = imode_to_type(rec->imode);
2466                 return 0;
2467         }
2468
2469         list_for_each_entry(backref, &rec->backrefs, list) {
2470                 if (backref->found_dir_index || backref->found_dir_item) {
2471                         *type = backref->filetype;
2472                         return 0;
2473                 }
2474         }
2475         return -ENOENT;
2476 }
2477
2478 /*
2479  * To determine the file name for nlink repair
2480  *
2481  * Return 0 if file name is found, set name and namelen.
2482  * Return -ENOENT if file name is not found.
2483  */
2484 static int find_file_name(struct inode_record *rec,
2485                           char *name, int *namelen)
2486 {
2487         struct inode_backref *backref;
2488
2489         list_for_each_entry(backref, &rec->backrefs, list) {
2490                 if (backref->found_dir_index || backref->found_dir_item ||
2491                     backref->found_inode_ref) {
2492                         memcpy(name, backref->name, backref->namelen);
2493                         *namelen = backref->namelen;
2494                         return 0;
2495                 }
2496         }
2497         return -ENOENT;
2498 }
2499
2500 /* Reset the nlink of the inode to the correct one */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502                        struct btrfs_root *root,
2503                        struct btrfs_path *path,
2504                        struct inode_record *rec)
2505 {
2506         struct inode_backref *backref;
2507         struct inode_backref *tmp;
2508         struct btrfs_key key;
2509         struct btrfs_inode_item *inode_item;
2510         int ret = 0;
2511
2512         /* We don't believe this either, reset it and iterate backref */
2513         rec->found_link = 0;
2514
2515         /* Remove all backref including the valid ones */
2516         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518                                    backref->index, backref->name,
2519                                    backref->namelen, 0);
2520                 if (ret < 0)
2521                         goto out;
2522
2523                 /* remove invalid backref, so it won't be added back */
2524                 if (!(backref->found_dir_index &&
2525                       backref->found_dir_item &&
2526                       backref->found_inode_ref)) {
2527                         list_del(&backref->list);
2528                         free(backref);
2529                 } else {
2530                         rec->found_link++;
2531                 }
2532         }
2533
2534         /* Set nlink to 0 */
2535         key.objectid = rec->ino;
2536         key.type = BTRFS_INODE_ITEM_KEY;
2537         key.offset = 0;
2538         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2539         if (ret < 0)
2540                 goto out;
2541         if (ret > 0) {
2542                 ret = -ENOENT;
2543                 goto out;
2544         }
2545         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546                                     struct btrfs_inode_item);
2547         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548         btrfs_mark_buffer_dirty(path->nodes[0]);
2549         btrfs_release_path(path);
2550
2551         /*
2552          * Add back valid inode_ref/dir_item/dir_index,
2553          * add_link() will handle the nlink inc, so new nlink must be correct
2554          */
2555         list_for_each_entry(backref, &rec->backrefs, list) {
2556                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557                                      backref->name, backref->namelen,
2558                                      backref->filetype, &backref->index, 1);
2559                 if (ret < 0)
2560                         goto out;
2561         }
2562 out:
2563         btrfs_release_path(path);
2564         return ret;
2565 }
2566
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568                                struct btrfs_root *root,
2569                                struct btrfs_path *path,
2570                                struct inode_record *rec)
2571 {
2572         char *dir_name = "lost+found";
2573         char namebuf[BTRFS_NAME_LEN] = {0};
2574         u64 lost_found_ino;
2575         u32 mode = 0700;
2576         u8 type = 0;
2577         int namelen = 0;
2578         int name_recovered = 0;
2579         int type_recovered = 0;
2580         int ret = 0;
2581
2582         /*
2583          * Get file name and type first before these invalid inode ref
2584          * are deleted by remove_all_invalid_backref()
2585          */
2586         name_recovered = !find_file_name(rec, namebuf, &namelen);
2587         type_recovered = !find_file_type(rec, &type);
2588
2589         if (!name_recovered) {
2590                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591                        rec->ino, rec->ino);
2592                 namelen = count_digits(rec->ino);
2593                 sprintf(namebuf, "%llu", rec->ino);
2594                 name_recovered = 1;
2595         }
2596         if (!type_recovered) {
2597                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598                        rec->ino);
2599                 type = BTRFS_FT_REG_FILE;
2600                 type_recovered = 1;
2601         }
2602
2603         ret = reset_nlink(trans, root, path, rec);
2604         if (ret < 0) {
2605                 fprintf(stderr,
2606                         "Failed to reset nlink for inode %llu: %s\n",
2607                         rec->ino, strerror(-ret));
2608                 goto out;
2609         }
2610
2611         if (rec->found_link == 0) {
2612                 lost_found_ino = root->highest_inode;
2613                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2614                         ret = -EOVERFLOW;
2615                         goto out;
2616                 }
2617                 lost_found_ino++;
2618                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2620                                   mode);
2621                 if (ret < 0) {
2622                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623                                 dir_name, strerror(-ret));
2624                         goto out;
2625                 }
2626                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627                                      namebuf, namelen, type, NULL, 1);
2628                 /*
2629                  * Add ".INO" suffix several times to handle case where
2630                  * "FILENAME.INO" is already taken by another file.
2631                  */
2632                 while (ret == -EEXIST) {
2633                         /*
2634                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2635                          */
2636                         if (namelen + count_digits(rec->ino) + 1 >
2637                             BTRFS_NAME_LEN) {
2638                                 ret = -EFBIG;
2639                                 goto out;
2640                         }
2641                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642                                  ".%llu", rec->ino);
2643                         namelen += count_digits(rec->ino) + 1;
2644                         ret = btrfs_add_link(trans, root, rec->ino,
2645                                              lost_found_ino, namebuf,
2646                                              namelen, type, NULL, 1);
2647                 }
2648                 if (ret < 0) {
2649                         fprintf(stderr,
2650                                 "Failed to link the inode %llu to %s dir: %s\n",
2651                                 rec->ino, dir_name, strerror(-ret));
2652                         goto out;
2653                 }
2654                 /*
2655                  * Just increase the found_link, don't actually add the
2656                  * backref. This will make things easier and this inode
2657                  * record will be freed after the repair is done.
2658                  * So fsck will not report problem about this inode.
2659                  */
2660                 rec->found_link++;
2661                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662                        namelen, namebuf, dir_name);
2663         }
2664         printf("Fixed the nlink of inode %llu\n", rec->ino);
2665 out:
2666         /*
2667          * Clear the flag anyway, or we will loop forever for the same inode
2668          * as it will not be removed from the bad inode list and the dead loop
2669          * happens.
2670          */
2671         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672         btrfs_release_path(path);
2673         return ret;
2674 }
2675
2676 /*
2677  * Check if there is any normal(reg or prealloc) file extent for given
2678  * ino.
2679  * This is used to determine the file type when neither its dir_index/item or
2680  * inode_item exists.
2681  *
2682  * This will *NOT* report error, if any error happens, just consider it does
2683  * not have any normal file extent.
2684  */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 {
2687         struct btrfs_path path;
2688         struct btrfs_key key;
2689         struct btrfs_key found_key;
2690         struct btrfs_file_extent_item *fi;
2691         u8 type;
2692         int ret = 0;
2693
2694         btrfs_init_path(&path);
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705                 ret = btrfs_next_leaf(root, &path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2713                                       path.slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path.nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_release_path(&path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         /*
2930          * For nlink repair, it may create a dir and add link, so
2931          * 2 for parent(256)'s dir_index and dir_item
2932          * 2 for lost+found dir's inode_item and inode_ref
2933          * 1 for the new inode_ref of the file
2934          * 2 for lost+found dir's dir_index and dir_item for the file
2935          */
2936         trans = btrfs_start_transaction(root, 7);
2937         if (IS_ERR(trans))
2938                 return PTR_ERR(trans);
2939
2940         btrfs_init_path(&path);
2941         if (rec->errors & I_ERR_NO_INODE_ITEM)
2942                 ret = repair_inode_no_item(trans, root, &path, rec);
2943         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948                 ret = repair_inode_isize(trans, root, &path, rec);
2949         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952                 ret = repair_inode_nlinks(trans, root, &path, rec);
2953         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954                 ret = repair_inode_nbytes(trans, root, &path, rec);
2955         btrfs_commit_transaction(trans, root);
2956         btrfs_release_path(&path);
2957         return ret;
2958 }
2959
2960 static int check_inode_recs(struct btrfs_root *root,
2961                             struct cache_tree *inode_cache)
2962 {
2963         struct cache_extent *cache;
2964         struct ptr_node *node;
2965         struct inode_record *rec;
2966         struct inode_backref *backref;
2967         int stage = 0;
2968         int ret = 0;
2969         int err = 0;
2970         u64 error = 0;
2971         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2972
2973         if (btrfs_root_refs(&root->root_item) == 0) {
2974                 if (!cache_tree_empty(inode_cache))
2975                         fprintf(stderr, "warning line %d\n", __LINE__);
2976                 return 0;
2977         }
2978
2979         /*
2980          * We need to record the highest inode number for later 'lost+found'
2981          * dir creation.
2982          * We must select an ino not used/referred by any existing inode, or
2983          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984          * this may cause 'lost+found' dir has wrong nlinks.
2985          */
2986         cache = last_cache_extent(inode_cache);
2987         if (cache) {
2988                 node = container_of(cache, struct ptr_node, cache);
2989                 rec = node->data;
2990                 if (rec->ino > root->highest_inode)
2991                         root->highest_inode = rec->ino;
2992         }
2993
2994         /*
2995          * We need to repair backrefs first because we could change some of the
2996          * errors in the inode recs.
2997          *
2998          * We also need to go through and delete invalid backrefs first and then
2999          * add the correct ones second.  We do this because we may get EEXIST
3000          * when adding back the correct index because we hadn't yet deleted the
3001          * invalid index.
3002          *
3003          * For example, if we were missing a dir index then the directories
3004          * isize would be wrong, so if we fixed the isize to what we thought it
3005          * would be and then fixed the backref we'd still have a invalid fs, so
3006          * we need to add back the dir index and then check to see if the isize
3007          * is still wrong.
3008          */
3009         while (stage < 3) {
3010                 stage++;
3011                 if (stage == 3 && !err)
3012                         break;
3013
3014                 cache = search_cache_extent(inode_cache, 0);
3015                 while (repair && cache) {
3016                         node = container_of(cache, struct ptr_node, cache);
3017                         rec = node->data;
3018                         cache = next_cache_extent(cache);
3019
3020                         /* Need to free everything up and rescan */
3021                         if (stage == 3) {
3022                                 remove_cache_extent(inode_cache, &node->cache);
3023                                 free(node);
3024                                 free_inode_rec(rec);
3025                                 continue;
3026                         }
3027
3028                         if (list_empty(&rec->backrefs))
3029                                 continue;
3030
3031                         ret = repair_inode_backrefs(root, rec, inode_cache,
3032                                                     stage == 1);
3033                         if (ret < 0) {
3034                                 err = ret;
3035                                 stage = 2;
3036                                 break;
3037                         } if (ret > 0) {
3038                                 err = -EAGAIN;
3039                         }
3040                 }
3041         }
3042         if (err)
3043                 return err;
3044
3045         rec = get_inode_rec(inode_cache, root_dirid, 0);
3046         BUG_ON(IS_ERR(rec));
3047         if (rec) {
3048                 ret = check_root_dir(rec);
3049                 if (ret) {
3050                         fprintf(stderr, "root %llu root dir %llu error\n",
3051                                 (unsigned long long)root->root_key.objectid,
3052                                 (unsigned long long)root_dirid);
3053                         print_inode_error(root, rec);
3054                         error++;
3055                 }
3056         } else {
3057                 if (repair) {
3058                         struct btrfs_trans_handle *trans;
3059
3060                         trans = btrfs_start_transaction(root, 1);
3061                         if (IS_ERR(trans)) {
3062                                 err = PTR_ERR(trans);
3063                                 return err;
3064                         }
3065
3066                         fprintf(stderr,
3067                                 "root %llu missing its root dir, recreating\n",
3068                                 (unsigned long long)root->objectid);
3069
3070                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3071                         BUG_ON(ret);
3072
3073                         btrfs_commit_transaction(trans, root);
3074                         return -EAGAIN;
3075                 }
3076
3077                 fprintf(stderr, "root %llu root dir %llu not found\n",
3078                         (unsigned long long)root->root_key.objectid,
3079                         (unsigned long long)root_dirid);
3080         }
3081
3082         while (1) {
3083                 cache = search_cache_extent(inode_cache, 0);
3084                 if (!cache)
3085                         break;
3086                 node = container_of(cache, struct ptr_node, cache);
3087                 rec = node->data;
3088                 remove_cache_extent(inode_cache, &node->cache);
3089                 free(node);
3090                 if (rec->ino == root_dirid ||
3091                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092                         free_inode_rec(rec);
3093                         continue;
3094                 }
3095
3096                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097                         ret = check_orphan_item(root, rec->ino);
3098                         if (ret == 0)
3099                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100                         if (can_free_inode_rec(rec)) {
3101                                 free_inode_rec(rec);
3102                                 continue;
3103                         }
3104                 }
3105
3106                 if (!rec->found_inode_item)
3107                         rec->errors |= I_ERR_NO_INODE_ITEM;
3108                 if (rec->found_link != rec->nlink)
3109                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3110                 if (repair) {
3111                         ret = try_repair_inode(root, rec);
3112                         if (ret == 0 && can_free_inode_rec(rec)) {
3113                                 free_inode_rec(rec);
3114                                 continue;
3115                         }
3116                         ret = 0;
3117                 }
3118
3119                 if (!(repair && ret == 0))
3120                         error++;
3121                 print_inode_error(root, rec);
3122                 list_for_each_entry(backref, &rec->backrefs, list) {
3123                         if (!backref->found_dir_item)
3124                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125                         if (!backref->found_dir_index)
3126                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127                         if (!backref->found_inode_ref)
3128                                 backref->errors |= REF_ERR_NO_INODE_REF;
3129                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130                                 " namelen %u name %s filetype %d errors %x",
3131                                 (unsigned long long)backref->dir,
3132                                 (unsigned long long)backref->index,
3133                                 backref->namelen, backref->name,
3134                                 backref->filetype, backref->errors);
3135                         print_ref_error(backref->errors);
3136                 }
3137                 free_inode_rec(rec);
3138         }
3139         return (error > 0) ? -1 : 0;
3140 }
3141
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3143                                         u64 objectid)
3144 {
3145         struct cache_extent *cache;
3146         struct root_record *rec = NULL;
3147         int ret;
3148
3149         cache = lookup_cache_extent(root_cache, objectid, 1);
3150         if (cache) {
3151                 rec = container_of(cache, struct root_record, cache);
3152         } else {
3153                 rec = calloc(1, sizeof(*rec));
3154                 if (!rec)
3155                         return ERR_PTR(-ENOMEM);
3156                 rec->objectid = objectid;
3157                 INIT_LIST_HEAD(&rec->backrefs);
3158                 rec->cache.start = objectid;
3159                 rec->cache.size = 1;
3160
3161                 ret = insert_cache_extent(root_cache, &rec->cache);
3162                 if (ret)
3163                         return ERR_PTR(-EEXIST);
3164         }
3165         return rec;
3166 }
3167
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169                                              u64 ref_root, u64 dir, u64 index,
3170                                              const char *name, int namelen)
3171 {
3172         struct root_backref *backref;
3173
3174         list_for_each_entry(backref, &rec->backrefs, list) {
3175                 if (backref->ref_root != ref_root || backref->dir != dir ||
3176                     backref->namelen != namelen)
3177                         continue;
3178                 if (memcmp(name, backref->name, namelen))
3179                         continue;
3180                 return backref;
3181         }
3182
3183         backref = calloc(1, sizeof(*backref) + namelen + 1);
3184         if (!backref)
3185                 return NULL;
3186         backref->ref_root = ref_root;
3187         backref->dir = dir;
3188         backref->index = index;
3189         backref->namelen = namelen;
3190         memcpy(backref->name, name, namelen);
3191         backref->name[namelen] = '\0';
3192         list_add_tail(&backref->list, &rec->backrefs);
3193         return backref;
3194 }
3195
3196 static void free_root_record(struct cache_extent *cache)
3197 {
3198         struct root_record *rec;
3199         struct root_backref *backref;
3200
3201         rec = container_of(cache, struct root_record, cache);
3202         while (!list_empty(&rec->backrefs)) {
3203                 backref = to_root_backref(rec->backrefs.next);
3204                 list_del(&backref->list);
3205                 free(backref);
3206         }
3207
3208         free(rec);
3209 }
3210
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3212
3213 static int add_root_backref(struct cache_tree *root_cache,
3214                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3215                             const char *name, int namelen,
3216                             int item_type, int errors)
3217 {
3218         struct root_record *rec;
3219         struct root_backref *backref;
3220
3221         rec = get_root_rec(root_cache, root_id);
3222         BUG_ON(IS_ERR(rec));
3223         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3224         BUG_ON(!backref);
3225
3226         backref->errors |= errors;
3227
3228         if (item_type != BTRFS_DIR_ITEM_KEY) {
3229                 if (backref->found_dir_index || backref->found_back_ref ||
3230                     backref->found_forward_ref) {
3231                         if (backref->index != index)
3232                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3233                 } else {
3234                         backref->index = index;
3235                 }
3236         }
3237
3238         if (item_type == BTRFS_DIR_ITEM_KEY) {
3239                 if (backref->found_forward_ref)
3240                         rec->found_ref++;
3241                 backref->found_dir_item = 1;
3242         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243                 backref->found_dir_index = 1;
3244         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245                 if (backref->found_forward_ref)
3246                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3247                 else if (backref->found_dir_item)
3248                         rec->found_ref++;
3249                 backref->found_forward_ref = 1;
3250         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251                 if (backref->found_back_ref)
3252                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253                 backref->found_back_ref = 1;
3254         } else {
3255                 BUG_ON(1);
3256         }
3257
3258         if (backref->found_forward_ref && backref->found_dir_item)
3259                 backref->reachable = 1;
3260         return 0;
3261 }
3262
3263 static int merge_root_recs(struct btrfs_root *root,
3264                            struct cache_tree *src_cache,
3265                            struct cache_tree *dst_cache)
3266 {
3267         struct cache_extent *cache;
3268         struct ptr_node *node;
3269         struct inode_record *rec;
3270         struct inode_backref *backref;
3271         int ret = 0;
3272
3273         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274                 free_inode_recs_tree(src_cache);
3275                 return 0;
3276         }
3277
3278         while (1) {
3279                 cache = search_cache_extent(src_cache, 0);
3280                 if (!cache)
3281                         break;
3282                 node = container_of(cache, struct ptr_node, cache);
3283                 rec = node->data;
3284                 remove_cache_extent(src_cache, &node->cache);
3285                 free(node);
3286
3287                 ret = is_child_root(root, root->objectid, rec->ino);
3288                 if (ret < 0)
3289                         break;
3290                 else if (ret == 0)
3291                         goto skip;
3292
3293                 list_for_each_entry(backref, &rec->backrefs, list) {
3294                         BUG_ON(backref->found_inode_ref);
3295                         if (backref->found_dir_item)
3296                                 add_root_backref(dst_cache, rec->ino,
3297                                         root->root_key.objectid, backref->dir,
3298                                         backref->index, backref->name,
3299                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3300                                         backref->errors);
3301                         if (backref->found_dir_index)
3302                                 add_root_backref(dst_cache, rec->ino,
3303                                         root->root_key.objectid, backref->dir,
3304                                         backref->index, backref->name,
3305                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3306                                         backref->errors);
3307                 }
3308 skip:
3309                 free_inode_rec(rec);
3310         }
3311         if (ret < 0)
3312                 return ret;
3313         return 0;
3314 }
3315
3316 static int check_root_refs(struct btrfs_root *root,
3317                            struct cache_tree *root_cache)
3318 {
3319         struct root_record *rec;
3320         struct root_record *ref_root;
3321         struct root_backref *backref;
3322         struct cache_extent *cache;
3323         int loop = 1;
3324         int ret;
3325         int error;
3326         int errors = 0;
3327
3328         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329         BUG_ON(IS_ERR(rec));
3330         rec->found_ref = 1;
3331
3332         /* fixme: this can not detect circular references */
3333         while (loop) {
3334                 loop = 0;
3335                 cache = search_cache_extent(root_cache, 0);
3336                 while (1) {
3337                         if (!cache)
3338                                 break;
3339                         rec = container_of(cache, struct root_record, cache);
3340                         cache = next_cache_extent(cache);
3341
3342                         if (rec->found_ref == 0)
3343                                 continue;
3344
3345                         list_for_each_entry(backref, &rec->backrefs, list) {
3346                                 if (!backref->reachable)
3347                                         continue;
3348
3349                                 ref_root = get_root_rec(root_cache,
3350                                                         backref->ref_root);
3351                                 BUG_ON(IS_ERR(ref_root));
3352                                 if (ref_root->found_ref > 0)
3353                                         continue;
3354
3355                                 backref->reachable = 0;
3356                                 rec->found_ref--;
3357                                 if (rec->found_ref == 0)
3358                                         loop = 1;
3359                         }
3360                 }
3361         }
3362
3363         cache = search_cache_extent(root_cache, 0);
3364         while (1) {
3365                 if (!cache)
3366                         break;
3367                 rec = container_of(cache, struct root_record, cache);
3368                 cache = next_cache_extent(cache);
3369
3370                 if (rec->found_ref == 0 &&
3371                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373                         ret = check_orphan_item(root->fs_info->tree_root,
3374                                                 rec->objectid);
3375                         if (ret == 0)
3376                                 continue;
3377
3378                         /*
3379                          * If we don't have a root item then we likely just have
3380                          * a dir item in a snapshot for this root but no actual
3381                          * ref key or anything so it's meaningless.
3382                          */
3383                         if (!rec->found_root_item)
3384                                 continue;
3385                         errors++;
3386                         fprintf(stderr, "fs tree %llu not referenced\n",
3387                                 (unsigned long long)rec->objectid);
3388                 }
3389
3390                 error = 0;
3391                 if (rec->found_ref > 0 && !rec->found_root_item)
3392                         error = 1;
3393                 list_for_each_entry(backref, &rec->backrefs, list) {
3394                         if (!backref->found_dir_item)
3395                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396                         if (!backref->found_dir_index)
3397                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398                         if (!backref->found_back_ref)
3399                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400                         if (!backref->found_forward_ref)
3401                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3402                         if (backref->reachable && backref->errors)
3403                                 error = 1;
3404                 }
3405                 if (!error)
3406                         continue;
3407
3408                 errors++;
3409                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410                         (unsigned long long)rec->objectid, rec->found_ref,
3411                          rec->found_root_item ? "" : "not found");
3412
3413                 list_for_each_entry(backref, &rec->backrefs, list) {
3414                         if (!backref->reachable)
3415                                 continue;
3416                         if (!backref->errors && rec->found_root_item)
3417                                 continue;
3418                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419                                 " index %llu namelen %u name %s errors %x\n",
3420                                 (unsigned long long)backref->ref_root,
3421                                 (unsigned long long)backref->dir,
3422                                 (unsigned long long)backref->index,
3423                                 backref->namelen, backref->name,
3424                                 backref->errors);
3425                         print_ref_error(backref->errors);
3426                 }
3427         }
3428         return errors > 0 ? 1 : 0;
3429 }
3430
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432                             struct btrfs_key *key,
3433                             struct cache_tree *root_cache)
3434 {
3435         u64 dirid;
3436         u64 index;
3437         u32 len;
3438         u32 name_len;
3439         struct btrfs_root_ref *ref;
3440         char namebuf[BTRFS_NAME_LEN];
3441         int error;
3442
3443         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3444
3445         dirid = btrfs_root_ref_dirid(eb, ref);
3446         index = btrfs_root_ref_sequence(eb, ref);
3447         name_len = btrfs_root_ref_name_len(eb, ref);
3448
3449         if (name_len <= BTRFS_NAME_LEN) {
3450                 len = name_len;
3451                 error = 0;
3452         } else {
3453                 len = BTRFS_NAME_LEN;
3454                 error = REF_ERR_NAME_TOO_LONG;
3455         }
3456         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3457
3458         if (key->type == BTRFS_ROOT_REF_KEY) {
3459                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460                                  index, namebuf, len, key->type, error);
3461         } else {
3462                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463                                  index, namebuf, len, key->type, error);
3464         }
3465         return 0;
3466 }
3467
3468 static void free_corrupt_block(struct cache_extent *cache)
3469 {
3470         struct btrfs_corrupt_block *corrupt;
3471
3472         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3473         free(corrupt);
3474 }
3475
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3477
3478 /*
3479  * Repair the btree of the given root.
3480  *
3481  * The fix is to remove the node key in corrupt_blocks cache_tree.
3482  * and rebalance the tree.
3483  * After the fix, the btree should be writeable.
3484  */
3485 static int repair_btree(struct btrfs_root *root,
3486                         struct cache_tree *corrupt_blocks)
3487 {
3488         struct btrfs_trans_handle *trans;
3489         struct btrfs_path path;
3490         struct btrfs_corrupt_block *corrupt;
3491         struct cache_extent *cache;
3492         struct btrfs_key key;
3493         u64 offset;
3494         int level;
3495         int ret = 0;
3496
3497         if (cache_tree_empty(corrupt_blocks))
3498                 return 0;
3499
3500         trans = btrfs_start_transaction(root, 1);
3501         if (IS_ERR(trans)) {
3502                 ret = PTR_ERR(trans);
3503                 fprintf(stderr, "Error starting transaction: %s\n",
3504                         strerror(-ret));
3505                 return ret;
3506         }
3507         btrfs_init_path(&path);
3508         cache = first_cache_extent(corrupt_blocks);
3509         while (cache) {
3510                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3511                                        cache);
3512                 level = corrupt->level;
3513                 path.lowest_level = level;
3514                 key.objectid = corrupt->key.objectid;
3515                 key.type = corrupt->key.type;
3516                 key.offset = corrupt->key.offset;
3517
3518                 /*
3519                  * Here we don't want to do any tree balance, since it may
3520                  * cause a balance with corrupted brother leaf/node,
3521                  * so ins_len set to 0 here.
3522                  * Balance will be done after all corrupt node/leaf is deleted.
3523                  */
3524                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3525                 if (ret < 0)
3526                         goto out;
3527                 offset = btrfs_node_blockptr(path.nodes[level],
3528                                              path.slots[level]);
3529
3530                 /* Remove the ptr */
3531                 ret = btrfs_del_ptr(trans, root, &path, level,
3532                                     path.slots[level]);
3533                 if (ret < 0)
3534                         goto out;
3535                 /*
3536                  * Remove the corresponding extent
3537                  * return value is not concerned.
3538                  */
3539                 btrfs_release_path(&path);
3540                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541                                         0, root->root_key.objectid,
3542                                         level - 1, 0);
3543                 cache = next_cache_extent(cache);
3544         }
3545
3546         /* Balance the btree using btrfs_search_slot() */
3547         cache = first_cache_extent(corrupt_blocks);
3548         while (cache) {
3549                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3550                                        cache);
3551                 memcpy(&key, &corrupt->key, sizeof(key));
3552                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3553                 if (ret < 0)
3554                         goto out;
3555                 /* return will always >0 since it won't find the item */
3556                 ret = 0;
3557                 btrfs_release_path(&path);
3558                 cache = next_cache_extent(cache);
3559         }
3560 out:
3561         btrfs_commit_transaction(trans, root);
3562         btrfs_release_path(&path);
3563         return ret;
3564 }
3565
3566 static int check_fs_root(struct btrfs_root *root,
3567                          struct cache_tree *root_cache,
3568                          struct walk_control *wc)
3569 {
3570         int ret = 0;
3571         int err = 0;
3572         int wret;
3573         int level;
3574         struct btrfs_path path;
3575         struct shared_node root_node;
3576         struct root_record *rec;
3577         struct btrfs_root_item *root_item = &root->root_item;
3578         struct cache_tree corrupt_blocks;
3579         struct orphan_data_extent *orphan;
3580         struct orphan_data_extent *tmp;
3581         enum btrfs_tree_block_status status;
3582         struct node_refs nrefs;
3583
3584         /*
3585          * Reuse the corrupt_block cache tree to record corrupted tree block
3586          *
3587          * Unlike the usage in extent tree check, here we do it in a per
3588          * fs/subvol tree base.
3589          */
3590         cache_tree_init(&corrupt_blocks);
3591         root->fs_info->corrupt_blocks = &corrupt_blocks;
3592
3593         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594                 rec = get_root_rec(root_cache, root->root_key.objectid);
3595                 BUG_ON(IS_ERR(rec));
3596                 if (btrfs_root_refs(root_item) > 0)
3597                         rec->found_root_item = 1;
3598         }
3599
3600         btrfs_init_path(&path);
3601         memset(&root_node, 0, sizeof(root_node));
3602         cache_tree_init(&root_node.root_cache);
3603         cache_tree_init(&root_node.inode_cache);
3604         memset(&nrefs, 0, sizeof(nrefs));
3605
3606         /* Move the orphan extent record to corresponding inode_record */
3607         list_for_each_entry_safe(orphan, tmp,
3608                                  &root->orphan_data_extents, list) {
3609                 struct inode_record *inode;
3610
3611                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3612                                       1);
3613                 BUG_ON(IS_ERR(inode));
3614                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615                 list_move(&orphan->list, &inode->orphan_extents);
3616         }
3617
3618         level = btrfs_header_level(root->node);
3619         memset(wc->nodes, 0, sizeof(wc->nodes));
3620         wc->nodes[level] = &root_node;
3621         wc->active_node = level;
3622         wc->root_level = level;
3623
3624         /* We may not have checked the root block, lets do that now */
3625         if (btrfs_is_leaf(root->node))
3626                 status = btrfs_check_leaf(root, NULL, root->node);
3627         else
3628                 status = btrfs_check_node(root, NULL, root->node);
3629         if (status != BTRFS_TREE_BLOCK_CLEAN)
3630                 return -EIO;
3631
3632         if (btrfs_root_refs(root_item) > 0 ||
3633             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634                 path.nodes[level] = root->node;
3635                 extent_buffer_get(root->node);
3636                 path.slots[level] = 0;
3637         } else {
3638                 struct btrfs_key key;
3639                 struct btrfs_disk_key found_key;
3640
3641                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642                 level = root_item->drop_level;
3643                 path.lowest_level = level;
3644                 if (level > btrfs_header_level(root->node) ||
3645                     level >= BTRFS_MAX_LEVEL) {
3646                         error("ignoring invalid drop level: %u", level);
3647                         goto skip_walking;
3648                 }
3649                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3650                 if (wret < 0)
3651                         goto skip_walking;
3652                 btrfs_node_key(path.nodes[level], &found_key,
3653                                 path.slots[level]);
3654                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655                                         sizeof(found_key)));
3656         }
3657
3658         while (1) {
3659                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3660                 if (wret < 0)
3661                         ret = wret;
3662                 if (wret != 0)
3663                         break;
3664
3665                 wret = walk_up_tree(root, &path, wc, &level);
3666                 if (wret < 0)
3667                         ret = wret;
3668                 if (wret != 0)
3669                         break;
3670         }
3671 skip_walking:
3672         btrfs_release_path(&path);
3673
3674         if (!cache_tree_empty(&corrupt_blocks)) {
3675                 struct cache_extent *cache;
3676                 struct btrfs_corrupt_block *corrupt;
3677
3678                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679                        root->root_key.objectid);
3680                 cache = first_cache_extent(&corrupt_blocks);
3681                 while (cache) {
3682                         corrupt = container_of(cache,
3683                                                struct btrfs_corrupt_block,
3684                                                cache);
3685                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686                                cache->start, corrupt->level,
3687                                corrupt->key.objectid, corrupt->key.type,
3688                                corrupt->key.offset);
3689                         cache = next_cache_extent(cache);
3690                 }
3691                 if (repair) {
3692                         printf("Try to repair the btree for root %llu\n",
3693                                root->root_key.objectid);
3694                         ret = repair_btree(root, &corrupt_blocks);
3695                         if (ret < 0)
3696                                 fprintf(stderr, "Failed to repair btree: %s\n",
3697                                         strerror(-ret));
3698                         if (!ret)
3699                                 printf("Btree for root %llu is fixed\n",
3700                                        root->root_key.objectid);
3701                 }
3702         }
3703
3704         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3705         if (err < 0)
3706                 ret = err;
3707
3708         if (root_node.current) {
3709                 root_node.current->checked = 1;
3710                 maybe_free_inode_rec(&root_node.inode_cache,
3711                                 root_node.current);
3712         }
3713
3714         err = check_inode_recs(root, &root_node.inode_cache);
3715         if (!ret)
3716                 ret = err;
3717
3718         free_corrupt_blocks_tree(&corrupt_blocks);
3719         root->fs_info->corrupt_blocks = NULL;
3720         free_orphan_data_extents(&root->orphan_data_extents);
3721         return ret;
3722 }
3723
3724 static int fs_root_objectid(u64 objectid)
3725 {
3726         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3728                 return 1;
3729         return is_fstree(objectid);
3730 }
3731
3732 static int check_fs_roots(struct btrfs_root *root,
3733                           struct cache_tree *root_cache)
3734 {
3735         struct btrfs_path path;
3736         struct btrfs_key key;
3737         struct walk_control wc;
3738         struct extent_buffer *leaf, *tree_node;
3739         struct btrfs_root *tmp_root;
3740         struct btrfs_root *tree_root = root->fs_info->tree_root;
3741         int ret;
3742         int err = 0;
3743
3744         if (ctx.progress_enabled) {
3745                 ctx.tp = TASK_FS_ROOTS;
3746                 task_start(ctx.info);
3747         }
3748
3749         /*
3750          * Just in case we made any changes to the extent tree that weren't
3751          * reflected into the free space cache yet.
3752          */
3753         if (repair)
3754                 reset_cached_block_groups(root->fs_info);
3755         memset(&wc, 0, sizeof(wc));
3756         cache_tree_init(&wc.shared);
3757         btrfs_init_path(&path);
3758
3759 again:
3760         key.offset = 0;
3761         key.objectid = 0;
3762         key.type = BTRFS_ROOT_ITEM_KEY;
3763         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3764         if (ret < 0) {
3765                 err = 1;
3766                 goto out;
3767         }
3768         tree_node = tree_root->node;
3769         while (1) {
3770                 if (tree_node != tree_root->node) {
3771                         free_root_recs_tree(root_cache);
3772                         btrfs_release_path(&path);
3773                         goto again;
3774                 }
3775                 leaf = path.nodes[0];
3776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777                         ret = btrfs_next_leaf(tree_root, &path);
3778                         if (ret) {
3779                                 if (ret < 0)
3780                                         err = 1;
3781                                 break;
3782                         }
3783                         leaf = path.nodes[0];
3784                 }
3785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787                     fs_root_objectid(key.objectid)) {
3788                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789                                 tmp_root = btrfs_read_fs_root_no_cache(
3790                                                 root->fs_info, &key);
3791                         } else {
3792                                 key.offset = (u64)-1;
3793                                 tmp_root = btrfs_read_fs_root(
3794                                                 root->fs_info, &key);
3795                         }
3796                         if (IS_ERR(tmp_root)) {
3797                                 err = 1;
3798                                 goto next;
3799                         }
3800                         ret = check_fs_root(tmp_root, root_cache, &wc);
3801                         if (ret == -EAGAIN) {
3802                                 free_root_recs_tree(root_cache);
3803                                 btrfs_release_path(&path);
3804                                 goto again;
3805                         }
3806                         if (ret)
3807                                 err = 1;
3808                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809                                 btrfs_free_fs_root(tmp_root);
3810                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3812                         process_root_ref(leaf, path.slots[0], &key,
3813                                          root_cache);
3814                 }
3815 next:
3816                 path.slots[0]++;
3817         }
3818 out:
3819         btrfs_release_path(&path);
3820         if (err)
3821                 free_extent_cache_tree(&wc.shared);
3822         if (!cache_tree_empty(&wc.shared))
3823                 fprintf(stderr, "warning line %d\n", __LINE__);
3824
3825         task_stop(ctx.info);
3826
3827         return err;
3828 }
3829
/* Error bits, one per kind of problem; bit 0 is not used. */
#define ROOT_DIR_ERROR          (1 << 1)        /* ROOT_DIR is bad */
#define DIR_ITEM_MISSING        (1 << 2)        /* no DIR_ITEM was found */
#define DIR_ITEM_MISMATCH       (1 << 3)        /* DIR_ITEM exists but does not match */
#define INODE_REF_MISSING       (1 << 4)        /* no INODE_REF/INODE_EXTREF was found */
#define INODE_ITEM_MISSING      (1 << 5)        /* no INODE_ITEM was found */
#define INODE_ITEM_MISMATCH     (1 << 6)        /* INODE_ITEM exists but does not match */
#define FILE_EXTENT_ERROR       (1 << 7)        /* FILE_EXTENT is bad */
#define ODD_CSUM_ITEM           (1 << 8)        /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1 << 9)        /* no CSUM_ITEM was found */
#define LINK_COUNT_ERROR        (1 << 10)       /* nlink count of INODE_ITEM is wrong */
#define NBYTES_ERROR            (1 << 11)       /* nbytes count of INODE_ITEM is wrong */
#define ISIZE_ERROR             (1 << 12)       /* size count of INODE_ITEM is wrong */
#define ORPHAN_ITEM             (1 << 13)       /* INODE_ITEM has no reference */
#define NO_INODE_ITEM           (1 << 14)       /* there is no inode_item */
#define LAST_ITEM               (1 << 15)       /* complete this tree traversal */
#define ROOT_REF_MISSING        (1 << 16)       /* no ROOT_REF was found */
#define ROOT_REF_MISMATCH       (1 << 17)       /* ROOT_REF exists but does not match */
3847
3848 /*
3849  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3850  * INODE_REF/INODE_EXTREF match.
3851  *
3852  * @root:       the root of the fs/file tree
3853  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
3854  * @key:        the key of the DIR_ITEM/DIR_INDEX
3855  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
3856  *              distinguish root_dir between normal dir/file
3857  * @name:       the name in the INODE_REF/INODE_EXTREF
3858  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
3859  * @mode:       the st_mode of INODE_ITEM
3860  *
3861  * Return 0 if no error occurred.
3862  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3863  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3864  * dir/file.
3865  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3866  * not match for normal dir/file.
3867  */
3868 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3869                          struct btrfs_key *key, u64 index, char *name,
3870                          u32 namelen, u32 mode)
3871 {
3872         struct btrfs_path path;
3873         struct extent_buffer *node;
3874         struct btrfs_dir_item *di;
3875         struct btrfs_key location;
3876         char namebuf[BTRFS_NAME_LEN] = {0};
3877         u32 total;
3878         u32 cur = 0;
3879         u32 len;
3880         u32 name_len;
3881         u32 data_len;
3882         u8 filetype;
3883         int slot;
3884         int ret;
3885
3886         btrfs_init_path(&path);
3887         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3888         if (ret < 0) {
3889                 ret = DIR_ITEM_MISSING;
3890                 goto out;
3891         }
3892
3893         /* Process root dir and goto out*/
3894         if (index == 0) {
3895                 if (ret == 0) {
3896                         ret = ROOT_DIR_ERROR;
3897                         error(
3898                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3899                                 root->objectid,
3900                                 ref_key->type == BTRFS_INODE_REF_KEY ?
3901                                         "REF" : "EXTREF",
3902                                 ref_key->objectid, ref_key->offset,
3903                                 key->type == BTRFS_DIR_ITEM_KEY ?
3904                                         "DIR_ITEM" : "DIR_INDEX");
3905                 } else {
3906                         ret = 0;
3907                 }
3908
3909                 goto out;
3910         }
3911
3912         /* Process normal file/dir */
3913         if (ret > 0) {
3914                 ret = DIR_ITEM_MISSING;
3915                 error(
3916                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3917                         root->objectid,
3918                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3919                         ref_key->objectid, ref_key->offset,
3920                         key->type == BTRFS_DIR_ITEM_KEY ?
3921                                 "DIR_ITEM" : "DIR_INDEX",
3922                         key->objectid, key->offset, namelen, name,
3923                         imode_to_type(mode));
3924                 goto out;
3925         }
3926
3927         /* Check whether inode_id/filetype/name match */
3928         node = path.nodes[0];
3929         slot = path.slots[0];
3930         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3931         total = btrfs_item_size_nr(node, slot);
3932         while (cur < total) {
3933                 ret = DIR_ITEM_MISMATCH;
3934                 name_len = btrfs_dir_name_len(node, di);
3935                 data_len = btrfs_dir_data_len(node, di);
3936
3937                 btrfs_dir_item_key_to_cpu(node, di, &location);
3938                 if (location.objectid != ref_key->objectid ||
3939                     location.type !=  BTRFS_INODE_ITEM_KEY ||
3940                     location.offset != 0)
3941                         goto next;
3942
3943                 filetype = btrfs_dir_type(node, di);
3944                 if (imode_to_type(mode) != filetype)
3945                         goto next;
3946
3947                 if (name_len <= BTRFS_NAME_LEN) {
3948                         len = name_len;
3949                 } else {
3950                         len = BTRFS_NAME_LEN;
3951                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3952                         root->objectid,
3953                         key->type == BTRFS_DIR_ITEM_KEY ?
3954                         "DIR_ITEM" : "DIR_INDEX",
3955                         key->objectid, key->offset, name_len);
3956                 }
3957                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3958                 if (len != namelen || strncmp(namebuf, name, len))
3959                         goto next;
3960
3961                 ret = 0;
3962                 goto out;
3963 next:
3964                 len = sizeof(*di) + name_len + data_len;
3965                 di = (struct btrfs_dir_item *)((char *)di + len);
3966                 cur += len;
3967         }
3968         if (ret == DIR_ITEM_MISMATCH)
3969                 error(
3970                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3971                         root->objectid,
3972                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3973                         ref_key->objectid, ref_key->offset,
3974                         key->type == BTRFS_DIR_ITEM_KEY ?
3975                                 "DIR_ITEM" : "DIR_INDEX",
3976                         key->objectid, key->offset, namelen, name,
3977                         imode_to_type(mode));
3978 out:
3979         btrfs_release_path(&path);
3980         return ret;
3981 }
3982
3983 /*
3984  * Traverse the given INODE_REF and call find_dir_item() to find related
3985  * DIR_ITEM/DIR_INDEX.
3986  *
3987  * @root:       the root of the fs/file tree
3988  * @ref_key:    the key of the INODE_REF
3989  * @refs:       the count of INODE_REF
3990  * @mode:       the st_mode of INODE_ITEM
3991  *
3992  * Return 0 if no error occurred.
3993  */
3994 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3995                            struct extent_buffer *node, int slot, u64 *refs,
3996                            int mode)
3997 {
3998         struct btrfs_key key;
3999         struct btrfs_inode_ref *ref;
4000         char namebuf[BTRFS_NAME_LEN] = {0};
4001         u32 total;
4002         u32 cur = 0;
4003         u32 len;
4004         u32 name_len;
4005         u64 index;
4006         int ret, err = 0;
4007
4008         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4009         total = btrfs_item_size_nr(node, slot);
4010
4011 next:
4012         /* Update inode ref count */
4013         (*refs)++;
4014
4015         index = btrfs_inode_ref_index(node, ref);
4016         name_len = btrfs_inode_ref_name_len(node, ref);
4017         if (name_len <= BTRFS_NAME_LEN) {
4018                 len = name_len;
4019         } else {
4020                 len = BTRFS_NAME_LEN;
4021                 warning("root %llu INODE_REF[%llu %llu] name too long",
4022                         root->objectid, ref_key->objectid, ref_key->offset);
4023         }
4024
4025         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4026
4027         /* Check root dir ref name */
4028         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4029                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4030                       root->objectid, ref_key->objectid, ref_key->offset,
4031                       namebuf);
4032                 err |= ROOT_DIR_ERROR;
4033         }
4034
4035         /* Find related DIR_INDEX */
4036         key.objectid = ref_key->offset;
4037         key.type = BTRFS_DIR_INDEX_KEY;
4038         key.offset = index;
4039         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4040         err |= ret;
4041
4042         /* Find related dir_item */
4043         key.objectid = ref_key->offset;
4044         key.type = BTRFS_DIR_ITEM_KEY;
4045         key.offset = btrfs_name_hash(namebuf, len);
4046         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4047         err |= ret;
4048
4049         len = sizeof(*ref) + name_len;
4050         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4051         cur += len;
4052         if (cur < total)
4053                 goto next;
4054
4055         return err;
4056 }
4057
4058 /*
4059  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4060  * DIR_ITEM/DIR_INDEX.
4061  *
4062  * @root:       the root of the fs/file tree
4063  * @ref_key:    the key of the INODE_EXTREF
4064  * @refs:       the count of INODE_EXTREF
4065  * @mode:       the st_mode of INODE_ITEM
4066  *
4067  * Return 0 if no error occurred.
4068  */
4069 static int check_inode_extref(struct btrfs_root *root,
4070                               struct btrfs_key *ref_key,
4071                               struct extent_buffer *node, int slot, u64 *refs,
4072                               int mode)
4073 {
4074         struct btrfs_key key;
4075         struct btrfs_inode_extref *extref;
4076         char namebuf[BTRFS_NAME_LEN] = {0};
4077         u32 total;
4078         u32 cur = 0;
4079         u32 len;
4080         u32 name_len;
4081         u64 index;
4082         u64 parent;
4083         int ret;
4084         int err = 0;
4085
4086         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4087         total = btrfs_item_size_nr(node, slot);
4088
4089 next:
4090         /* update inode ref count */
4091         (*refs)++;
4092         name_len = btrfs_inode_extref_name_len(node, extref);
4093         index = btrfs_inode_extref_index(node, extref);
4094         parent = btrfs_inode_extref_parent(node, extref);
4095         if (name_len <= BTRFS_NAME_LEN) {
4096                 len = name_len;
4097         } else {
4098                 len = BTRFS_NAME_LEN;
4099                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4100                         root->objectid, ref_key->objectid, ref_key->offset);
4101         }
4102         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4103
4104         /* Check root dir ref name */
4105         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4106                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4107                       root->objectid, ref_key->objectid, ref_key->offset,
4108                       namebuf);
4109                 err |= ROOT_DIR_ERROR;
4110         }
4111
4112         /* find related dir_index */
4113         key.objectid = parent;
4114         key.type = BTRFS_DIR_INDEX_KEY;
4115         key.offset = index;
4116         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4117         err |= ret;
4118
4119         /* find related dir_item */
4120         key.objectid = parent;
4121         key.type = BTRFS_DIR_ITEM_KEY;
4122         key.offset = btrfs_name_hash(namebuf, len);
4123         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4124         err |= ret;
4125
4126         len = sizeof(*extref) + name_len;
4127         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4128         cur += len;
4129
4130         if (cur < total)
4131                 goto next;
4132
4133         return err;
4134 }
4135
4136 /*
4137  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4138  * DIR_ITEM/DIR_INDEX match.
4139  *
4140  * @root:       the root of the fs/file tree
4141  * @key:        the key of the INODE_REF/INODE_EXTREF
4142  * @name:       the name in the INODE_REF/INODE_EXTREF
4143  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4144  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4145  * to (u64)-1
4146  * @ext_ref:    the EXTENDED_IREF feature
4147  *
4148  * Return 0 if no error occurred.
4149  * Return >0 for error bitmap
4150  */
4151 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4152                           char *name, int namelen, u64 index,
4153                           unsigned int ext_ref)
4154 {
4155         struct btrfs_path path;
4156         struct btrfs_inode_ref *ref;
4157         struct btrfs_inode_extref *extref;
4158         struct extent_buffer *node;
4159         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4160         u32 total;
4161         u32 cur = 0;
4162         u32 len;
4163         u32 ref_namelen;
4164         u64 ref_index;
4165         u64 parent;
4166         u64 dir_id;
4167         int slot;
4168         int ret;
4169
4170         btrfs_init_path(&path);
4171         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4172         if (ret) {
4173                 ret = INODE_REF_MISSING;
4174                 goto extref;
4175         }
4176
4177         node = path.nodes[0];
4178         slot = path.slots[0];
4179
4180         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4181         total = btrfs_item_size_nr(node, slot);
4182
4183         /* Iterate all entry of INODE_REF */
4184         while (cur < total) {
4185                 ret = INODE_REF_MISSING;
4186
4187                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4188                 ref_index = btrfs_inode_ref_index(node, ref);
4189                 if (index != (u64)-1 && index != ref_index)
4190                         goto next_ref;
4191
4192                 if (ref_namelen <= BTRFS_NAME_LEN) {
4193                         len = ref_namelen;
4194                 } else {
4195                         len = BTRFS_NAME_LEN;
4196                         warning("root %llu INODE %s[%llu %llu] name too long",
4197                                 root->objectid,
4198                                 key->type == BTRFS_INODE_REF_KEY ?
4199                                         "REF" : "EXTREF",
4200                                 key->objectid, key->offset);
4201                 }
4202                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4203                                    len);
4204
4205                 if (len != namelen || strncmp(ref_namebuf, name, len))
4206                         goto next_ref;
4207
4208                 ret = 0;
4209                 goto out;
4210 next_ref:
4211                 len = sizeof(*ref) + ref_namelen;
4212                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4213                 cur += len;
4214         }
4215
4216 extref:
4217         /* Skip if not support EXTENDED_IREF feature */
4218         if (!ext_ref)
4219                 goto out;
4220
4221         btrfs_release_path(&path);
4222         btrfs_init_path(&path);
4223
4224         dir_id = key->offset;
4225         key->type = BTRFS_INODE_EXTREF_KEY;
4226         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4227
4228         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4229         if (ret) {
4230                 ret = INODE_REF_MISSING;
4231                 goto out;
4232         }
4233
4234         node = path.nodes[0];
4235         slot = path.slots[0];
4236
4237         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4238         cur = 0;
4239         total = btrfs_item_size_nr(node, slot);
4240
4241         /* Iterate all entry of INODE_EXTREF */
4242         while (cur < total) {
4243                 ret = INODE_REF_MISSING;
4244
4245                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4246                 ref_index = btrfs_inode_extref_index(node, extref);
4247                 parent = btrfs_inode_extref_parent(node, extref);
4248                 if (index != (u64)-1 && index != ref_index)
4249                         goto next_extref;
4250
4251                 if (parent != dir_id)
4252                         goto next_extref;
4253
4254                 if (ref_namelen <= BTRFS_NAME_LEN) {
4255                         len = ref_namelen;
4256                 } else {
4257                         len = BTRFS_NAME_LEN;
4258                         warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4259                                 root->objectid,
4260                                 key->type == BTRFS_INODE_REF_KEY ?
4261                                         "REF" : "EXTREF",
4262                                 key->objectid, key->offset);
4263                 }
4264                 read_extent_buffer(node, ref_namebuf,
4265                                    (unsigned long)(extref + 1), len);
4266
4267                 if (len != namelen || strncmp(ref_namebuf, name, len))
4268                         goto next_extref;
4269
4270                 ret = 0;
4271                 goto out;
4272
4273 next_extref:
4274                 len = sizeof(*extref) + ref_namelen;
4275                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4276                 cur += len;
4277
4278         }
4279 out:
4280         btrfs_release_path(&path);
4281         return ret;
4282 }
4283
4284 /*
4285  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4286  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4287  *
4288  * @root:       the root of the fs/file tree
4289  * @key:        the key of the INODE_REF/INODE_EXTREF
4290  * @size:       the st_size of the INODE_ITEM
4291  * @ext_ref:    the EXTENDED_IREF feature
4292  *
4293  * Return 0 if no error occurred.
4294  */
4295 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4296                           struct extent_buffer *node, int slot, u64 *size,
4297                           unsigned int ext_ref)
4298 {
4299         struct btrfs_dir_item *di;
4300         struct btrfs_inode_item *ii;
4301         struct btrfs_path path;
4302         struct btrfs_key location;
4303         char namebuf[BTRFS_NAME_LEN] = {0};
4304         u32 total;
4305         u32 cur = 0;
4306         u32 len;
4307         u32 name_len;
4308         u32 data_len;
4309         u8 filetype;
4310         u32 mode;
4311         u64 index;
4312         int ret;
4313         int err = 0;
4314
4315         /*
4316          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4317          * ignore index check.
4318          */
4319         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4320
4321         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4322         total = btrfs_item_size_nr(node, slot);
4323
4324         while (cur < total) {
4325                 data_len = btrfs_dir_data_len(node, di);
4326                 if (data_len)
4327                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4328                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4329                               "DIR_ITEM" : "DIR_INDEX",
4330                               key->objectid, key->offset, data_len);
4331
4332                 name_len = btrfs_dir_name_len(node, di);
4333                 if (name_len <= BTRFS_NAME_LEN) {
4334                         len = name_len;
4335                 } else {
4336                         len = BTRFS_NAME_LEN;
4337                         warning("root %llu %s[%llu %llu] name too long",
4338                                 root->objectid,
4339                                 key->type == BTRFS_DIR_ITEM_KEY ?
4340                                 "DIR_ITEM" : "DIR_INDEX",
4341                                 key->objectid, key->offset);
4342                 }
4343                 (*size) += name_len;
4344
4345                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4346                 filetype = btrfs_dir_type(node, di);
4347
4348                 btrfs_init_path(&path);
4349                 btrfs_dir_item_key_to_cpu(node, di, &location);
4350
4351                 /* Ignore related ROOT_ITEM check */
4352                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4353                         goto next;
4354
4355                 /* Check relative INODE_ITEM(existence/filetype) */
4356                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4357                 if (ret) {
4358                         err |= INODE_ITEM_MISSING;
4359                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4360                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4361                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4362                               key->offset, location.objectid, name_len,
4363                               namebuf, filetype);
4364                         goto next;
4365                 }
4366
4367                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4368                                     struct btrfs_inode_item);
4369                 mode = btrfs_inode_mode(path.nodes[0], ii);
4370
4371                 if (imode_to_type(mode) != filetype) {
4372                         err |= INODE_ITEM_MISMATCH;
4373                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4374                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4375                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4376                               key->offset, name_len, namebuf, filetype);
4377                 }
4378
4379                 /* Check relative INODE_REF/INODE_EXTREF */
4380                 location.type = BTRFS_INODE_REF_KEY;
4381                 location.offset = key->objectid;
4382                 ret = find_inode_ref(root, &location, namebuf, len,
4383                                        index, ext_ref);
4384                 err |= ret;
4385                 if (ret & INODE_REF_MISSING)
4386                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4387                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4388                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4389                               key->offset, name_len, namebuf, filetype);
4390
4391 next:
4392                 btrfs_release_path(&path);
4393                 len = sizeof(*di) + name_len + data_len;
4394                 di = (struct btrfs_dir_item *)((char *)di + len);
4395                 cur += len;
4396
4397                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4398                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4399                               root->objectid, key->objectid, key->offset);
4400                         break;
4401                 }
4402         }
4403
4404         return err;
4405 }
4406
4407 /*
4408  * Check file extent datasum/hole, update the size of the file extents,
4409  * check and update the last offset of the file extent.
4410  *
4411  * @root:       the root of fs/file tree.
4412  * @fkey:       the key of the file extent.
4413  * @nodatasum:  INODE_NODATASUM feature.
4414  * @size:       the sum of all EXTENT_DATA items size for this inode.
4415  * @end:        the offset of the last extent.
4416  *
4417  * Return 0 if no error occurred.
4418  */
4419 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4420                              struct extent_buffer *node, int slot,
4421                              unsigned int nodatasum, u64 *size, u64 *end)
4422 {
4423         struct btrfs_file_extent_item *fi;
4424         u64 disk_bytenr;
4425         u64 disk_num_bytes;
4426         u64 extent_num_bytes;
4427         u64 found;
4428         unsigned int extent_type;
4429         unsigned int is_hole;
4430         int ret;
4431         int err = 0;
4432
4433         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4434
4435         extent_type = btrfs_file_extent_type(node, fi);
4436         /* Skip if file extent is inline */
4437         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4438                 struct btrfs_item *e = btrfs_item_nr(slot);
4439                 u32 item_inline_len;
4440
4441                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4442                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4443                 if (extent_num_bytes == 0 ||
4444                     extent_num_bytes != item_inline_len)
4445                         err |= FILE_EXTENT_ERROR;
4446                 *size += extent_num_bytes;
4447                 return err;
4448         }
4449
4450         /* Check extent type */
4451         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4452                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4453                 err |= FILE_EXTENT_ERROR;
4454                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4455                       root->objectid, fkey->objectid, fkey->offset);
4456                 return err;
4457         }
4458
4459         /* Check REG_EXTENT/PREALLOC_EXTENT */
4460         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4461         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4462         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4463         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4464
4465         /* Check EXTENT_DATA datasum */
4466         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4467         if (found > 0 && nodatasum) {
4468                 err |= ODD_CSUM_ITEM;
4469                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4470                       root->objectid, fkey->objectid, fkey->offset);
4471         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4472                    !is_hole &&
4473                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4474                 err |= CSUM_ITEM_MISSING;
4475                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4476                       root->objectid, fkey->objectid, fkey->offset);
4477         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4478                 err |= ODD_CSUM_ITEM;
4479                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4480                       root->objectid, fkey->objectid, fkey->offset);
4481         }
4482
4483         /* Check EXTENT_DATA hole */
4484         if (no_holes && is_hole) {
4485                 err |= FILE_EXTENT_ERROR;
4486                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4487                       root->objectid, fkey->objectid, fkey->offset);
4488         } else if (!no_holes && *end != fkey->offset) {
4489                 err |= FILE_EXTENT_ERROR;
4490                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4491                       root->objectid, fkey->objectid, fkey->offset);
4492         }
4493
4494         *end += extent_num_bytes;
4495         if (!is_hole)
4496                 *size += extent_num_bytes;
4497
4498         return err;
4499 }
4500
4501 /*
4502  * Check INODE_ITEM and related ITEMs (the same inode number)
4503  * 1. check link count
4504  * 2. check inode ref/extref
4505  * 3. check dir item/index
4506  *
4507  * @ext_ref:    the EXTENDED_IREF feature
4508  *
4509  * Return 0 if no error occurred.
4510  * Return >0 for error or hit the traversal is done(by error bitmap)
4511  */
4512 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4513                             unsigned int ext_ref)
4514 {
4515         struct extent_buffer *node;
4516         struct btrfs_inode_item *ii;
4517         struct btrfs_key key;
4518         u64 inode_id;
4519         u32 mode;
4520         u64 nlink;
4521         u64 nbytes;
4522         u64 isize;
4523         u64 size = 0;
4524         u64 refs = 0;
4525         u64 extent_end = 0;
4526         u64 extent_size = 0;
4527         unsigned int dir;
4528         unsigned int nodatasum;
4529         int slot;
4530         int ret;
4531         int err = 0;
4532
4533         node = path->nodes[0];
4534         slot = path->slots[0];
4535
4536         btrfs_item_key_to_cpu(node, &key, slot);
4537         inode_id = key.objectid;
4538
4539         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4540                 ret = btrfs_next_item(root, path);
4541                 if (ret > 0)
4542                         err |= LAST_ITEM;
4543                 return err;
4544         }
4545
4546         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4547         isize = btrfs_inode_size(node, ii);
4548         nbytes = btrfs_inode_nbytes(node, ii);
4549         mode = btrfs_inode_mode(node, ii);
4550         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4551         nlink = btrfs_inode_nlink(node, ii);
4552         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4553
4554         while (1) {
4555                 ret = btrfs_next_item(root, path);
4556                 if (ret < 0) {
4557                         /* out will fill 'err' rusing current statistics */
4558                         goto out;
4559                 } else if (ret > 0) {
4560                         err |= LAST_ITEM;
4561                         goto out;
4562                 }
4563
4564                 node = path->nodes[0];
4565                 slot = path->slots[0];
4566                 btrfs_item_key_to_cpu(node, &key, slot);
4567                 if (key.objectid != inode_id)
4568                         goto out;
4569
4570                 switch (key.type) {
4571                 case BTRFS_INODE_REF_KEY:
4572                         ret = check_inode_ref(root, &key, node, slot, &refs,
4573                                               mode);
4574                         err |= ret;
4575                         break;
4576                 case BTRFS_INODE_EXTREF_KEY:
4577                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4578                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4579                                         root->objectid, key.objectid,
4580                                         key.offset);
4581                         ret = check_inode_extref(root, &key, node, slot, &refs,
4582                                                  mode);
4583                         err |= ret;
4584                         break;
4585                 case BTRFS_DIR_ITEM_KEY:
4586                 case BTRFS_DIR_INDEX_KEY:
4587                         if (!dir) {
4588                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4589                                         root->objectid, inode_id,
4590                                         imode_to_type(mode), key.objectid,
4591                                         key.offset);
4592                         }
4593                         ret = check_dir_item(root, &key, node, slot, &size,
4594                                              ext_ref);
4595                         err |= ret;
4596                         break;
4597                 case BTRFS_EXTENT_DATA_KEY:
4598                         if (dir) {
4599                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4600                                         root->objectid, inode_id, key.objectid,
4601                                         key.offset);
4602                         }
4603                         ret = check_file_extent(root, &key, node, slot,
4604                                                 nodatasum, &extent_size,
4605                                                 &extent_end);
4606                         err |= ret;
4607                         break;
4608                 case BTRFS_XATTR_ITEM_KEY:
4609                         break;
4610                 default:
4611                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4612                               key.objectid, key.type, key.offset);
4613                 }
4614         }
4615
4616 out:
4617         /* verify INODE_ITEM nlink/isize/nbytes */
4618         if (dir) {
4619                 if (nlink != 1) {
4620                         err |= LINK_COUNT_ERROR;
4621                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4622                               root->objectid, inode_id, nlink);
4623                 }
4624
4625                 /*
4626                  * Just a warning, as dir inode nbytes is just an
4627                  * instructive value.
4628                  */
4629                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4630                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4631                                 root->objectid, inode_id, root->nodesize);
4632                 }
4633
4634                 if (isize != size) {
4635                         err |= ISIZE_ERROR;
4636                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4637                               root->objectid, inode_id, isize, size);
4638                 }
4639         } else {
4640                 if (nlink != refs) {
4641                         err |= LINK_COUNT_ERROR;
4642                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4643                               root->objectid, inode_id, nlink, refs);
4644                 } else if (!nlink) {
4645                         err |= ORPHAN_ITEM;
4646                 }
4647
4648                 if (!nbytes && !no_holes && extent_end < isize) {
4649                         err |= NBYTES_ERROR;
4650                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4651                               root->objectid, inode_id, isize);
4652                 }
4653
4654                 if (nbytes != extent_size) {
4655                         err |= NBYTES_ERROR;
4656                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4657                               root->objectid, inode_id, nbytes, extent_size);
4658                 }
4659         }
4660
4661         return err;
4662 }
4663
4664 /*
4665  * Iterate all item on the tree and call check_inode_item() to check.
4666  *
4667  * @root:       the root of the tree to be checked.
4668  * @ext_ref:    the EXTENDED_IREF feature
4669  *
4670  * Return 0 if no error found.
4671  * Return <0 for error.
4672  * All internal error bitmap will be converted to -EIO, to avoid
4673  * mixing negative and postive return value.
4674  */
4675 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4676 {
4677         struct btrfs_path *path;
4678         struct btrfs_key key;
4679         u64 inode_id;
4680         int ret, err = 0;
4681
4682         path = btrfs_alloc_path();
4683         if (!path)
4684                 return -ENOMEM;
4685
4686         key.objectid = 0;
4687         key.type = 0;
4688         key.offset = 0;
4689
4690         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4691         if (ret < 0)
4692                 goto out;
4693
4694         while (1) {
4695                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4696
4697                 /*
4698                  * All check must start with inode item, skip if not
4699                  */
4700                 if (key.type == BTRFS_INODE_ITEM_KEY) {
4701                         ret = check_inode_item(root, path, ext_ref);
4702                         err |= ret;
4703                         if (err & LAST_ITEM)
4704                                 goto out;
4705                         continue;
4706                 }
4707                 error("root %llu ITEM[%llu %u %llu] isn't INODE_ITEM, skip to next inode",
4708                       root->objectid, key.objectid, key.type,
4709                       key.offset);
4710
4711                 err |= NO_INODE_ITEM;
4712                 inode_id = key.objectid;
4713
4714                 /*
4715                  * skip to next inode
4716                  * TODO: Maybe search_slot() will be faster?
4717                  */
4718                 do {
4719                         ret = btrfs_next_item(root, path);
4720                         if (ret > 0) {
4721                                 goto out;
4722                         } else if (ret < 0) {
4723                                 err = ret;
4724                                 goto out;
4725                         }
4726                         btrfs_item_key_to_cpu(path->nodes[0], &key,
4727                                               path->slots[0]);
4728                 } while (inode_id == key.objectid);
4729         }
4730
4731 out:
4732         err &= ~LAST_ITEM;
4733         if (err && !ret)
4734                 ret = -EIO;
4735         btrfs_free_path(path);
4736         return ret;
4737 }
4738
4739 /*
4740  * Find the relative ref for root_ref and root_backref.
4741  *
4742  * @root:       the root of the root tree.
4743  * @ref_key:    the key of the root ref.
4744  *
4745  * Return 0 if no error occurred.
4746  */
4747 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4748                           struct extent_buffer *node, int slot)
4749 {
4750         struct btrfs_path *path;
4751         struct btrfs_key key;
4752         struct btrfs_root_ref *ref;
4753         struct btrfs_root_ref *backref;
4754         char ref_name[BTRFS_NAME_LEN];
4755         char backref_name[BTRFS_NAME_LEN];
4756         u64 ref_dirid;
4757         u64 ref_seq;
4758         u32 ref_namelen;
4759         u64 backref_dirid;
4760         u64 backref_seq;
4761         u32 backref_namelen;
4762         u32 len;
4763         int ret;
4764         int err = 0;
4765
4766         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
4767         ref_dirid = btrfs_root_ref_dirid(node, ref);
4768         ref_seq = btrfs_root_ref_sequence(node, ref);
4769         ref_namelen = btrfs_root_ref_name_len(node, ref);
4770
4771         if (ref_namelen <= BTRFS_NAME_LEN) {
4772                 len = ref_namelen;
4773         } else {
4774                 len = BTRFS_NAME_LEN;
4775                 warning("%s[%llu %llu] ref_name too long",
4776                         ref_key->type == BTRFS_ROOT_REF_KEY ?
4777                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
4778                         ref_key->offset);
4779         }
4780         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
4781
4782         /* Find relative root_ref */
4783         key.objectid = ref_key->offset;
4784         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
4785         key.offset = ref_key->objectid;
4786
4787         path = btrfs_alloc_path();
4788         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4789         if (ret) {
4790                 err |= ROOT_REF_MISSING;
4791                 error("%s[%llu %llu] couldn't find relative ref",
4792                       ref_key->type == BTRFS_ROOT_REF_KEY ?
4793                       "ROOT_REF" : "ROOT_BACKREF",
4794                       ref_key->objectid, ref_key->offset);
4795                 goto out;
4796         }
4797
4798         backref = btrfs_item_ptr(path->nodes[0], path->slots[0],
4799                                  struct btrfs_root_ref);
4800         backref_dirid = btrfs_root_ref_dirid(path->nodes[0], backref);
4801         backref_seq = btrfs_root_ref_sequence(path->nodes[0], backref);
4802         backref_namelen = btrfs_root_ref_name_len(path->nodes[0], backref);
4803
4804         if (backref_namelen <= BTRFS_NAME_LEN) {
4805                 len = backref_namelen;
4806         } else {
4807                 len = BTRFS_NAME_LEN;
4808                 warning("%s[%llu %llu] ref_name too long",
4809                         key.type == BTRFS_ROOT_REF_KEY ?
4810                         "ROOT_REF" : "ROOT_BACKREF",
4811                         key.objectid, key.offset);
4812         }
4813         read_extent_buffer(path->nodes[0], backref_name,
4814                            (unsigned long)(backref + 1), len);
4815
4816         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
4817             ref_namelen != backref_namelen ||
4818             strncmp(ref_name, backref_name, len)) {
4819                 err |= ROOT_REF_MISMATCH;
4820                 error("%s[%llu %llu] mismatch relative ref",
4821                       ref_key->type == BTRFS_ROOT_REF_KEY ?
4822                       "ROOT_REF" : "ROOT_BACKREF",
4823                       ref_key->objectid, ref_key->offset);
4824         }
4825 out:
4826         btrfs_free_path(path);
4827         return err;
4828 }
4829
4830 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4831 {
4832         struct list_head *cur = rec->backrefs.next;
4833         struct extent_backref *back;
4834         struct tree_backref *tback;
4835         struct data_backref *dback;
4836         u64 found = 0;
4837         int err = 0;
4838
4839         while(cur != &rec->backrefs) {
4840                 back = to_extent_backref(cur);
4841                 cur = cur->next;
4842                 if (!back->found_extent_tree) {
4843                         err = 1;
4844                         if (!print_errs)
4845                                 goto out;
4846                         if (back->is_data) {
4847                                 dback = to_data_backref(back);
4848                                 fprintf(stderr, "Backref %llu %s %llu"
4849                                         " owner %llu offset %llu num_refs %lu"
4850                                         " not found in extent tree\n",
4851                                         (unsigned long long)rec->start,
4852                                         back->full_backref ?
4853                                         "parent" : "root",
4854                                         back->full_backref ?
4855                                         (unsigned long long)dback->parent:
4856                                         (unsigned long long)dback->root,
4857                                         (unsigned long long)dback->owner,
4858                                         (unsigned long long)dback->offset,
4859                                         (unsigned long)dback->num_refs);
4860                         } else {
4861                                 tback = to_tree_backref(back);
4862                                 fprintf(stderr, "Backref %llu parent %llu"
4863                                         " root %llu not found in extent tree\n",
4864                                         (unsigned long long)rec->start,
4865                                         (unsigned long long)tback->parent,
4866                                         (unsigned long long)tback->root);
4867                         }
4868                 }
4869                 if (!back->is_data && !back->found_ref) {
4870                         err = 1;
4871                         if (!print_errs)
4872                                 goto out;
4873                         tback = to_tree_backref(back);
4874                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4875                                 (unsigned long long)rec->start,
4876                                 back->full_backref ? "parent" : "root",
4877                                 back->full_backref ?
4878                                 (unsigned long long)tback->parent :
4879                                 (unsigned long long)tback->root, back);
4880                 }
4881                 if (back->is_data) {
4882                         dback = to_data_backref(back);
4883                         if (dback->found_ref != dback->num_refs) {
4884                                 err = 1;
4885                                 if (!print_errs)
4886                                         goto out;
4887                                 fprintf(stderr, "Incorrect local backref count"
4888                                         " on %llu %s %llu owner %llu"
4889                                         " offset %llu found %u wanted %u back %p\n",
4890                                         (unsigned long long)rec->start,
4891                                         back->full_backref ?
4892                                         "parent" : "root",
4893                                         back->full_backref ?
4894                                         (unsigned long long)dback->parent:
4895                                         (unsigned long long)dback->root,
4896                                         (unsigned long long)dback->owner,
4897                                         (unsigned long long)dback->offset,
4898                                         dback->found_ref, dback->num_refs, back);
4899                         }
4900                         if (dback->disk_bytenr != rec->start) {
4901                                 err = 1;
4902                                 if (!print_errs)
4903                                         goto out;
4904                                 fprintf(stderr, "Backref disk bytenr does not"
4905                                         " match extent record, bytenr=%llu, "
4906                                         "ref bytenr=%llu\n",
4907                                         (unsigned long long)rec->start,
4908                                         (unsigned long long)dback->disk_bytenr);
4909                         }
4910
4911                         if (dback->bytes != rec->nr) {
4912                                 err = 1;
4913                                 if (!print_errs)
4914                                         goto out;
4915                                 fprintf(stderr, "Backref bytes do not match "
4916                                         "extent backref, bytenr=%llu, ref "
4917                                         "bytes=%llu, backref bytes=%llu\n",
4918                                         (unsigned long long)rec->start,
4919                                         (unsigned long long)rec->nr,
4920                                         (unsigned long long)dback->bytes);
4921                         }
4922                 }
4923                 if (!back->is_data) {
4924                         found += 1;
4925                 } else {
4926                         dback = to_data_backref(back);
4927                         found += dback->found_ref;
4928                 }
4929         }
4930         if (found != rec->refs) {
4931                 err = 1;
4932                 if (!print_errs)
4933                         goto out;
4934                 fprintf(stderr, "Incorrect global backref count "
4935                         "on %llu found %llu wanted %llu\n",
4936                         (unsigned long long)rec->start,
4937                         (unsigned long long)found,
4938                         (unsigned long long)rec->refs);
4939         }
4940 out:
4941         return err;
4942 }
4943
4944 static int free_all_extent_backrefs(struct extent_record *rec)
4945 {
4946         struct extent_backref *back;
4947         struct list_head *cur;
4948         while (!list_empty(&rec->backrefs)) {
4949                 cur = rec->backrefs.next;
4950                 back = to_extent_backref(cur);
4951                 list_del(cur);
4952                 free(back);
4953         }
4954         return 0;
4955 }
4956
4957 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4958                                      struct cache_tree *extent_cache)
4959 {
4960         struct cache_extent *cache;
4961         struct extent_record *rec;
4962
4963         while (1) {
4964                 cache = first_cache_extent(extent_cache);
4965                 if (!cache)
4966                         break;
4967                 rec = container_of(cache, struct extent_record, cache);
4968                 remove_cache_extent(extent_cache, cache);
4969                 free_all_extent_backrefs(rec);
4970                 free(rec);
4971         }
4972 }
4973
4974 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4975                                  struct extent_record *rec)
4976 {
4977         if (rec->content_checked && rec->owner_ref_checked &&
4978             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4979             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4980             !rec->bad_full_backref && !rec->crossing_stripes &&
4981             !rec->wrong_chunk_type) {
4982                 remove_cache_extent(extent_cache, &rec->cache);
4983                 free_all_extent_backrefs(rec);
4984                 list_del_init(&rec->list);
4985                 free(rec);
4986         }
4987         return 0;
4988 }
4989
4990 static int check_owner_ref(struct btrfs_root *root,
4991                             struct extent_record *rec,
4992                             struct extent_buffer *buf)
4993 {
4994         struct extent_backref *node;
4995         struct tree_backref *back;
4996         struct btrfs_root *ref_root;
4997         struct btrfs_key key;
4998         struct btrfs_path path;
4999         struct extent_buffer *parent;
5000         int level;
5001         int found = 0;
5002         int ret;
5003
5004         list_for_each_entry(node, &rec->backrefs, list) {
5005                 if (node->is_data)
5006                         continue;
5007                 if (!node->found_ref)
5008                         continue;
5009                 if (node->full_backref)
5010                         continue;
5011                 back = to_tree_backref(node);
5012                 if (btrfs_header_owner(buf) == back->root)
5013                         return 0;
5014         }
5015         BUG_ON(rec->is_root);
5016
5017         /* try to find the block by search corresponding fs tree */
5018         key.objectid = btrfs_header_owner(buf);
5019         key.type = BTRFS_ROOT_ITEM_KEY;
5020         key.offset = (u64)-1;
5021
5022         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5023         if (IS_ERR(ref_root))
5024                 return 1;
5025
5026         level = btrfs_header_level(buf);
5027         if (level == 0)
5028                 btrfs_item_key_to_cpu(buf, &key, 0);
5029         else
5030                 btrfs_node_key_to_cpu(buf, &key, 0);
5031
5032         btrfs_init_path(&path);
5033         path.lowest_level = level + 1;
5034         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5035         if (ret < 0)
5036                 return 0;
5037
5038         parent = path.nodes[level + 1];
5039         if (parent && buf->start == btrfs_node_blockptr(parent,
5040                                                         path.slots[level + 1]))
5041                 found = 1;
5042
5043         btrfs_release_path(&path);
5044         return found ? 0 : 1;
5045 }
5046
5047 static int is_extent_tree_record(struct extent_record *rec)
5048 {
5049         struct list_head *cur = rec->backrefs.next;
5050         struct extent_backref *node;
5051         struct tree_backref *back;
5052         int is_extent = 0;
5053
5054         while(cur != &rec->backrefs) {
5055                 node = to_extent_backref(cur);
5056                 cur = cur->next;
5057                 if (node->is_data)
5058                         return 0;
5059                 back = to_tree_backref(node);
5060                 if (node->full_backref)
5061                         return 0;
5062                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5063                         is_extent = 1;
5064         }
5065         return is_extent;
5066 }
5067
5068
5069 static int record_bad_block_io(struct btrfs_fs_info *info,
5070                                struct cache_tree *extent_cache,
5071                                u64 start, u64 len)
5072 {
5073         struct extent_record *rec;
5074         struct cache_extent *cache;
5075         struct btrfs_key key;
5076
5077         cache = lookup_cache_extent(extent_cache, start, len);
5078         if (!cache)
5079                 return 0;
5080
5081         rec = container_of(cache, struct extent_record, cache);
5082         if (!is_extent_tree_record(rec))
5083                 return 0;
5084
5085         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5086         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5087 }
5088
5089 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5090                        struct extent_buffer *buf, int slot)
5091 {
5092         if (btrfs_header_level(buf)) {
5093                 struct btrfs_key_ptr ptr1, ptr2;
5094
5095                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5096                                    sizeof(struct btrfs_key_ptr));
5097                 read_extent_buffer(buf, &ptr2,
5098                                    btrfs_node_key_ptr_offset(slot + 1),
5099                                    sizeof(struct btrfs_key_ptr));
5100                 write_extent_buffer(buf, &ptr1,
5101                                     btrfs_node_key_ptr_offset(slot + 1),
5102                                     sizeof(struct btrfs_key_ptr));
5103                 write_extent_buffer(buf, &ptr2,
5104                                     btrfs_node_key_ptr_offset(slot),
5105                                     sizeof(struct btrfs_key_ptr));
5106                 if (slot == 0) {
5107                         struct btrfs_disk_key key;
5108                         btrfs_node_key(buf, &key, 0);
5109                         btrfs_fixup_low_keys(root, path, &key,
5110                                              btrfs_header_level(buf) + 1);
5111                 }
5112         } else {
5113                 struct btrfs_item *item1, *item2;
5114                 struct btrfs_key k1, k2;
5115                 char *item1_data, *item2_data;
5116                 u32 item1_offset, item2_offset, item1_size, item2_size;
5117
5118                 item1 = btrfs_item_nr(slot);
5119                 item2 = btrfs_item_nr(slot + 1);
5120                 btrfs_item_key_to_cpu(buf, &k1, slot);
5121                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5122                 item1_offset = btrfs_item_offset(buf, item1);
5123                 item2_offset = btrfs_item_offset(buf, item2);
5124                 item1_size = btrfs_item_size(buf, item1);
5125                 item2_size = btrfs_item_size(buf, item2);
5126
5127                 item1_data = malloc(item1_size);
5128                 if (!item1_data)
5129                         return -ENOMEM;
5130                 item2_data = malloc(item2_size);
5131                 if (!item2_data) {
5132                         free(item1_data);
5133                         return -ENOMEM;
5134                 }
5135
5136                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5137                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5138
5139                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5140                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5141                 free(item1_data);
5142                 free(item2_data);
5143
5144                 btrfs_set_item_offset(buf, item1, item2_offset);
5145                 btrfs_set_item_offset(buf, item2, item1_offset);
5146                 btrfs_set_item_size(buf, item1, item2_size);
5147                 btrfs_set_item_size(buf, item2, item1_size);
5148
5149                 path->slots[0] = slot;
5150                 btrfs_set_item_key_unsafe(root, path, &k2);
5151                 path->slots[0] = slot + 1;
5152                 btrfs_set_item_key_unsafe(root, path, &k1);
5153         }
5154         return 0;
5155 }
5156
5157 static int fix_key_order(struct btrfs_trans_handle *trans,
5158                          struct btrfs_root *root,
5159                          struct btrfs_path *path)
5160 {
5161         struct extent_buffer *buf;
5162         struct btrfs_key k1, k2;
5163         int i;
5164         int level = path->lowest_level;
5165         int ret = -EIO;
5166
5167         buf = path->nodes[level];
5168         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5169                 if (level) {
5170                         btrfs_node_key_to_cpu(buf, &k1, i);
5171                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5172                 } else {
5173                         btrfs_item_key_to_cpu(buf, &k1, i);
5174                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5175                 }
5176                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5177                         continue;
5178                 ret = swap_values(root, path, buf, i);
5179                 if (ret)
5180                         break;
5181                 btrfs_mark_buffer_dirty(buf);
5182                 i = 0;
5183         }
5184         return ret;
5185 }
5186
5187 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5188                              struct btrfs_root *root,
5189                              struct btrfs_path *path,
5190                              struct extent_buffer *buf, int slot)
5191 {
5192         struct btrfs_key key;
5193         int nritems = btrfs_header_nritems(buf);
5194
5195         btrfs_item_key_to_cpu(buf, &key, slot);
5196
5197         /* These are all the keys we can deal with missing. */
5198         if (key.type != BTRFS_DIR_INDEX_KEY &&
5199             key.type != BTRFS_EXTENT_ITEM_KEY &&
5200             key.type != BTRFS_METADATA_ITEM_KEY &&
5201             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5202             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5203                 return -1;
5204
5205         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5206                (unsigned long long)key.objectid, key.type,
5207                (unsigned long long)key.offset, slot, buf->start);
5208         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5209                               btrfs_item_nr_offset(slot + 1),
5210                               sizeof(struct btrfs_item) *
5211                               (nritems - slot - 1));
5212         btrfs_set_header_nritems(buf, nritems - 1);
5213         if (slot == 0) {
5214                 struct btrfs_disk_key disk_key;
5215
5216                 btrfs_item_key(buf, &disk_key, 0);
5217                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5218         }
5219         btrfs_mark_buffer_dirty(buf);
5220         return 0;
5221 }
5222
5223 static int fix_item_offset(struct btrfs_trans_handle *trans,
5224                            struct btrfs_root *root,
5225                            struct btrfs_path *path)
5226 {
5227         struct extent_buffer *buf;
5228         int i;
5229         int ret = 0;
5230
5231         /* We should only get this for leaves */
5232         BUG_ON(path->lowest_level);
5233         buf = path->nodes[0];
5234 again:
5235         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5236                 unsigned int shift = 0, offset;
5237
5238                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5239                     BTRFS_LEAF_DATA_SIZE(root)) {
5240                         if (btrfs_item_end_nr(buf, i) >
5241                             BTRFS_LEAF_DATA_SIZE(root)) {
5242                                 ret = delete_bogus_item(trans, root, path,
5243                                                         buf, i);
5244                                 if (!ret)
5245                                         goto again;
5246                                 fprintf(stderr, "item is off the end of the "
5247                                         "leaf, can't fix\n");
5248                                 ret = -EIO;
5249                                 break;
5250                         }
5251                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5252                                 btrfs_item_end_nr(buf, i);
5253                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5254                            btrfs_item_offset_nr(buf, i - 1)) {
5255                         if (btrfs_item_end_nr(buf, i) >
5256                             btrfs_item_offset_nr(buf, i - 1)) {
5257                                 ret = delete_bogus_item(trans, root, path,
5258                                                         buf, i);
5259                                 if (!ret)
5260                                         goto again;
5261                                 fprintf(stderr, "items overlap, can't fix\n");
5262                                 ret = -EIO;
5263                                 break;
5264                         }
5265                         shift = btrfs_item_offset_nr(buf, i - 1) -
5266                                 btrfs_item_end_nr(buf, i);
5267                 }
5268                 if (!shift)
5269                         continue;
5270
5271                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5272                        i, shift, (unsigned long long)buf->start);
5273                 offset = btrfs_item_offset_nr(buf, i);
5274                 memmove_extent_buffer(buf,
5275                                       btrfs_leaf_data(buf) + offset + shift,
5276                                       btrfs_leaf_data(buf) + offset,
5277                                       btrfs_item_size_nr(buf, i));
5278                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5279                                       offset + shift);
5280                 btrfs_mark_buffer_dirty(buf);
5281         }
5282
5283         /*
5284          * We may have moved things, in which case we want to exit so we don't
5285          * write those changes out.  Once we have proper abort functionality in
5286          * progs this can be changed to something nicer.
5287          */
5288         BUG_ON(ret);
5289         return ret;
5290 }
5291
5292 /*
5293  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5294  * then just return -EIO.
5295  */
5296 static int try_to_fix_bad_block(struct btrfs_root *root,
5297                                 struct extent_buffer *buf,
5298                                 enum btrfs_tree_block_status status)
5299 {
5300         struct btrfs_trans_handle *trans;
5301         struct ulist *roots;
5302         struct ulist_node *node;
5303         struct btrfs_root *search_root;
5304         struct btrfs_path path;
5305         struct ulist_iterator iter;
5306         struct btrfs_key root_key, key;
5307         int ret;
5308
5309         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5310             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5311                 return -EIO;
5312
5313         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5314         if (ret)
5315                 return -EIO;
5316
5317         btrfs_init_path(&path);
5318         ULIST_ITER_INIT(&iter);
5319         while ((node = ulist_next(roots, &iter))) {
5320                 root_key.objectid = node->val;
5321                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5322                 root_key.offset = (u64)-1;
5323
5324                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5325                 if (IS_ERR(root)) {
5326                         ret = -EIO;
5327                         break;
5328                 }
5329
5330
5331                 trans = btrfs_start_transaction(search_root, 0);
5332                 if (IS_ERR(trans)) {
5333                         ret = PTR_ERR(trans);
5334                         break;
5335                 }
5336
5337                 path.lowest_level = btrfs_header_level(buf);
5338                 path.skip_check_block = 1;
5339                 if (path.lowest_level)
5340                         btrfs_node_key_to_cpu(buf, &key, 0);
5341                 else
5342                         btrfs_item_key_to_cpu(buf, &key, 0);
5343                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5344                 if (ret) {
5345                         ret = -EIO;
5346                         btrfs_commit_transaction(trans, search_root);
5347                         break;
5348                 }
5349                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5350                         ret = fix_key_order(trans, search_root, &path);
5351                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5352                         ret = fix_item_offset(trans, search_root, &path);
5353                 if (ret) {
5354                         btrfs_commit_transaction(trans, search_root);
5355                         break;
5356                 }
5357                 btrfs_release_path(&path);
5358                 btrfs_commit_transaction(trans, search_root);
5359         }
5360         ulist_free(roots);
5361         btrfs_release_path(&path);
5362         return ret;
5363 }
5364
5365 static int check_block(struct btrfs_root *root,
5366                        struct cache_tree *extent_cache,
5367                        struct extent_buffer *buf, u64 flags)
5368 {
5369         struct extent_record *rec;
5370         struct cache_extent *cache;
5371         struct btrfs_key key;
5372         enum btrfs_tree_block_status status;
5373         int ret = 0;
5374         int level;
5375
5376         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5377         if (!cache)
5378                 return 1;
5379         rec = container_of(cache, struct extent_record, cache);
5380         rec->generation = btrfs_header_generation(buf);
5381
5382         level = btrfs_header_level(buf);
5383         if (btrfs_header_nritems(buf) > 0) {
5384
5385                 if (level == 0)
5386                         btrfs_item_key_to_cpu(buf, &key, 0);
5387                 else
5388                         btrfs_node_key_to_cpu(buf, &key, 0);
5389
5390                 rec->info_objectid = key.objectid;
5391         }
5392         rec->info_level = level;
5393
5394         if (btrfs_is_leaf(buf))
5395                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5396         else
5397                 status = btrfs_check_node(root, &rec->parent_key, buf);
5398
5399         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5400                 if (repair)
5401                         status = try_to_fix_bad_block(root, buf, status);
5402                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5403                         ret = -EIO;
5404                         fprintf(stderr, "bad block %llu\n",
5405                                 (unsigned long long)buf->start);
5406                 } else {
5407                         /*
5408                          * Signal to callers we need to start the scan over
5409                          * again since we'll have cowed blocks.
5410                          */
5411                         ret = -EAGAIN;
5412                 }
5413         } else {
5414                 rec->content_checked = 1;
5415                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5416                         rec->owner_ref_checked = 1;
5417                 else {
5418                         ret = check_owner_ref(root, rec, buf);
5419                         if (!ret)
5420                                 rec->owner_ref_checked = 1;
5421                 }
5422         }
5423         if (!ret)
5424                 maybe_free_extent_rec(extent_cache, rec);
5425         return ret;
5426 }
5427
5428 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5429                                                 u64 parent, u64 root)
5430 {
5431         struct list_head *cur = rec->backrefs.next;
5432         struct extent_backref *node;
5433         struct tree_backref *back;
5434
5435         while(cur != &rec->backrefs) {
5436                 node = to_extent_backref(cur);
5437                 cur = cur->next;
5438                 if (node->is_data)
5439                         continue;
5440                 back = to_tree_backref(node);
5441                 if (parent > 0) {
5442                         if (!node->full_backref)
5443                                 continue;
5444                         if (parent == back->parent)
5445                                 return back;
5446                 } else {
5447                         if (node->full_backref)
5448                                 continue;
5449                         if (back->root == root)
5450                                 return back;
5451                 }
5452         }
5453         return NULL;
5454 }
5455
5456 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5457                                                 u64 parent, u64 root)
5458 {
5459         struct tree_backref *ref = malloc(sizeof(*ref));
5460
5461         if (!ref)
5462                 return NULL;
5463         memset(&ref->node, 0, sizeof(ref->node));
5464         if (parent > 0) {
5465                 ref->parent = parent;
5466                 ref->node.full_backref = 1;
5467         } else {
5468                 ref->root = root;
5469                 ref->node.full_backref = 0;
5470         }
5471         list_add_tail(&ref->node.list, &rec->backrefs);
5472
5473         return ref;
5474 }
5475
5476 static struct data_backref *find_data_backref(struct extent_record *rec,
5477                                                 u64 parent, u64 root,
5478                                                 u64 owner, u64 offset,
5479                                                 int found_ref,
5480                                                 u64 disk_bytenr, u64 bytes)
5481 {
5482         struct list_head *cur = rec->backrefs.next;
5483         struct extent_backref *node;
5484         struct data_backref *back;
5485
5486         while(cur != &rec->backrefs) {
5487                 node = to_extent_backref(cur);
5488                 cur = cur->next;
5489                 if (!node->is_data)
5490                         continue;
5491                 back = to_data_backref(node);
5492                 if (parent > 0) {
5493                         if (!node->full_backref)
5494                                 continue;
5495                         if (parent == back->parent)
5496                                 return back;
5497                 } else {
5498                         if (node->full_backref)
5499                                 continue;
5500                         if (back->root == root && back->owner == owner &&
5501                             back->offset == offset) {
5502                                 if (found_ref && node->found_ref &&
5503                                     (back->bytes != bytes ||
5504                                     back->disk_bytenr != disk_bytenr))
5505                                         continue;
5506                                 return back;
5507                         }
5508                 }
5509         }
5510         return NULL;
5511 }
5512
5513 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5514                                                 u64 parent, u64 root,
5515                                                 u64 owner, u64 offset,
5516                                                 u64 max_size)
5517 {
5518         struct data_backref *ref = malloc(sizeof(*ref));
5519
5520         if (!ref)
5521                 return NULL;
5522         memset(&ref->node, 0, sizeof(ref->node));
5523         ref->node.is_data = 1;
5524
5525         if (parent > 0) {
5526                 ref->parent = parent;
5527                 ref->owner = 0;
5528                 ref->offset = 0;
5529                 ref->node.full_backref = 1;
5530         } else {
5531                 ref->root = root;
5532                 ref->owner = owner;
5533                 ref->offset = offset;
5534                 ref->node.full_backref = 0;
5535         }
5536         ref->bytes = max_size;
5537         ref->found_ref = 0;
5538         ref->num_refs = 0;
5539         list_add_tail(&ref->node.list, &rec->backrefs);
5540         if (max_size > rec->max_size)
5541                 rec->max_size = max_size;
5542         return ref;
5543 }
5544
5545 /* Check if the type of extent matches with its chunk */
5546 static void check_extent_type(struct extent_record *rec)
5547 {
5548         struct btrfs_block_group_cache *bg_cache;
5549
5550         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5551         if (!bg_cache)
5552                 return;
5553
5554         /* data extent, check chunk directly*/
5555         if (!rec->metadata) {
5556                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5557                         rec->wrong_chunk_type = 1;
5558                 return;
5559         }
5560
5561         /* metadata extent, check the obvious case first */
5562         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5563                                  BTRFS_BLOCK_GROUP_METADATA))) {
5564                 rec->wrong_chunk_type = 1;
5565                 return;
5566         }
5567
5568         /*
5569          * Check SYSTEM extent, as it's also marked as metadata, we can only
5570          * make sure it's a SYSTEM extent by its backref
5571          */
5572         if (!list_empty(&rec->backrefs)) {
5573                 struct extent_backref *node;
5574                 struct tree_backref *tback;
5575                 u64 bg_type;
5576
5577                 node = to_extent_backref(rec->backrefs.next);
5578                 if (node->is_data) {
5579                         /* tree block shouldn't have data backref */
5580                         rec->wrong_chunk_type = 1;
5581                         return;
5582                 }
5583                 tback = container_of(node, struct tree_backref, node);
5584
5585                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5586                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5587                 else
5588                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5589                 if (!(bg_cache->flags & bg_type))
5590                         rec->wrong_chunk_type = 1;
5591         }
5592 }
5593
5594 /*
5595  * Allocate a new extent record, fill default values from @tmpl and insert int
5596  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5597  * the cache, otherwise it fails.
5598  */
5599 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5600                 struct extent_record *tmpl)
5601 {
5602         struct extent_record *rec;
5603         int ret = 0;
5604
5605         rec = malloc(sizeof(*rec));
5606         if (!rec)
5607                 return -ENOMEM;
5608         rec->start = tmpl->start;
5609         rec->max_size = tmpl->max_size;
5610         rec->nr = max(tmpl->nr, tmpl->max_size);
5611         rec->found_rec = tmpl->found_rec;
5612         rec->content_checked = tmpl->content_checked;
5613         rec->owner_ref_checked = tmpl->owner_ref_checked;
5614         rec->num_duplicates = 0;
5615         rec->metadata = tmpl->metadata;
5616         rec->flag_block_full_backref = FLAG_UNSET;
5617         rec->bad_full_backref = 0;
5618         rec->crossing_stripes = 0;
5619         rec->wrong_chunk_type = 0;
5620         rec->is_root = tmpl->is_root;
5621         rec->refs = tmpl->refs;
5622         rec->extent_item_refs = tmpl->extent_item_refs;
5623         rec->parent_generation = tmpl->parent_generation;
5624         INIT_LIST_HEAD(&rec->backrefs);
5625         INIT_LIST_HEAD(&rec->dups);
5626         INIT_LIST_HEAD(&rec->list);
5627         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
5628         rec->cache.start = tmpl->start;
5629         rec->cache.size = tmpl->nr;
5630         ret = insert_cache_extent(extent_cache, &rec->cache);
5631         if (ret) {
5632                 free(rec);
5633                 return ret;
5634         }
5635         bytes_used += rec->nr;
5636
5637         if (tmpl->metadata)
5638                 rec->crossing_stripes = check_crossing_stripes(global_info,
5639                                 rec->start, global_info->tree_root->nodesize);
5640         check_extent_type(rec);
5641         return ret;
5642 }
5643
5644 /*
5645  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5646  * some are hints:
5647  * - refs              - if found, increase refs
5648  * - is_root           - if found, set
5649  * - content_checked   - if found, set
5650  * - owner_ref_checked - if found, set
5651  *
5652  * If not found, create a new one, initialize and insert.
5653  */
5654 static int add_extent_rec(struct cache_tree *extent_cache,
5655                 struct extent_record *tmpl)
5656 {
5657         struct extent_record *rec;
5658         struct cache_extent *cache;
5659         int ret = 0;
5660         int dup = 0;
5661
5662         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
5663         if (cache) {
5664                 rec = container_of(cache, struct extent_record, cache);
5665                 if (tmpl->refs)
5666                         rec->refs++;
5667                 if (rec->nr == 1)
5668                         rec->nr = max(tmpl->nr, tmpl->max_size);
5669
5670                 /*
5671                  * We need to make sure to reset nr to whatever the extent
5672                  * record says was the real size, this way we can compare it to
5673                  * the backrefs.
5674                  */
5675                 if (tmpl->found_rec) {
5676                         if (tmpl->start != rec->start || rec->found_rec) {
5677                                 struct extent_record *tmp;
5678
5679                                 dup = 1;
5680                                 if (list_empty(&rec->list))
5681                                         list_add_tail(&rec->list,
5682                                                       &duplicate_extents);
5683
5684                                 /*
5685                                  * We have to do this song and dance in case we
5686                                  * find an extent record that falls inside of
5687                                  * our current extent record but does not have
5688                                  * the same objectid.
5689                                  */
5690                                 tmp = malloc(sizeof(*tmp));
5691                                 if (!tmp)
5692                                         return -ENOMEM;
5693                                 tmp->start = tmpl->start;
5694                                 tmp->max_size = tmpl->max_size;
5695                                 tmp->nr = tmpl->nr;
5696                                 tmp->found_rec = 1;
5697                                 tmp->metadata = tmpl->metadata;
5698                                 tmp->extent_item_refs = tmpl->extent_item_refs;
5699                                 INIT_LIST_HEAD(&tmp->list);
5700                                 list_add_tail(&tmp->list, &rec->dups);
5701                                 rec->num_duplicates++;
5702                         } else {
5703                                 rec->nr = tmpl->nr;
5704                                 rec->found_rec = 1;
5705                         }
5706                 }
5707
5708                 if (tmpl->extent_item_refs && !dup) {
5709                         if (rec->extent_item_refs) {
5710                                 fprintf(stderr, "block %llu rec "
5711                                         "extent_item_refs %llu, passed %llu\n",
5712                                         (unsigned long long)tmpl->start,
5713                                         (unsigned long long)
5714                                                         rec->extent_item_refs,
5715                                         (unsigned long long)tmpl->extent_item_refs);
5716                         }
5717                         rec->extent_item_refs = tmpl->extent_item_refs;
5718                 }
5719                 if (tmpl->is_root)
5720                         rec->is_root = 1;
5721                 if (tmpl->content_checked)
5722                         rec->content_checked = 1;
5723                 if (tmpl->owner_ref_checked)
5724                         rec->owner_ref_checked = 1;
5725                 memcpy(&rec->parent_key, &tmpl->parent_key,
5726                                 sizeof(tmpl->parent_key));
5727                 if (tmpl->parent_generation)
5728                         rec->parent_generation = tmpl->parent_generation;
5729                 if (rec->max_size < tmpl->max_size)
5730                         rec->max_size = tmpl->max_size;
5731
5732                 /*
5733                  * A metadata extent can't cross stripe_len boundary, otherwise
5734                  * kernel scrub won't be able to handle it.
5735                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5736                  * it.
5737                  */
5738                 if (tmpl->metadata)
5739                         rec->crossing_stripes = check_crossing_stripes(
5740                                         global_info, rec->start,
5741                                         global_info->tree_root->nodesize);
5742                 check_extent_type(rec);
5743                 maybe_free_extent_rec(extent_cache, rec);
5744                 return ret;
5745         }
5746
5747         ret = add_extent_rec_nolookup(extent_cache, tmpl);
5748
5749         return ret;
5750 }
5751
5752 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5753                             u64 parent, u64 root, int found_ref)
5754 {
5755         struct extent_record *rec;
5756         struct tree_backref *back;
5757         struct cache_extent *cache;
5758         int ret;
5759
5760         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5761         if (!cache) {
5762                 struct extent_record tmpl;
5763
5764                 memset(&tmpl, 0, sizeof(tmpl));
5765                 tmpl.start = bytenr;
5766                 tmpl.nr = 1;
5767                 tmpl.metadata = 1;
5768
5769                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5770                 if (ret)
5771                         return ret;
5772
5773                 /* really a bug in cache_extent implement now */
5774                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5775                 if (!cache)
5776                         return -ENOENT;
5777         }
5778
5779         rec = container_of(cache, struct extent_record, cache);
5780         if (rec->start != bytenr) {
5781                 /*
5782                  * Several cause, from unaligned bytenr to over lapping extents
5783                  */
5784                 return -EEXIST;
5785         }
5786
5787         back = find_tree_backref(rec, parent, root);
5788         if (!back) {
5789                 back = alloc_tree_backref(rec, parent, root);
5790                 if (!back)
5791                         return -ENOMEM;
5792         }
5793
5794         if (found_ref) {
5795                 if (back->node.found_ref) {
5796                         fprintf(stderr, "Extent back ref already exists "
5797                                 "for %llu parent %llu root %llu \n",
5798                                 (unsigned long long)bytenr,
5799                                 (unsigned long long)parent,
5800                                 (unsigned long long)root);
5801                 }
5802                 back->node.found_ref = 1;
5803         } else {
5804                 if (back->node.found_extent_tree) {
5805                         fprintf(stderr, "Extent back ref already exists "
5806                                 "for %llu parent %llu root %llu \n",
5807                                 (unsigned long long)bytenr,
5808                                 (unsigned long long)parent,
5809                                 (unsigned long long)root);
5810                 }
5811                 back->node.found_extent_tree = 1;
5812         }
5813         check_extent_type(rec);
5814         maybe_free_extent_rec(extent_cache, rec);
5815         return 0;
5816 }
5817
5818 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5819                             u64 parent, u64 root, u64 owner, u64 offset,
5820                             u32 num_refs, int found_ref, u64 max_size)
5821 {
5822         struct extent_record *rec;
5823         struct data_backref *back;
5824         struct cache_extent *cache;
5825         int ret;
5826
5827         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5828         if (!cache) {
5829                 struct extent_record tmpl;
5830
5831                 memset(&tmpl, 0, sizeof(tmpl));
5832                 tmpl.start = bytenr;
5833                 tmpl.nr = 1;
5834                 tmpl.max_size = max_size;
5835
5836                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5837                 if (ret)
5838                         return ret;
5839
5840                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5841                 if (!cache)
5842                         abort();
5843         }
5844
5845         rec = container_of(cache, struct extent_record, cache);
5846         if (rec->max_size < max_size)
5847                 rec->max_size = max_size;
5848
5849         /*
5850          * If found_ref is set then max_size is the real size and must match the
5851          * existing refs.  So if we have already found a ref then we need to
5852          * make sure that this ref matches the existing one, otherwise we need
5853          * to add a new backref so we can notice that the backrefs don't match
5854          * and we need to figure out who is telling the truth.  This is to
5855          * account for that awful fsync bug I introduced where we'd end up with
5856          * a btrfs_file_extent_item that would have its length include multiple
5857          * prealloc extents or point inside of a prealloc extent.
5858          */
5859         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5860                                  bytenr, max_size);
5861         if (!back) {
5862                 back = alloc_data_backref(rec, parent, root, owner, offset,
5863                                           max_size);
5864                 BUG_ON(!back);
5865         }
5866
5867         if (found_ref) {
5868                 BUG_ON(num_refs != 1);
5869                 if (back->node.found_ref)
5870                         BUG_ON(back->bytes != max_size);
5871                 back->node.found_ref = 1;
5872                 back->found_ref += 1;
5873                 back->bytes = max_size;
5874                 back->disk_bytenr = bytenr;
5875                 rec->refs += 1;
5876                 rec->content_checked = 1;
5877                 rec->owner_ref_checked = 1;
5878         } else {
5879                 if (back->node.found_extent_tree) {
5880                         fprintf(stderr, "Extent back ref already exists "
5881                                 "for %llu parent %llu root %llu "
5882                                 "owner %llu offset %llu num_refs %lu\n",
5883                                 (unsigned long long)bytenr,
5884                                 (unsigned long long)parent,
5885                                 (unsigned long long)root,
5886                                 (unsigned long long)owner,
5887                                 (unsigned long long)offset,
5888                                 (unsigned long)num_refs);
5889                 }
5890                 back->num_refs = num_refs;
5891                 back->node.found_extent_tree = 1;
5892         }
5893         maybe_free_extent_rec(extent_cache, rec);
5894         return 0;
5895 }
5896
5897 static int add_pending(struct cache_tree *pending,
5898                        struct cache_tree *seen, u64 bytenr, u32 size)
5899 {
5900         int ret;
5901         ret = add_cache_extent(seen, bytenr, size);
5902         if (ret)
5903                 return ret;
5904         add_cache_extent(pending, bytenr, size);
5905         return 0;
5906 }
5907
5908 static int pick_next_pending(struct cache_tree *pending,
5909                         struct cache_tree *reada,
5910                         struct cache_tree *nodes,
5911                         u64 last, struct block_info *bits, int bits_nr,
5912                         int *reada_bits)
5913 {
5914         unsigned long node_start = last;
5915         struct cache_extent *cache;
5916         int ret;
5917
5918         cache = search_cache_extent(reada, 0);
5919         if (cache) {
5920                 bits[0].start = cache->start;
5921                 bits[0].size = cache->size;
5922                 *reada_bits = 1;
5923                 return 1;
5924         }
5925         *reada_bits = 0;
5926         if (node_start > 32768)
5927                 node_start -= 32768;
5928
5929         cache = search_cache_extent(nodes, node_start);
5930         if (!cache)
5931                 cache = search_cache_extent(nodes, 0);
5932
5933         if (!cache) {
5934                  cache = search_cache_extent(pending, 0);
5935                  if (!cache)
5936                          return 0;
5937                  ret = 0;
5938                  do {
5939                          bits[ret].start = cache->start;
5940                          bits[ret].size = cache->size;
5941                          cache = next_cache_extent(cache);
5942                          ret++;
5943                  } while (cache && ret < bits_nr);
5944                  return ret;
5945         }
5946
5947         ret = 0;
5948         do {
5949                 bits[ret].start = cache->start;
5950                 bits[ret].size = cache->size;
5951                 cache = next_cache_extent(cache);
5952                 ret++;
5953         } while (cache && ret < bits_nr);
5954
5955         if (bits_nr - ret > 8) {
5956                 u64 lookup = bits[0].start + bits[0].size;
5957                 struct cache_extent *next;
5958                 next = search_cache_extent(pending, lookup);
5959                 while(next) {
5960                         if (next->start - lookup > 32768)
5961                                 break;
5962                         bits[ret].start = next->start;
5963                         bits[ret].size = next->size;
5964                         lookup = next->start + next->size;
5965                         ret++;
5966                         if (ret == bits_nr)
5967                                 break;
5968                         next = next_cache_extent(next);
5969                         if (!next)
5970                                 break;
5971                 }
5972         }
5973         return ret;
5974 }
5975
5976 static void free_chunk_record(struct cache_extent *cache)
5977 {
5978         struct chunk_record *rec;
5979
5980         rec = container_of(cache, struct chunk_record, cache);
5981         list_del_init(&rec->list);
5982         list_del_init(&rec->dextents);
5983         free(rec);
5984 }
5985
5986 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5987 {
5988         cache_tree_free_extents(chunk_cache, free_chunk_record);
5989 }
5990
5991 static void free_device_record(struct rb_node *node)
5992 {
5993         struct device_record *rec;
5994
5995         rec = container_of(node, struct device_record, node);
5996         free(rec);
5997 }
5998
5999 FREE_RB_BASED_TREE(device_cache, free_device_record);
6000
6001 int insert_block_group_record(struct block_group_tree *tree,
6002                               struct block_group_record *bg_rec)
6003 {
6004         int ret;
6005
6006         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6007         if (ret)
6008                 return ret;
6009
6010         list_add_tail(&bg_rec->list, &tree->block_groups);
6011         return 0;
6012 }
6013
6014 static void free_block_group_record(struct cache_extent *cache)
6015 {
6016         struct block_group_record *rec;
6017
6018         rec = container_of(cache, struct block_group_record, cache);
6019         list_del_init(&rec->list);
6020         free(rec);
6021 }
6022
6023 void free_block_group_tree(struct block_group_tree *tree)
6024 {
6025         cache_tree_free_extents(&tree->tree, free_block_group_record);
6026 }
6027
6028 int insert_device_extent_record(struct device_extent_tree *tree,
6029                                 struct device_extent_record *de_rec)
6030 {
6031         int ret;
6032
6033         /*
6034          * Device extent is a bit different from the other extents, because
6035          * the extents which belong to the different devices may have the
6036          * same start and size, so we need use the special extent cache
6037          * search/insert functions.
6038          */
6039         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6040         if (ret)
6041                 return ret;
6042
6043         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6044         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6045         return 0;
6046 }
6047
6048 static void free_device_extent_record(struct cache_extent *cache)
6049 {
6050         struct device_extent_record *rec;
6051
6052         rec = container_of(cache, struct device_extent_record, cache);
6053         if (!list_empty(&rec->chunk_list))
6054                 list_del_init(&rec->chunk_list);
6055         if (!list_empty(&rec->device_list))
6056                 list_del_init(&rec->device_list);
6057         free(rec);
6058 }
6059
6060 void free_device_extent_tree(struct device_extent_tree *tree)
6061 {
6062         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6063 }
6064
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref at @slot of @leaf into a backref in @extent_cache.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref, anything else as a data backref carrying the v0 ref count.  In
 * both cases the key's objectid is the extent start and its offset is passed
 * as the parent.  Returns the result of the add_*_backref() call.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
6085
6086 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6087                                             struct btrfs_key *key,
6088                                             int slot)
6089 {
6090         struct btrfs_chunk *ptr;
6091         struct chunk_record *rec;
6092         int num_stripes, i;
6093
6094         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6095         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6096
6097         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6098         if (!rec) {
6099                 fprintf(stderr, "memory allocation failed\n");
6100                 exit(-1);
6101         }
6102
6103         INIT_LIST_HEAD(&rec->list);
6104         INIT_LIST_HEAD(&rec->dextents);
6105         rec->bg_rec = NULL;
6106
6107         rec->cache.start = key->offset;
6108         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6109
6110         rec->generation = btrfs_header_generation(leaf);
6111
6112         rec->objectid = key->objectid;
6113         rec->type = key->type;
6114         rec->offset = key->offset;
6115
6116         rec->length = rec->cache.size;
6117         rec->owner = btrfs_chunk_owner(leaf, ptr);
6118         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6119         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6120         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6121         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6122         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6123         rec->num_stripes = num_stripes;
6124         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6125
6126         for (i = 0; i < rec->num_stripes; ++i) {
6127                 rec->stripes[i].devid =
6128                         btrfs_stripe_devid_nr(leaf, ptr, i);
6129                 rec->stripes[i].offset =
6130                         btrfs_stripe_offset_nr(leaf, ptr, i);
6131                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6132                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6133                                 BTRFS_UUID_SIZE);
6134         }
6135
6136         return rec;
6137 }
6138
6139 static int process_chunk_item(struct cache_tree *chunk_cache,
6140                               struct btrfs_key *key, struct extent_buffer *eb,
6141                               int slot)
6142 {
6143         struct chunk_record *rec;
6144         struct btrfs_chunk *chunk;
6145         int ret = 0;
6146
6147         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6148         /*
6149          * Do extra check for this chunk item,
6150          *
6151          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6152          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6153          * and owner<->key_type check.
6154          */
6155         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6156                                       key->offset);
6157         if (ret < 0) {
6158                 error("chunk(%llu, %llu) is not valid, ignore it",
6159                       key->offset, btrfs_chunk_length(eb, chunk));
6160                 return 0;
6161         }
6162         rec = btrfs_new_chunk_record(eb, key, slot);
6163         ret = insert_cache_extent(chunk_cache, &rec->cache);
6164         if (ret) {
6165                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6166                         rec->offset, rec->length);
6167                 free(rec);
6168         }
6169
6170         return ret;
6171 }
6172
6173 static int process_device_item(struct rb_root *dev_cache,
6174                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6175 {
6176         struct btrfs_dev_item *ptr;
6177         struct device_record *rec;
6178         int ret = 0;
6179
6180         ptr = btrfs_item_ptr(eb,
6181                 slot, struct btrfs_dev_item);
6182
6183         rec = malloc(sizeof(*rec));
6184         if (!rec) {
6185                 fprintf(stderr, "memory allocation failed\n");
6186                 return -ENOMEM;
6187         }
6188
6189         rec->devid = key->offset;
6190         rec->generation = btrfs_header_generation(eb);
6191
6192         rec->objectid = key->objectid;
6193         rec->type = key->type;
6194         rec->offset = key->offset;
6195
6196         rec->devid = btrfs_device_id(eb, ptr);
6197         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6198         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6199
6200         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6201         if (ret) {
6202                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6203                 free(rec);
6204         }
6205
6206         return ret;
6207 }
6208
6209 struct block_group_record *
6210 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6211                              int slot)
6212 {
6213         struct btrfs_block_group_item *ptr;
6214         struct block_group_record *rec;
6215
6216         rec = calloc(1, sizeof(*rec));
6217         if (!rec) {
6218                 fprintf(stderr, "memory allocation failed\n");
6219                 exit(-1);
6220         }
6221
6222         rec->cache.start = key->objectid;
6223         rec->cache.size = key->offset;
6224
6225         rec->generation = btrfs_header_generation(leaf);
6226
6227         rec->objectid = key->objectid;
6228         rec->type = key->type;
6229         rec->offset = key->offset;
6230
6231         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6232         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6233
6234         INIT_LIST_HEAD(&rec->list);
6235
6236         return rec;
6237 }
6238
6239 static int process_block_group_item(struct block_group_tree *block_group_cache,
6240                                     struct btrfs_key *key,
6241                                     struct extent_buffer *eb, int slot)
6242 {
6243         struct block_group_record *rec;
6244         int ret = 0;
6245
6246         rec = btrfs_new_block_group_record(eb, key, slot);
6247         ret = insert_block_group_record(block_group_cache, rec);
6248         if (ret) {
6249                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6250                         rec->objectid, rec->offset);
6251                 free(rec);
6252         }
6253
6254         return ret;
6255 }
6256
6257 struct device_extent_record *
6258 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6259                                struct btrfs_key *key, int slot)
6260 {
6261         struct device_extent_record *rec;
6262         struct btrfs_dev_extent *ptr;
6263
6264         rec = calloc(1, sizeof(*rec));
6265         if (!rec) {
6266                 fprintf(stderr, "memory allocation failed\n");
6267                 exit(-1);
6268         }
6269
6270         rec->cache.objectid = key->objectid;
6271         rec->cache.start = key->offset;
6272
6273         rec->generation = btrfs_header_generation(leaf);
6274
6275         rec->objectid = key->objectid;
6276         rec->type = key->type;
6277         rec->offset = key->offset;
6278
6279         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6280         rec->chunk_objecteid =
6281                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6282         rec->chunk_offset =
6283                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6284         rec->length = btrfs_dev_extent_length(leaf, ptr);
6285         rec->cache.size = rec->length;
6286
6287         INIT_LIST_HEAD(&rec->chunk_list);
6288         INIT_LIST_HEAD(&rec->device_list);
6289
6290         return rec;
6291 }
6292
6293 static int
6294 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6295                            struct btrfs_key *key, struct extent_buffer *eb,
6296                            int slot)
6297 {
6298         struct device_extent_record *rec;
6299         int ret;
6300
6301         rec = btrfs_new_device_extent_record(eb, key, slot);
6302         ret = insert_device_extent_record(dev_extent_cache, rec);
6303         if (ret) {
6304                 fprintf(stderr,
6305                         "Device extent[%llu, %llu, %llu] existed.\n",
6306                         rec->objectid, rec->offset, rec->length);
6307                 free(rec);
6308         }
6309
6310         return ret;
6311 }
6312
6313 static int process_extent_item(struct btrfs_root *root,
6314                                struct cache_tree *extent_cache,
6315                                struct extent_buffer *eb, int slot)
6316 {
6317         struct btrfs_extent_item *ei;
6318         struct btrfs_extent_inline_ref *iref;
6319         struct btrfs_extent_data_ref *dref;
6320         struct btrfs_shared_data_ref *sref;
6321         struct btrfs_key key;
6322         struct extent_record tmpl;
6323         unsigned long end;
6324         unsigned long ptr;
6325         int ret;
6326         int type;
6327         u32 item_size = btrfs_item_size_nr(eb, slot);
6328         u64 refs = 0;
6329         u64 offset;
6330         u64 num_bytes;
6331         int metadata = 0;
6332
6333         btrfs_item_key_to_cpu(eb, &key, slot);
6334
6335         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6336                 metadata = 1;
6337                 num_bytes = root->nodesize;
6338         } else {
6339                 num_bytes = key.offset;
6340         }
6341
6342         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6343                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6344                       key.objectid, root->sectorsize);
6345                 return -EIO;
6346         }
6347         if (item_size < sizeof(*ei)) {
6348 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6349                 struct btrfs_extent_item_v0 *ei0;
6350                 BUG_ON(item_size != sizeof(*ei0));
6351                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6352                 refs = btrfs_extent_refs_v0(eb, ei0);
6353 #else
6354                 BUG();
6355 #endif
6356                 memset(&tmpl, 0, sizeof(tmpl));
6357                 tmpl.start = key.objectid;
6358                 tmpl.nr = num_bytes;
6359                 tmpl.extent_item_refs = refs;
6360                 tmpl.metadata = metadata;
6361                 tmpl.found_rec = 1;
6362                 tmpl.max_size = num_bytes;
6363
6364                 return add_extent_rec(extent_cache, &tmpl);
6365         }
6366
6367         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6368         refs = btrfs_extent_refs(eb, ei);
6369         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6370                 metadata = 1;
6371         else
6372                 metadata = 0;
6373         if (metadata && num_bytes != root->nodesize) {
6374                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6375                       num_bytes, root->nodesize);
6376                 return -EIO;
6377         }
6378         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6379                 error("ignore invalid data extent, length %llu is not aligned to %u",
6380                       num_bytes, root->sectorsize);
6381                 return -EIO;
6382         }
6383
6384         memset(&tmpl, 0, sizeof(tmpl));
6385         tmpl.start = key.objectid;
6386         tmpl.nr = num_bytes;
6387         tmpl.extent_item_refs = refs;
6388         tmpl.metadata = metadata;
6389         tmpl.found_rec = 1;
6390         tmpl.max_size = num_bytes;
6391         add_extent_rec(extent_cache, &tmpl);
6392
6393         ptr = (unsigned long)(ei + 1);
6394         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6395             key.type == BTRFS_EXTENT_ITEM_KEY)
6396                 ptr += sizeof(struct btrfs_tree_block_info);
6397
6398         end = (unsigned long)ei + item_size;
6399         while (ptr < end) {
6400                 iref = (struct btrfs_extent_inline_ref *)ptr;
6401                 type = btrfs_extent_inline_ref_type(eb, iref);
6402                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6403                 switch (type) {
6404                 case BTRFS_TREE_BLOCK_REF_KEY:
6405                         ret = add_tree_backref(extent_cache, key.objectid,
6406                                         0, offset, 0);
6407                         if (ret < 0)
6408                                 error("add_tree_backref failed: %s",
6409                                       strerror(-ret));
6410                         break;
6411                 case BTRFS_SHARED_BLOCK_REF_KEY:
6412                         ret = add_tree_backref(extent_cache, key.objectid,
6413                                         offset, 0, 0);
6414                         if (ret < 0)
6415                                 error("add_tree_backref failed: %s",
6416                                       strerror(-ret));
6417                         break;
6418                 case BTRFS_EXTENT_DATA_REF_KEY:
6419                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6420                         add_data_backref(extent_cache, key.objectid, 0,
6421                                         btrfs_extent_data_ref_root(eb, dref),
6422                                         btrfs_extent_data_ref_objectid(eb,
6423                                                                        dref),
6424                                         btrfs_extent_data_ref_offset(eb, dref),
6425                                         btrfs_extent_data_ref_count(eb, dref),
6426                                         0, num_bytes);
6427                         break;
6428                 case BTRFS_SHARED_DATA_REF_KEY:
6429                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6430                         add_data_backref(extent_cache, key.objectid, offset,
6431                                         0, 0, 0,
6432                                         btrfs_shared_data_ref_count(eb, sref),
6433                                         0, num_bytes);
6434                         break;
6435                 default:
6436                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6437                                 key.objectid, key.type, num_bytes);
6438                         goto out;
6439                 }
6440                 ptr += btrfs_extent_inline_ref_size(type);
6441         }
6442         WARN_ON(ptr > end);
6443 out:
6444         return 0;
6445 }
6446
6447 static int check_cache_range(struct btrfs_root *root,
6448                              struct btrfs_block_group_cache *cache,
6449                              u64 offset, u64 bytes)
6450 {
6451         struct btrfs_free_space *entry;
6452         u64 *logical;
6453         u64 bytenr;
6454         int stripe_len;
6455         int i, nr, ret;
6456
6457         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6458                 bytenr = btrfs_sb_offset(i);
6459                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6460                                        cache->key.objectid, bytenr, 0,
6461                                        &logical, &nr, &stripe_len);
6462                 if (ret)
6463                         return ret;
6464
6465                 while (nr--) {
6466                         if (logical[nr] + stripe_len <= offset)
6467                                 continue;
6468                         if (offset + bytes <= logical[nr])
6469                                 continue;
6470                         if (logical[nr] == offset) {
6471                                 if (stripe_len >= bytes) {
6472                                         free(logical);
6473                                         return 0;
6474                                 }
6475                                 bytes -= stripe_len;
6476                                 offset += stripe_len;
6477                         } else if (logical[nr] < offset) {
6478                                 if (logical[nr] + stripe_len >=
6479                                     offset + bytes) {
6480                                         free(logical);
6481                                         return 0;
6482                                 }
6483                                 bytes = (offset + bytes) -
6484                                         (logical[nr] + stripe_len);
6485                                 offset = logical[nr] + stripe_len;
6486                         } else {
6487                                 /*
6488                                  * Could be tricky, the super may land in the
6489                                  * middle of the area we're checking.  First
6490                                  * check the easiest case, it's at the end.
6491                                  */
6492                                 if (logical[nr] + stripe_len >=
6493                                     bytes + offset) {
6494                                         bytes = logical[nr] - offset;
6495                                         continue;
6496                                 }
6497
6498                                 /* Check the left side */
6499                                 ret = check_cache_range(root, cache,
6500                                                         offset,
6501                                                         logical[nr] - offset);
6502                                 if (ret) {
6503                                         free(logical);
6504                                         return ret;
6505                                 }
6506
6507                                 /* Now we continue with the right side */
6508                                 bytes = (offset + bytes) -
6509                                         (logical[nr] + stripe_len);
6510                                 offset = logical[nr] + stripe_len;
6511                         }
6512                 }
6513
6514                 free(logical);
6515         }
6516
6517         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6518         if (!entry) {
6519                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6520                         offset, offset+bytes);
6521                 return -EINVAL;
6522         }
6523
6524         if (entry->offset != offset) {
6525                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6526                         entry->offset);
6527                 return -EINVAL;
6528         }
6529
6530         if (entry->bytes != bytes) {
6531                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6532                         bytes, entry->bytes, offset);
6533                 return -EINVAL;
6534         }
6535
6536         unlink_free_space(cache->free_space_ctl, entry);
6537         free(entry);
6538         return 0;
6539 }
6540
6541 static int verify_space_cache(struct btrfs_root *root,
6542                               struct btrfs_block_group_cache *cache)
6543 {
6544         struct btrfs_path path;
6545         struct extent_buffer *leaf;
6546         struct btrfs_key key;
6547         u64 last;
6548         int ret = 0;
6549
6550         root = root->fs_info->extent_root;
6551
6552         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6553
6554         btrfs_init_path(&path);
6555         key.objectid = last;
6556         key.offset = 0;
6557         key.type = BTRFS_EXTENT_ITEM_KEY;
6558         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6559         if (ret < 0)
6560                 goto out;
6561         ret = 0;
6562         while (1) {
6563                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6564                         ret = btrfs_next_leaf(root, &path);
6565                         if (ret < 0)
6566                                 goto out;
6567                         if (ret > 0) {
6568                                 ret = 0;
6569                                 break;
6570                         }
6571                 }
6572                 leaf = path.nodes[0];
6573                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6574                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6575                         break;
6576                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6577                     key.type != BTRFS_METADATA_ITEM_KEY) {
6578                         path.slots[0]++;
6579                         continue;
6580                 }
6581
6582                 if (last == key.objectid) {
6583                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6584                                 last = key.objectid + key.offset;
6585                         else
6586                                 last = key.objectid + root->nodesize;
6587                         path.slots[0]++;
6588                         continue;
6589                 }
6590
6591                 ret = check_cache_range(root, cache, last,
6592                                         key.objectid - last);
6593                 if (ret)
6594                         break;
6595                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6596                         last = key.objectid + key.offset;
6597                 else
6598                         last = key.objectid + root->nodesize;
6599                 path.slots[0]++;
6600         }
6601
6602         if (last < cache->key.objectid + cache->key.offset)
6603                 ret = check_cache_range(root, cache, last,
6604                                         cache->key.objectid +
6605                                         cache->key.offset - last);
6606
6607 out:
6608         btrfs_release_path(&path);
6609
6610         if (!ret &&
6611             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6612                 fprintf(stderr, "There are still entries left in the space "
6613                         "cache\n");
6614                 ret = -EINVAL;
6615         }
6616
6617         return ret;
6618 }
6619
6620 static int check_space_cache(struct btrfs_root *root)
6621 {
6622         struct btrfs_block_group_cache *cache;
6623         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
6624         int ret;
6625         int error = 0;
6626
6627         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6628             btrfs_super_generation(root->fs_info->super_copy) !=
6629             btrfs_super_cache_generation(root->fs_info->super_copy)) {
6630                 printf("cache and super generation don't match, space cache "
6631                        "will be invalidated\n");
6632                 return 0;
6633         }
6634
6635         if (ctx.progress_enabled) {
6636                 ctx.tp = TASK_FREE_SPACE;
6637                 task_start(ctx.info);
6638         }
6639
6640         while (1) {
6641                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
6642                 if (!cache)
6643                         break;
6644
6645                 start = cache->key.objectid + cache->key.offset;
6646                 if (!cache->free_space_ctl) {
6647                         if (btrfs_init_free_space_ctl(cache,
6648                                                       root->sectorsize)) {
6649                                 ret = -ENOMEM;
6650                                 break;
6651                         }
6652                 } else {
6653                         btrfs_remove_free_space_cache(cache);
6654                 }
6655
6656                 if (btrfs_fs_compat_ro(root->fs_info,
6657                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
6658                         ret = exclude_super_stripes(root, cache);
6659                         if (ret) {
6660                                 fprintf(stderr, "could not exclude super stripes: %s\n",
6661                                         strerror(-ret));
6662                                 error++;
6663                                 continue;
6664                         }
6665                         ret = load_free_space_tree(root->fs_info, cache);
6666                         free_excluded_extents(root, cache);
6667                         if (ret < 0) {
6668                                 fprintf(stderr, "could not load free space tree: %s\n",
6669                                         strerror(-ret));
6670                                 error++;
6671                                 continue;
6672                         }
6673                         error += ret;
6674                 } else {
6675                         ret = load_free_space_cache(root->fs_info, cache);
6676                         if (!ret)
6677                                 continue;
6678                 }
6679
6680                 ret = verify_space_cache(root, cache);
6681                 if (ret) {
6682                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
6683                                 cache->key.objectid);
6684                         error++;
6685                 }
6686         }
6687
6688         task_stop(ctx.info);
6689
6690         return error ? -EINVAL : 0;
6691 }
6692
6693 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
6694                         u64 num_bytes, unsigned long leaf_offset,
6695                         struct extent_buffer *eb) {
6696
6697         u64 offset = 0;
6698         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6699         char *data;
6700         unsigned long csum_offset;
6701         u32 csum;
6702         u32 csum_expected;
6703         u64 read_len;
6704         u64 data_checked = 0;
6705         u64 tmp;
6706         int ret = 0;
6707         int mirror;
6708         int num_copies;
6709
6710         if (num_bytes % root->sectorsize)
6711                 return -EINVAL;
6712
6713         data = malloc(num_bytes);
6714         if (!data)
6715                 return -ENOMEM;
6716
6717         while (offset < num_bytes) {
6718                 mirror = 0;
6719 again:
6720                 read_len = num_bytes - offset;
6721                 /* read as much space once a time */
6722                 ret = read_extent_data(root, data + offset,
6723                                 bytenr + offset, &read_len, mirror);
6724                 if (ret)
6725                         goto out;
6726                 data_checked = 0;
6727                 /* verify every 4k data's checksum */
6728                 while (data_checked < read_len) {
6729                         csum = ~(u32)0;
6730                         tmp = offset + data_checked;
6731
6732                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
6733                                                csum, root->sectorsize);
6734                         btrfs_csum_final(csum, (u8 *)&csum);
6735
6736                         csum_offset = leaf_offset +
6737                                  tmp / root->sectorsize * csum_size;
6738                         read_extent_buffer(eb, (char *)&csum_expected,
6739                                            csum_offset, csum_size);
6740                         /* try another mirror */
6741                         if (csum != csum_expected) {
6742                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6743                                                 mirror, bytenr + tmp,
6744                                                 csum, csum_expected);
6745                                 num_copies = btrfs_num_copies(
6746                                                 &root->fs_info->mapping_tree,
6747                                                 bytenr, num_bytes);
6748                                 if (mirror < num_copies - 1) {
6749                                         mirror += 1;
6750                                         goto again;
6751                                 }
6752                         }
6753                         data_checked += root->sectorsize;
6754                 }
6755                 offset += read_len;
6756         }
6757 out:
6758         free(data);
6759         return ret;
6760 }
6761
6762 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6763                                u64 num_bytes)
6764 {
6765         struct btrfs_path path;
6766         struct extent_buffer *leaf;
6767         struct btrfs_key key;
6768         int ret;
6769
6770         btrfs_init_path(&path);
6771         key.objectid = bytenr;
6772         key.type = BTRFS_EXTENT_ITEM_KEY;
6773         key.offset = (u64)-1;
6774
6775 again:
6776         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6777                                 0, 0);
6778         if (ret < 0) {
6779                 fprintf(stderr, "Error looking up extent record %d\n", ret);
6780                 btrfs_release_path(&path);
6781                 return ret;
6782         } else if (ret) {
6783                 if (path.slots[0] > 0) {
6784                         path.slots[0]--;
6785                 } else {
6786                         ret = btrfs_prev_leaf(root, &path);
6787                         if (ret < 0) {
6788                                 goto out;
6789                         } else if (ret > 0) {
6790                                 ret = 0;
6791                                 goto out;
6792                         }
6793                 }
6794         }
6795
6796         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6797
6798         /*
6799          * Block group items come before extent items if they have the same
6800          * bytenr, so walk back one more just in case.  Dear future traveller,
6801          * first congrats on mastering time travel.  Now if it's not too much
6802          * trouble could you go back to 2006 and tell Chris to make the
6803          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6804          * EXTENT_ITEM_KEY please?
6805          */
6806         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6807                 if (path.slots[0] > 0) {
6808                         path.slots[0]--;
6809                 } else {
6810                         ret = btrfs_prev_leaf(root, &path);
6811                         if (ret < 0) {
6812                                 goto out;
6813                         } else if (ret > 0) {
6814                                 ret = 0;
6815                                 goto out;
6816                         }
6817                 }
6818                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6819         }
6820
6821         while (num_bytes) {
6822                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6823                         ret = btrfs_next_leaf(root, &path);
6824                         if (ret < 0) {
6825                                 fprintf(stderr, "Error going to next leaf "
6826                                         "%d\n", ret);
6827                                 btrfs_release_path(&path);
6828                                 return ret;
6829                         } else if (ret) {
6830                                 break;
6831                         }
6832                 }
6833                 leaf = path.nodes[0];
6834                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6835                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6836                         path.slots[0]++;
6837                         continue;
6838                 }
6839                 if (key.objectid + key.offset < bytenr) {
6840                         path.slots[0]++;
6841                         continue;
6842                 }
6843                 if (key.objectid > bytenr + num_bytes)
6844                         break;
6845
6846                 if (key.objectid == bytenr) {
6847                         if (key.offset >= num_bytes) {
6848                                 num_bytes = 0;
6849                                 break;
6850                         }
6851                         num_bytes -= key.offset;
6852                         bytenr += key.offset;
6853                 } else if (key.objectid < bytenr) {
6854                         if (key.objectid + key.offset >= bytenr + num_bytes) {
6855                                 num_bytes = 0;
6856                                 break;
6857                         }
6858                         num_bytes = (bytenr + num_bytes) -
6859                                 (key.objectid + key.offset);
6860                         bytenr = key.objectid + key.offset;
6861                 } else {
6862                         if (key.objectid + key.offset < bytenr + num_bytes) {
6863                                 u64 new_start = key.objectid + key.offset;
6864                                 u64 new_bytes = bytenr + num_bytes - new_start;
6865
6866                                 /*
6867                                  * Weird case, the extent is in the middle of
6868                                  * our range, we'll have to search one side
6869                                  * and then the other.  Not sure if this happens
6870                                  * in real life, but no harm in coding it up
6871                                  * anyway just in case.
6872                                  */
6873                                 btrfs_release_path(&path);
6874                                 ret = check_extent_exists(root, new_start,
6875                                                           new_bytes);
6876                                 if (ret) {
6877                                         fprintf(stderr, "Right section didn't "
6878                                                 "have a record\n");
6879                                         break;
6880                                 }
6881                                 num_bytes = key.objectid - bytenr;
6882                                 goto again;
6883                         }
6884                         num_bytes = key.objectid - bytenr;
6885                 }
6886                 path.slots[0]++;
6887         }
6888         ret = 0;
6889
6890 out:
6891         if (num_bytes && !ret) {
6892                 fprintf(stderr, "There are no extents for csum range "
6893                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6894                 ret = 1;
6895         }
6896
6897         btrfs_release_path(&path);
6898         return ret;
6899 }
6900
6901 static int check_csums(struct btrfs_root *root)
6902 {
6903         struct btrfs_path path;
6904         struct extent_buffer *leaf;
6905         struct btrfs_key key;
6906         u64 offset = 0, num_bytes = 0;
6907         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6908         int errors = 0;
6909         int ret;
6910         u64 data_len;
6911         unsigned long leaf_offset;
6912
6913         root = root->fs_info->csum_root;
6914         if (!extent_buffer_uptodate(root->node)) {
6915                 fprintf(stderr, "No valid csum tree found\n");
6916                 return -ENOENT;
6917         }
6918
6919         btrfs_init_path(&path);
6920         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6921         key.type = BTRFS_EXTENT_CSUM_KEY;
6922         key.offset = 0;
6923         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6924         if (ret < 0) {
6925                 fprintf(stderr, "Error searching csum tree %d\n", ret);
6926                 btrfs_release_path(&path);
6927                 return ret;
6928         }
6929
6930         if (ret > 0 && path.slots[0])
6931                 path.slots[0]--;
6932         ret = 0;
6933
6934         while (1) {
6935                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6936                         ret = btrfs_next_leaf(root, &path);
6937                         if (ret < 0) {
6938                                 fprintf(stderr, "Error going to next leaf "
6939                                         "%d\n", ret);
6940                                 break;
6941                         }
6942                         if (ret)
6943                                 break;
6944                 }
6945                 leaf = path.nodes[0];
6946
6947                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6948                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6949                         path.slots[0]++;
6950                         continue;
6951                 }
6952
6953                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6954                               csum_size) * root->sectorsize;
6955                 if (!check_data_csum)
6956                         goto skip_csum_check;
6957                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6958                 ret = check_extent_csums(root, key.offset, data_len,
6959                                          leaf_offset, leaf);
6960                 if (ret)
6961                         break;
6962 skip_csum_check:
6963                 if (!num_bytes) {
6964                         offset = key.offset;
6965                 } else if (key.offset != offset + num_bytes) {
6966                         ret = check_extent_exists(root, offset, num_bytes);
6967                         if (ret) {
6968                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6969                                         "there is no extent record\n",
6970                                         offset, offset+num_bytes);
6971                                 errors++;
6972                         }
6973                         offset = key.offset;
6974                         num_bytes = 0;
6975                 }
6976                 num_bytes += data_len;
6977                 path.slots[0]++;
6978         }
6979
6980         btrfs_release_path(&path);
6981         return errors;
6982 }
6983
6984 static int is_dropped_key(struct btrfs_key *key,
6985                           struct btrfs_key *drop_key) {
6986         if (key->objectid < drop_key->objectid)
6987                 return 1;
6988         else if (key->objectid == drop_key->objectid) {
6989                 if (key->type < drop_key->type)
6990                         return 1;
6991                 else if (key->type == drop_key->type) {
6992                         if (key->offset < drop_key->offset)
6993                                 return 1;
6994                 }
6995         }
6996         return 0;
6997 }
6998
6999 /*
7000  * Here are the rules for FULL_BACKREF.
7001  *
7002  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7003  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7004  *      FULL_BACKREF set.
7005  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7006  *    if it happened after the relocation occurred since we'll have dropped the
7007  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7008  *    have no real way to know for sure.
7009  *
7010  * We process the blocks one root at a time, and we start from the lowest root
7011  * objectid and go to the highest.  So we can just lookup the owner backref for
7012  * the record and if we don't find it then we know it doesn't exist and we have
7013  * a FULL BACKREF.
7014  *
7015  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7016  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7017  * be set or not and then we can check later once we've gathered all the refs.
7018  */
7019 static int calc_extent_flag(struct btrfs_root *root,
7020                            struct cache_tree *extent_cache,
7021                            struct extent_buffer *buf,
7022                            struct root_item_record *ri,
7023                            u64 *flags)
7024 {
7025         struct extent_record *rec;
7026         struct cache_extent *cache;
7027         struct tree_backref *tback;
7028         u64 owner = 0;
7029
7030         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7031         /* we have added this extent before */
7032         if (!cache)
7033                 return -ENOENT;
7034
7035         rec = container_of(cache, struct extent_record, cache);
7036
7037         /*
7038          * Except file/reloc tree, we can not have
7039          * FULL BACKREF MODE
7040          */
7041         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7042                 goto normal;
7043         /*
7044          * root node
7045          */
7046         if (buf->start == ri->bytenr)
7047                 goto normal;
7048
7049         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7050                 goto full_backref;
7051
7052         owner = btrfs_header_owner(buf);
7053         if (owner == ri->objectid)
7054                 goto normal;
7055
7056         tback = find_tree_backref(rec, 0, owner);
7057         if (!tback)
7058                 goto full_backref;
7059 normal:
7060         *flags = 0;
7061         if (rec->flag_block_full_backref != FLAG_UNSET &&
7062             rec->flag_block_full_backref != 0)
7063                 rec->bad_full_backref = 1;
7064         return 0;
7065 full_backref:
7066         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7067         if (rec->flag_block_full_backref != FLAG_UNSET &&
7068             rec->flag_block_full_backref != 1)
7069                 rec->bad_full_backref = 1;
7070         return 0;
7071 }
7072
7073 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7074 {
7075         fprintf(stderr, "Invalid key type(");
7076         print_key_type(stderr, 0, key_type);
7077         fprintf(stderr, ") found in root(");
7078         print_objectid(stderr, rootid, 0);
7079         fprintf(stderr, ")\n");
7080 }
7081
7082 /*
7083  * Check if the key is valid with its extent buffer.
7084  *
7085  * This is a early check in case invalid key exists in a extent buffer
7086  * This is not comprehensive yet, but should prevent wrong key/item passed
7087  * further
7088  */
7089 static int check_type_with_root(u64 rootid, u8 key_type)
7090 {
7091         switch (key_type) {
7092         /* Only valid in chunk tree */
7093         case BTRFS_DEV_ITEM_KEY:
7094         case BTRFS_CHUNK_ITEM_KEY:
7095                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7096                         goto err;
7097                 break;
7098         /* valid in csum and log tree */
7099         case BTRFS_CSUM_TREE_OBJECTID:
7100                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7101                       is_fstree(rootid)))
7102                         goto err;
7103                 break;
7104         case BTRFS_EXTENT_ITEM_KEY:
7105         case BTRFS_METADATA_ITEM_KEY:
7106         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7107                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7108                         goto err;
7109                 break;
7110         case BTRFS_ROOT_ITEM_KEY:
7111                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7112                         goto err;
7113                 break;
7114         case BTRFS_DEV_EXTENT_KEY:
7115                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7116                         goto err;
7117                 break;
7118         }
7119         return 0;
7120 err:
7121         report_mismatch_key_root(key_type, rootid);
7122         return -EINVAL;
7123 }
7124
7125 static int run_next_block(struct btrfs_root *root,
7126                           struct block_info *bits,
7127                           int bits_nr,
7128                           u64 *last,
7129                           struct cache_tree *pending,
7130                           struct cache_tree *seen,
7131                           struct cache_tree *reada,
7132                           struct cache_tree *nodes,
7133                           struct cache_tree *extent_cache,
7134                           struct cache_tree *chunk_cache,
7135                           struct rb_root *dev_cache,
7136                           struct block_group_tree *block_group_cache,
7137                           struct device_extent_tree *dev_extent_cache,
7138                           struct root_item_record *ri)
7139 {
7140         struct extent_buffer *buf;
7141         struct extent_record *rec = NULL;
7142         u64 bytenr;
7143         u32 size;
7144         u64 parent;
7145         u64 owner;
7146         u64 flags;
7147         u64 ptr;
7148         u64 gen = 0;
7149         int ret = 0;
7150         int i;
7151         int nritems;
7152         struct btrfs_key key;
7153         struct cache_extent *cache;
7154         int reada_bits;
7155
7156         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7157                                     bits_nr, &reada_bits);
7158         if (nritems == 0)
7159                 return 1;
7160
7161         if (!reada_bits) {
7162                 for(i = 0; i < nritems; i++) {
7163                         ret = add_cache_extent(reada, bits[i].start,
7164                                                bits[i].size);
7165                         if (ret == -EEXIST)
7166                                 continue;
7167
7168                         /* fixme, get the parent transid */
7169                         readahead_tree_block(root, bits[i].start,
7170                                              bits[i].size, 0);
7171                 }
7172         }
7173         *last = bits[0].start;
7174         bytenr = bits[0].start;
7175         size = bits[0].size;
7176
7177         cache = lookup_cache_extent(pending, bytenr, size);
7178         if (cache) {
7179                 remove_cache_extent(pending, cache);
7180                 free(cache);
7181         }
7182         cache = lookup_cache_extent(reada, bytenr, size);
7183         if (cache) {
7184                 remove_cache_extent(reada, cache);
7185                 free(cache);
7186         }
7187         cache = lookup_cache_extent(nodes, bytenr, size);
7188         if (cache) {
7189                 remove_cache_extent(nodes, cache);
7190                 free(cache);
7191         }
7192         cache = lookup_cache_extent(extent_cache, bytenr, size);
7193         if (cache) {
7194                 rec = container_of(cache, struct extent_record, cache);
7195                 gen = rec->parent_generation;
7196         }
7197
7198         /* fixme, get the real parent transid */
7199         buf = read_tree_block(root, bytenr, size, gen);
7200         if (!extent_buffer_uptodate(buf)) {
7201                 record_bad_block_io(root->fs_info,
7202                                     extent_cache, bytenr, size);
7203                 goto out;
7204         }
7205
7206         nritems = btrfs_header_nritems(buf);
7207
7208         flags = 0;
7209         if (!init_extent_tree) {
7210                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7211                                        btrfs_header_level(buf), 1, NULL,
7212                                        &flags);
7213                 if (ret < 0) {
7214                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7215                         if (ret < 0) {
7216                                 fprintf(stderr, "Couldn't calc extent flags\n");
7217                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7218                         }
7219                 }
7220         } else {
7221                 flags = 0;
7222                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7223                 if (ret < 0) {
7224                         fprintf(stderr, "Couldn't calc extent flags\n");
7225                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7226                 }
7227         }
7228
7229         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7230                 if (ri != NULL &&
7231                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7232                     ri->objectid == btrfs_header_owner(buf)) {
7233                         /*
7234                          * Ok we got to this block from it's original owner and
7235                          * we have FULL_BACKREF set.  Relocation can leave
7236                          * converted blocks over so this is altogether possible,
7237                          * however it's not possible if the generation > the
7238                          * last snapshot, so check for this case.
7239                          */
7240                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7241                             btrfs_header_generation(buf) > ri->last_snapshot) {
7242                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7243                                 rec->bad_full_backref = 1;
7244                         }
7245                 }
7246         } else {
7247                 if (ri != NULL &&
7248                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7249                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7250                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7251                         rec->bad_full_backref = 1;
7252                 }
7253         }
7254
7255         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7256                 rec->flag_block_full_backref = 1;
7257                 parent = bytenr;
7258                 owner = 0;
7259         } else {
7260                 rec->flag_block_full_backref = 0;
7261                 parent = 0;
7262                 owner = btrfs_header_owner(buf);
7263         }
7264
7265         ret = check_block(root, extent_cache, buf, flags);
7266         if (ret)
7267                 goto out;
7268
7269         if (btrfs_is_leaf(buf)) {
7270                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7271                 for (i = 0; i < nritems; i++) {
7272                         struct btrfs_file_extent_item *fi;
7273                         btrfs_item_key_to_cpu(buf, &key, i);
7274                         /*
7275                          * Check key type against the leaf owner.
7276                          * Could filter quite a lot of early error if
7277                          * owner is correct
7278                          */
7279                         if (check_type_with_root(btrfs_header_owner(buf),
7280                                                  key.type)) {
7281                                 fprintf(stderr, "ignoring invalid key\n");
7282                                 continue;
7283                         }
7284                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7285                                 process_extent_item(root, extent_cache, buf,
7286                                                     i);
7287                                 continue;
7288                         }
7289                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7290                                 process_extent_item(root, extent_cache, buf,
7291                                                     i);
7292                                 continue;
7293                         }
7294                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7295                                 total_csum_bytes +=
7296                                         btrfs_item_size_nr(buf, i);
7297                                 continue;
7298                         }
7299                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7300                                 process_chunk_item(chunk_cache, &key, buf, i);
7301                                 continue;
7302                         }
7303                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7304                                 process_device_item(dev_cache, &key, buf, i);
7305                                 continue;
7306                         }
7307                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7308                                 process_block_group_item(block_group_cache,
7309                                         &key, buf, i);
7310                                 continue;
7311                         }
7312                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7313                                 process_device_extent_item(dev_extent_cache,
7314                                         &key, buf, i);
7315                                 continue;
7316
7317                         }
7318                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7319 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7320                                 process_extent_ref_v0(extent_cache, buf, i);
7321 #else
7322                                 BUG();
7323 #endif
7324                                 continue;
7325                         }
7326
7327                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7328                                 ret = add_tree_backref(extent_cache,
7329                                                 key.objectid, 0, key.offset, 0);
7330                                 if (ret < 0)
7331                                         error("add_tree_backref failed: %s",
7332                                               strerror(-ret));
7333                                 continue;
7334                         }
7335                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7336                                 ret = add_tree_backref(extent_cache,
7337                                                 key.objectid, key.offset, 0, 0);
7338                                 if (ret < 0)
7339                                         error("add_tree_backref failed: %s",
7340                                               strerror(-ret));
7341                                 continue;
7342                         }
7343                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7344                                 struct btrfs_extent_data_ref *ref;
7345                                 ref = btrfs_item_ptr(buf, i,
7346                                                 struct btrfs_extent_data_ref);
7347                                 add_data_backref(extent_cache,
7348                                         key.objectid, 0,
7349                                         btrfs_extent_data_ref_root(buf, ref),
7350                                         btrfs_extent_data_ref_objectid(buf,
7351                                                                        ref),
7352                                         btrfs_extent_data_ref_offset(buf, ref),
7353                                         btrfs_extent_data_ref_count(buf, ref),
7354                                         0, root->sectorsize);
7355                                 continue;
7356                         }
7357                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7358                                 struct btrfs_shared_data_ref *ref;
7359                                 ref = btrfs_item_ptr(buf, i,
7360                                                 struct btrfs_shared_data_ref);
7361                                 add_data_backref(extent_cache,
7362                                         key.objectid, key.offset, 0, 0, 0,
7363                                         btrfs_shared_data_ref_count(buf, ref),
7364                                         0, root->sectorsize);
7365                                 continue;
7366                         }
7367                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7368                                 struct bad_item *bad;
7369
7370                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7371                                         continue;
7372                                 if (!owner)
7373                                         continue;
7374                                 bad = malloc(sizeof(struct bad_item));
7375                                 if (!bad)
7376                                         continue;
7377                                 INIT_LIST_HEAD(&bad->list);
7378                                 memcpy(&bad->key, &key,
7379                                        sizeof(struct btrfs_key));
7380                                 bad->root_id = owner;
7381                                 list_add_tail(&bad->list, &delete_items);
7382                                 continue;
7383                         }
7384                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7385                                 continue;
7386                         fi = btrfs_item_ptr(buf, i,
7387                                             struct btrfs_file_extent_item);
7388                         if (btrfs_file_extent_type(buf, fi) ==
7389                             BTRFS_FILE_EXTENT_INLINE)
7390                                 continue;
7391                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7392                                 continue;
7393
7394                         data_bytes_allocated +=
7395                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7396                         if (data_bytes_allocated < root->sectorsize) {
7397                                 abort();
7398                         }
7399                         data_bytes_referenced +=
7400                                 btrfs_file_extent_num_bytes(buf, fi);
7401                         add_data_backref(extent_cache,
7402                                 btrfs_file_extent_disk_bytenr(buf, fi),
7403                                 parent, owner, key.objectid, key.offset -
7404                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7405                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7406                 }
7407         } else {
7408                 int level;
7409                 struct btrfs_key first_key;
7410
7411                 first_key.objectid = 0;
7412
7413                 if (nritems > 0)
7414                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7415                 level = btrfs_header_level(buf);
7416                 for (i = 0; i < nritems; i++) {
7417                         struct extent_record tmpl;
7418
7419                         ptr = btrfs_node_blockptr(buf, i);
7420                         size = root->nodesize;
7421                         btrfs_node_key_to_cpu(buf, &key, i);
7422                         if (ri != NULL) {
7423                                 if ((level == ri->drop_level)
7424                                     && is_dropped_key(&key, &ri->drop_key)) {
7425                                         continue;
7426                                 }
7427                         }
7428
7429                         memset(&tmpl, 0, sizeof(tmpl));
7430                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7431                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7432                         tmpl.start = ptr;
7433                         tmpl.nr = size;
7434                         tmpl.refs = 1;
7435                         tmpl.metadata = 1;
7436                         tmpl.max_size = size;
7437                         ret = add_extent_rec(extent_cache, &tmpl);
7438                         if (ret < 0)
7439                                 goto out;
7440
7441                         ret = add_tree_backref(extent_cache, ptr, parent,
7442                                         owner, 1);
7443                         if (ret < 0) {
7444                                 error("add_tree_backref failed: %s",
7445                                       strerror(-ret));
7446                                 continue;
7447                         }
7448
7449                         if (level > 1) {
7450                                 add_pending(nodes, seen, ptr, size);
7451                         } else {
7452                                 add_pending(pending, seen, ptr, size);
7453                         }
7454                 }
7455                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7456                                       nritems) * sizeof(struct btrfs_key_ptr);
7457         }
7458         total_btree_bytes += buf->len;
7459         if (fs_root_objectid(btrfs_header_owner(buf)))
7460                 total_fs_tree_bytes += buf->len;
7461         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7462                 total_extent_tree_bytes += buf->len;
7463         if (!found_old_backref &&
7464             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7465             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7466             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7467                 found_old_backref = 1;
7468 out:
7469         free_extent_buffer(buf);
7470         return ret;
7471 }
7472
7473 static int add_root_to_pending(struct extent_buffer *buf,
7474                                struct cache_tree *extent_cache,
7475                                struct cache_tree *pending,
7476                                struct cache_tree *seen,
7477                                struct cache_tree *nodes,
7478                                u64 objectid)
7479 {
7480         struct extent_record tmpl;
7481         int ret;
7482
7483         if (btrfs_header_level(buf) > 0)
7484                 add_pending(nodes, seen, buf->start, buf->len);
7485         else
7486                 add_pending(pending, seen, buf->start, buf->len);
7487
7488         memset(&tmpl, 0, sizeof(tmpl));
7489         tmpl.start = buf->start;
7490         tmpl.nr = buf->len;
7491         tmpl.is_root = 1;
7492         tmpl.refs = 1;
7493         tmpl.metadata = 1;
7494         tmpl.max_size = buf->len;
7495         add_extent_rec(extent_cache, &tmpl);
7496
7497         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7498             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7499                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7500                                 0, 1);
7501         else
7502                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7503                                 1);
7504         return ret;
7505 }
7506
7507 /* as we fix the tree, we might be deleting blocks that
7508  * we're tracking for repair.  This hook makes sure we
7509  * remove any backrefs for blocks as we are fixing them.
7510  */
7511 static int free_extent_hook(struct btrfs_trans_handle *trans,
7512                             struct btrfs_root *root,
7513                             u64 bytenr, u64 num_bytes, u64 parent,
7514                             u64 root_objectid, u64 owner, u64 offset,
7515                             int refs_to_drop)
7516 {
7517         struct extent_record *rec;
7518         struct cache_extent *cache;
7519         int is_data;
7520         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7521
7522         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7523         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7524         if (!cache)
7525                 return 0;
7526
7527         rec = container_of(cache, struct extent_record, cache);
7528         if (is_data) {
7529                 struct data_backref *back;
7530                 back = find_data_backref(rec, parent, root_objectid, owner,
7531                                          offset, 1, bytenr, num_bytes);
7532                 if (!back)
7533                         goto out;
7534                 if (back->node.found_ref) {
7535                         back->found_ref -= refs_to_drop;
7536                         if (rec->refs)
7537                                 rec->refs -= refs_to_drop;
7538                 }
7539                 if (back->node.found_extent_tree) {
7540                         back->num_refs -= refs_to_drop;
7541                         if (rec->extent_item_refs)
7542                                 rec->extent_item_refs -= refs_to_drop;
7543                 }
7544                 if (back->found_ref == 0)
7545                         back->node.found_ref = 0;
7546                 if (back->num_refs == 0)
7547                         back->node.found_extent_tree = 0;
7548
7549                 if (!back->node.found_extent_tree && back->node.found_ref) {
7550                         list_del(&back->node.list);
7551                         free(back);
7552                 }
7553         } else {
7554                 struct tree_backref *back;
7555                 back = find_tree_backref(rec, parent, root_objectid);
7556                 if (!back)
7557                         goto out;
7558                 if (back->node.found_ref) {
7559                         if (rec->refs)
7560                                 rec->refs--;
7561                         back->node.found_ref = 0;
7562                 }
7563                 if (back->node.found_extent_tree) {
7564                         if (rec->extent_item_refs)
7565                                 rec->extent_item_refs--;
7566                         back->node.found_extent_tree = 0;
7567                 }
7568                 if (!back->node.found_extent_tree && back->node.found_ref) {
7569                         list_del(&back->node.list);
7570                         free(back);
7571                 }
7572         }
7573         maybe_free_extent_rec(extent_cache, rec);
7574 out:
7575         return 0;
7576 }
7577
7578 static int delete_extent_records(struct btrfs_trans_handle *trans,
7579                                  struct btrfs_root *root,
7580                                  struct btrfs_path *path,
7581                                  u64 bytenr, u64 new_len)
7582 {
7583         struct btrfs_key key;
7584         struct btrfs_key found_key;
7585         struct extent_buffer *leaf;
7586         int ret;
7587         int slot;
7588
7589
7590         key.objectid = bytenr;
7591         key.type = (u8)-1;
7592         key.offset = (u64)-1;
7593
7594         while(1) {
7595                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7596                                         &key, path, 0, 1);
7597                 if (ret < 0)
7598                         break;
7599
7600                 if (ret > 0) {
7601                         ret = 0;
7602                         if (path->slots[0] == 0)
7603                                 break;
7604                         path->slots[0]--;
7605                 }
7606                 ret = 0;
7607
7608                 leaf = path->nodes[0];
7609                 slot = path->slots[0];
7610
7611                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7612                 if (found_key.objectid != bytenr)
7613                         break;
7614
7615                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7616                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
7617                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7618                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7619                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7620                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7621                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7622                         btrfs_release_path(path);
7623                         if (found_key.type == 0) {
7624                                 if (found_key.offset == 0)
7625                                         break;
7626                                 key.offset = found_key.offset - 1;
7627                                 key.type = found_key.type;
7628                         }
7629                         key.type = found_key.type - 1;
7630                         key.offset = (u64)-1;
7631                         continue;
7632                 }
7633
7634                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7635                         found_key.objectid, found_key.type, found_key.offset);
7636
7637                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7638                 if (ret)
7639                         break;
7640                 btrfs_release_path(path);
7641
7642                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7643                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
7644                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7645                                 found_key.offset : root->nodesize;
7646
7647                         ret = btrfs_update_block_group(trans, root, bytenr,
7648                                                        bytes, 0, 0);
7649                         if (ret)
7650                                 break;
7651                 }
7652         }
7653
7654         btrfs_release_path(path);
7655         return ret;
7656 }
7657
7658 /*
7659  * for a single backref, this will allocate a new extent
7660  * and add the backref to it.
7661  */
7662 static int record_extent(struct btrfs_trans_handle *trans,
7663                          struct btrfs_fs_info *info,
7664                          struct btrfs_path *path,
7665                          struct extent_record *rec,
7666                          struct extent_backref *back,
7667                          int allocated, u64 flags)
7668 {
7669         int ret;
7670         struct btrfs_root *extent_root = info->extent_root;
7671         struct extent_buffer *leaf;
7672         struct btrfs_key ins_key;
7673         struct btrfs_extent_item *ei;
7674         struct data_backref *dback;
7675         struct btrfs_tree_block_info *bi;
7676
7677         if (!back->is_data)
7678                 rec->max_size = max_t(u64, rec->max_size,
7679                                     info->extent_root->nodesize);
7680
7681         if (!allocated) {
7682                 u32 item_size = sizeof(*ei);
7683
7684                 if (!back->is_data)
7685                         item_size += sizeof(*bi);
7686
7687                 ins_key.objectid = rec->start;
7688                 ins_key.offset = rec->max_size;
7689                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
7690
7691                 ret = btrfs_insert_empty_item(trans, extent_root, path,
7692                                         &ins_key, item_size);
7693                 if (ret)
7694                         goto fail;
7695
7696                 leaf = path->nodes[0];
7697                 ei = btrfs_item_ptr(leaf, path->slots[0],
7698                                     struct btrfs_extent_item);
7699
7700                 btrfs_set_extent_refs(leaf, ei, 0);
7701                 btrfs_set_extent_generation(leaf, ei, rec->generation);
7702
7703                 if (back->is_data) {
7704                         btrfs_set_extent_flags(leaf, ei,
7705                                                BTRFS_EXTENT_FLAG_DATA);
7706                 } else {
7707                         struct btrfs_disk_key copy_key;;
7708
7709                         bi = (struct btrfs_tree_block_info *)(ei + 1);
7710                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
7711                                              sizeof(*bi));
7712
7713                         btrfs_set_disk_key_objectid(&copy_key,
7714                                                     rec->info_objectid);
7715                         btrfs_set_disk_key_type(&copy_key, 0);
7716                         btrfs_set_disk_key_offset(&copy_key, 0);
7717
7718                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7719                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
7720
7721                         btrfs_set_extent_flags(leaf, ei,
7722                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7723                 }
7724
7725                 btrfs_mark_buffer_dirty(leaf);
7726                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7727                                                rec->max_size, 1, 0);
7728                 if (ret)
7729                         goto fail;
7730                 btrfs_release_path(path);
7731         }
7732
7733         if (back->is_data) {
7734                 u64 parent;
7735                 int i;
7736
7737                 dback = to_data_backref(back);
7738                 if (back->full_backref)
7739                         parent = dback->parent;
7740                 else
7741                         parent = 0;
7742
7743                 for (i = 0; i < dback->found_ref; i++) {
7744                         /* if parent != 0, we're doing a full backref
7745                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7746                          * just makes the backref allocator create a data
7747                          * backref
7748                          */
7749                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
7750                                                    rec->start, rec->max_size,
7751                                                    parent,
7752                                                    dback->root,
7753                                                    parent ?
7754                                                    BTRFS_FIRST_FREE_OBJECTID :
7755                                                    dback->owner,
7756                                                    dback->offset);
7757                         if (ret)
7758                                 break;
7759                 }
7760                 fprintf(stderr, "adding new data backref"
7761                                 " on %llu %s %llu owner %llu"
7762                                 " offset %llu found %d\n",
7763                                 (unsigned long long)rec->start,
7764                                 back->full_backref ?
7765                                 "parent" : "root",
7766                                 back->full_backref ?
7767                                 (unsigned long long)parent :
7768                                 (unsigned long long)dback->root,
7769                                 (unsigned long long)dback->owner,
7770                                 (unsigned long long)dback->offset,
7771                                 dback->found_ref);
7772         } else {
7773                 u64 parent;
7774                 struct tree_backref *tback;
7775
7776                 tback = to_tree_backref(back);
7777                 if (back->full_backref)
7778                         parent = tback->parent;
7779                 else
7780                         parent = 0;
7781
7782                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7783                                            rec->start, rec->max_size,
7784                                            parent, tback->root, 0, 0);
7785                 fprintf(stderr, "adding new tree backref on "
7786                         "start %llu len %llu parent %llu root %llu\n",
7787                         rec->start, rec->max_size, parent, tback->root);
7788         }
7789 fail:
7790         btrfs_release_path(path);
7791         return ret;
7792 }
7793
7794 static struct extent_entry *find_entry(struct list_head *entries,
7795                                        u64 bytenr, u64 bytes)
7796 {
7797         struct extent_entry *entry = NULL;
7798
7799         list_for_each_entry(entry, entries, list) {
7800                 if (entry->bytenr == bytenr && entry->bytes == bytes)
7801                         return entry;
7802         }
7803
7804         return NULL;
7805 }
7806
7807 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7808 {
7809         struct extent_entry *entry, *best = NULL, *prev = NULL;
7810
7811         list_for_each_entry(entry, entries, list) {
7812                 /*
7813                  * If there are as many broken entries as entries then we know
7814                  * not to trust this particular entry.
7815                  */
7816                 if (entry->broken == entry->count)
7817                         continue;
7818
7819                 /*
7820                  * Special case, when there are only two entries and 'best' is
7821                  * the first one
7822                  */
7823                 if (!prev) {
7824                         best = entry;
7825                         prev = entry;
7826                         continue;
7827                 }
7828
7829                 /*
7830                  * If our current entry == best then we can't be sure our best
7831                  * is really the best, so we need to keep searching.
7832                  */
7833                 if (best && best->count == entry->count) {
7834                         prev = entry;
7835                         best = NULL;
7836                         continue;
7837                 }
7838
7839                 /* Prev == entry, not good enough, have to keep searching */
7840                 if (!prev->broken && prev->count == entry->count)
7841                         continue;
7842
7843                 if (!best)
7844                         best = (prev->count > entry->count) ? prev : entry;
7845                 else if (best->count < entry->count)
7846                         best = entry;
7847                 prev = entry;
7848         }
7849
7850         return best;
7851 }
7852
7853 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7854                       struct data_backref *dback, struct extent_entry *entry)
7855 {
7856         struct btrfs_trans_handle *trans;
7857         struct btrfs_root *root;
7858         struct btrfs_file_extent_item *fi;
7859         struct extent_buffer *leaf;
7860         struct btrfs_key key;
7861         u64 bytenr, bytes;
7862         int ret, err;
7863
7864         key.objectid = dback->root;
7865         key.type = BTRFS_ROOT_ITEM_KEY;
7866         key.offset = (u64)-1;
7867         root = btrfs_read_fs_root(info, &key);
7868         if (IS_ERR(root)) {
7869                 fprintf(stderr, "Couldn't find root for our ref\n");
7870                 return -EINVAL;
7871         }
7872
7873         /*
7874          * The backref points to the original offset of the extent if it was
7875          * split, so we need to search down to the offset we have and then walk
7876          * forward until we find the backref we're looking for.
7877          */
7878         key.objectid = dback->owner;
7879         key.type = BTRFS_EXTENT_DATA_KEY;
7880         key.offset = dback->offset;
7881         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7882         if (ret < 0) {
7883                 fprintf(stderr, "Error looking up ref %d\n", ret);
7884                 return ret;
7885         }
7886
7887         while (1) {
7888                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7889                         ret = btrfs_next_leaf(root, path);
7890                         if (ret) {
7891                                 fprintf(stderr, "Couldn't find our ref, next\n");
7892                                 return -EINVAL;
7893                         }
7894                 }
7895                 leaf = path->nodes[0];
7896                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7897                 if (key.objectid != dback->owner ||
7898                     key.type != BTRFS_EXTENT_DATA_KEY) {
7899                         fprintf(stderr, "Couldn't find our ref, search\n");
7900                         return -EINVAL;
7901                 }
7902                 fi = btrfs_item_ptr(leaf, path->slots[0],
7903                                     struct btrfs_file_extent_item);
7904                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7905                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7906
7907                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7908                         break;
7909                 path->slots[0]++;
7910         }
7911
7912         btrfs_release_path(path);
7913
7914         trans = btrfs_start_transaction(root, 1);
7915         if (IS_ERR(trans))
7916                 return PTR_ERR(trans);
7917
7918         /*
7919          * Ok we have the key of the file extent we want to fix, now we can cow
7920          * down to the thing and fix it.
7921          */
7922         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7923         if (ret < 0) {
7924                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7925                         key.objectid, key.type, key.offset, ret);
7926                 goto out;
7927         }
7928         if (ret > 0) {
7929                 fprintf(stderr, "Well that's odd, we just found this key "
7930                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7931                         key.offset);
7932                 ret = -EINVAL;
7933                 goto out;
7934         }
7935         leaf = path->nodes[0];
7936         fi = btrfs_item_ptr(leaf, path->slots[0],
7937                             struct btrfs_file_extent_item);
7938
7939         if (btrfs_file_extent_compression(leaf, fi) &&
7940             dback->disk_bytenr != entry->bytenr) {
7941                 fprintf(stderr, "Ref doesn't match the record start and is "
7942                         "compressed, please take a btrfs-image of this file "
7943                         "system and send it to a btrfs developer so they can "
7944                         "complete this functionality for bytenr %Lu\n",
7945                         dback->disk_bytenr);
7946                 ret = -EINVAL;
7947                 goto out;
7948         }
7949
7950         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7951                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7952         } else if (dback->disk_bytenr > entry->bytenr) {
7953                 u64 off_diff, offset;
7954
7955                 off_diff = dback->disk_bytenr - entry->bytenr;
7956                 offset = btrfs_file_extent_offset(leaf, fi);
7957                 if (dback->disk_bytenr + offset +
7958                     btrfs_file_extent_num_bytes(leaf, fi) >
7959                     entry->bytenr + entry->bytes) {
7960                         fprintf(stderr, "Ref is past the entry end, please "
7961                                 "take a btrfs-image of this file system and "
7962                                 "send it to a btrfs developer, ref %Lu\n",
7963                                 dback->disk_bytenr);
7964                         ret = -EINVAL;
7965                         goto out;
7966                 }
7967                 offset += off_diff;
7968                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7969                 btrfs_set_file_extent_offset(leaf, fi, offset);
7970         } else if (dback->disk_bytenr < entry->bytenr) {
7971                 u64 offset;
7972
7973                 offset = btrfs_file_extent_offset(leaf, fi);
7974                 if (dback->disk_bytenr + offset < entry->bytenr) {
7975                         fprintf(stderr, "Ref is before the entry start, please"
7976                                 " take a btrfs-image of this file system and "
7977                                 "send it to a btrfs developer, ref %Lu\n",
7978                                 dback->disk_bytenr);
7979                         ret = -EINVAL;
7980                         goto out;
7981                 }
7982
7983                 offset += dback->disk_bytenr;
7984                 offset -= entry->bytenr;
7985                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7986                 btrfs_set_file_extent_offset(leaf, fi, offset);
7987         }
7988
7989         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7990
7991         /*
7992          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7993          * only do this if we aren't using compression, otherwise it's a
7994          * trickier case.
7995          */
7996         if (!btrfs_file_extent_compression(leaf, fi))
7997                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7998         else
7999                 printf("ram bytes may be wrong?\n");
8000         btrfs_mark_buffer_dirty(leaf);
8001 out:
8002         err = btrfs_commit_transaction(trans, root);
8003         btrfs_release_path(path);
8004         return ret ? ret : err;
8005 }
8006
8007 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8008                            struct extent_record *rec)
8009 {
8010         struct extent_backref *back;
8011         struct data_backref *dback;
8012         struct extent_entry *entry, *best = NULL;
8013         LIST_HEAD(entries);
8014         int nr_entries = 0;
8015         int broken_entries = 0;
8016         int ret = 0;
8017         short mismatch = 0;
8018
8019         /*
8020          * Metadata is easy and the backrefs should always agree on bytenr and
8021          * size, if not we've got bigger issues.
8022          */
8023         if (rec->metadata)
8024                 return 0;
8025
8026         list_for_each_entry(back, &rec->backrefs, list) {
8027                 if (back->full_backref || !back->is_data)
8028                         continue;
8029
8030                 dback = to_data_backref(back);
8031
8032                 /*
8033                  * We only pay attention to backrefs that we found a real
8034                  * backref for.
8035                  */
8036                 if (dback->found_ref == 0)
8037                         continue;
8038
8039                 /*
8040                  * For now we only catch when the bytes don't match, not the
8041                  * bytenr.  We can easily do this at the same time, but I want
8042                  * to have a fs image to test on before we just add repair
8043                  * functionality willy-nilly so we know we won't screw up the
8044                  * repair.
8045                  */
8046
8047                 entry = find_entry(&entries, dback->disk_bytenr,
8048                                    dback->bytes);
8049                 if (!entry) {
8050                         entry = malloc(sizeof(struct extent_entry));
8051                         if (!entry) {
8052                                 ret = -ENOMEM;
8053                                 goto out;
8054                         }
8055                         memset(entry, 0, sizeof(*entry));
8056                         entry->bytenr = dback->disk_bytenr;
8057                         entry->bytes = dback->bytes;
8058                         list_add_tail(&entry->list, &entries);
8059                         nr_entries++;
8060                 }
8061
8062                 /*
8063                  * If we only have on entry we may think the entries agree when
8064                  * in reality they don't so we have to do some extra checking.
8065                  */
8066                 if (dback->disk_bytenr != rec->start ||
8067                     dback->bytes != rec->nr || back->broken)
8068                         mismatch = 1;
8069
8070                 if (back->broken) {
8071                         entry->broken++;
8072                         broken_entries++;
8073                 }
8074
8075                 entry->count++;
8076         }
8077
8078         /* Yay all the backrefs agree, carry on good sir */
8079         if (nr_entries <= 1 && !mismatch)
8080                 goto out;
8081
8082         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8083                 "%Lu\n", rec->start);
8084
8085         /*
8086          * First we want to see if the backrefs can agree amongst themselves who
8087          * is right, so figure out which one of the entries has the highest
8088          * count.
8089          */
8090         best = find_most_right_entry(&entries);
8091
8092         /*
8093          * Ok so we may have an even split between what the backrefs think, so
8094          * this is where we use the extent ref to see what it thinks.
8095          */
8096         if (!best) {
8097                 entry = find_entry(&entries, rec->start, rec->nr);
8098                 if (!entry && (!broken_entries || !rec->found_rec)) {
8099                         fprintf(stderr, "Backrefs don't agree with each other "
8100                                 "and extent record doesn't agree with anybody,"
8101                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8102                                 rec->start, rec->nr);
8103                         ret = -EINVAL;
8104                         goto out;
8105                 } else if (!entry) {
8106                         /*
8107                          * Ok our backrefs were broken, we'll assume this is the
8108                          * correct value and add an entry for this range.
8109                          */
8110                         entry = malloc(sizeof(struct extent_entry));
8111                         if (!entry) {
8112                                 ret = -ENOMEM;
8113                                 goto out;
8114                         }
8115                         memset(entry, 0, sizeof(*entry));
8116                         entry->bytenr = rec->start;
8117                         entry->bytes = rec->nr;
8118                         list_add_tail(&entry->list, &entries);
8119                         nr_entries++;
8120                 }
8121                 entry->count++;
8122                 best = find_most_right_entry(&entries);
8123                 if (!best) {
8124                         fprintf(stderr, "Backrefs and extent record evenly "
8125                                 "split on who is right, this is going to "
8126                                 "require user input to fix bytenr %Lu bytes "
8127                                 "%Lu\n", rec->start, rec->nr);
8128                         ret = -EINVAL;
8129                         goto out;
8130                 }
8131         }
8132
8133         /*
8134          * I don't think this can happen currently as we'll abort() if we catch
8135          * this case higher up, but in case somebody removes that we still can't
8136          * deal with it properly here yet, so just bail out of that's the case.
8137          */
8138         if (best->bytenr != rec->start) {
8139                 fprintf(stderr, "Extent start and backref starts don't match, "
8140                         "please use btrfs-image on this file system and send "
8141                         "it to a btrfs developer so they can make fsck fix "
8142                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8143                         rec->start, rec->nr);
8144                 ret = -EINVAL;
8145                 goto out;
8146         }
8147
8148         /*
8149          * Ok great we all agreed on an extent record, let's go find the real
8150          * references and fix up the ones that don't match.
8151          */
8152         list_for_each_entry(back, &rec->backrefs, list) {
8153                 if (back->full_backref || !back->is_data)
8154                         continue;
8155
8156                 dback = to_data_backref(back);
8157
8158                 /*
8159                  * Still ignoring backrefs that don't have a real ref attached
8160                  * to them.
8161                  */
8162                 if (dback->found_ref == 0)
8163                         continue;
8164
8165                 if (dback->bytes == best->bytes &&
8166                     dback->disk_bytenr == best->bytenr)
8167                         continue;
8168
8169                 ret = repair_ref(info, path, dback, best);
8170                 if (ret)
8171                         goto out;
8172         }
8173
8174         /*
8175          * Ok we messed with the actual refs, which means we need to drop our
8176          * entire cache and go back and rescan.  I know this is a huge pain and
8177          * adds a lot of extra work, but it's the only way to be safe.  Once all
8178          * the backrefs agree we may not need to do anything to the extent
8179          * record itself.
8180          */
8181         ret = -EAGAIN;
8182 out:
8183         while (!list_empty(&entries)) {
8184                 entry = list_entry(entries.next, struct extent_entry, list);
8185                 list_del_init(&entry->list);
8186                 free(entry);
8187         }
8188         return ret;
8189 }
8190
8191 static int process_duplicates(struct btrfs_root *root,
8192                               struct cache_tree *extent_cache,
8193                               struct extent_record *rec)
8194 {
8195         struct extent_record *good, *tmp;
8196         struct cache_extent *cache;
8197         int ret;
8198
8199         /*
8200          * If we found a extent record for this extent then return, or if we
8201          * have more than one duplicate we are likely going to need to delete
8202          * something.
8203          */
8204         if (rec->found_rec || rec->num_duplicates > 1)
8205                 return 0;
8206
8207         /* Shouldn't happen but just in case */
8208         BUG_ON(!rec->num_duplicates);
8209
8210         /*
8211          * So this happens if we end up with a backref that doesn't match the
8212          * actual extent entry.  So either the backref is bad or the extent
8213          * entry is bad.  Either way we want to have the extent_record actually
8214          * reflect what we found in the extent_tree, so we need to take the
8215          * duplicate out and use that as the extent_record since the only way we
8216          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8217          */
8218         remove_cache_extent(extent_cache, &rec->cache);
8219
8220         good = to_extent_record(rec->dups.next);
8221         list_del_init(&good->list);
8222         INIT_LIST_HEAD(&good->backrefs);
8223         INIT_LIST_HEAD(&good->dups);
8224         good->cache.start = good->start;
8225         good->cache.size = good->nr;
8226         good->content_checked = 0;
8227         good->owner_ref_checked = 0;
8228         good->num_duplicates = 0;
8229         good->refs = rec->refs;
8230         list_splice_init(&rec->backrefs, &good->backrefs);
8231         while (1) {
8232                 cache = lookup_cache_extent(extent_cache, good->start,
8233                                             good->nr);
8234                 if (!cache)
8235                         break;
8236                 tmp = container_of(cache, struct extent_record, cache);
8237
8238                 /*
8239                  * If we find another overlapping extent and it's found_rec is
8240                  * set then it's a duplicate and we need to try and delete
8241                  * something.
8242                  */
8243                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8244                         if (list_empty(&good->list))
8245                                 list_add_tail(&good->list,
8246                                               &duplicate_extents);
8247                         good->num_duplicates += tmp->num_duplicates + 1;
8248                         list_splice_init(&tmp->dups, &good->dups);
8249                         list_del_init(&tmp->list);
8250                         list_add_tail(&tmp->list, &good->dups);
8251                         remove_cache_extent(extent_cache, &tmp->cache);
8252                         continue;
8253                 }
8254
8255                 /*
8256                  * Ok we have another non extent item backed extent rec, so lets
8257                  * just add it to this extent and carry on like we did above.
8258                  */
8259                 good->refs += tmp->refs;
8260                 list_splice_init(&tmp->backrefs, &good->backrefs);
8261                 remove_cache_extent(extent_cache, &tmp->cache);
8262                 free(tmp);
8263         }
8264         ret = insert_cache_extent(extent_cache, &good->cache);
8265         BUG_ON(ret);
8266         free(rec);
8267         return good->num_duplicates ? 0 : 1;
8268 }
8269
8270 static int delete_duplicate_records(struct btrfs_root *root,
8271                                     struct extent_record *rec)
8272 {
8273         struct btrfs_trans_handle *trans;
8274         LIST_HEAD(delete_list);
8275         struct btrfs_path path;
8276         struct extent_record *tmp, *good, *n;
8277         int nr_del = 0;
8278         int ret = 0, err;
8279         struct btrfs_key key;
8280
8281         btrfs_init_path(&path);
8282
8283         good = rec;
8284         /* Find the record that covers all of the duplicates. */
8285         list_for_each_entry(tmp, &rec->dups, list) {
8286                 if (good->start < tmp->start)
8287                         continue;
8288                 if (good->nr > tmp->nr)
8289                         continue;
8290
8291                 if (tmp->start + tmp->nr < good->start + good->nr) {
8292                         fprintf(stderr, "Ok we have overlapping extents that "
8293                                 "aren't completely covered by each other, this "
8294                                 "is going to require more careful thought.  "
8295                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8296                                 tmp->start, tmp->nr, good->start, good->nr);
8297                         abort();
8298                 }
8299                 good = tmp;
8300         }
8301
8302         if (good != rec)
8303                 list_add_tail(&rec->list, &delete_list);
8304
8305         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8306                 if (tmp == good)
8307                         continue;
8308                 list_move_tail(&tmp->list, &delete_list);
8309         }
8310
8311         root = root->fs_info->extent_root;
8312         trans = btrfs_start_transaction(root, 1);
8313         if (IS_ERR(trans)) {
8314                 ret = PTR_ERR(trans);
8315                 goto out;
8316         }
8317
8318         list_for_each_entry(tmp, &delete_list, list) {
8319                 if (tmp->found_rec == 0)
8320                         continue;
8321                 key.objectid = tmp->start;
8322                 key.type = BTRFS_EXTENT_ITEM_KEY;
8323                 key.offset = tmp->nr;
8324
8325                 /* Shouldn't happen but just in case */
8326                 if (tmp->metadata) {
8327                         fprintf(stderr, "Well this shouldn't happen, extent "
8328                                 "record overlaps but is metadata? "
8329                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8330                         abort();
8331                 }
8332
8333                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8334                 if (ret) {
8335                         if (ret > 0)
8336                                 ret = -EINVAL;
8337                         break;
8338                 }
8339                 ret = btrfs_del_item(trans, root, &path);
8340                 if (ret)
8341                         break;
8342                 btrfs_release_path(&path);
8343                 nr_del++;
8344         }
8345         err = btrfs_commit_transaction(trans, root);
8346         if (err && !ret)
8347                 ret = err;
8348 out:
8349         while (!list_empty(&delete_list)) {
8350                 tmp = to_extent_record(delete_list.next);
8351                 list_del_init(&tmp->list);
8352                 if (tmp == rec)
8353                         continue;
8354                 free(tmp);
8355         }
8356
8357         while (!list_empty(&rec->dups)) {
8358                 tmp = to_extent_record(rec->dups.next);
8359                 list_del_init(&tmp->list);
8360                 free(tmp);
8361         }
8362
8363         btrfs_release_path(&path);
8364
8365         if (!ret && !nr_del)
8366                 rec->num_duplicates = 0;
8367
8368         return ret ? ret : nr_del;
8369 }
8370
8371 static int find_possible_backrefs(struct btrfs_fs_info *info,
8372                                   struct btrfs_path *path,
8373                                   struct cache_tree *extent_cache,
8374                                   struct extent_record *rec)
8375 {
8376         struct btrfs_root *root;
8377         struct extent_backref *back;
8378         struct data_backref *dback;
8379         struct cache_extent *cache;
8380         struct btrfs_file_extent_item *fi;
8381         struct btrfs_key key;
8382         u64 bytenr, bytes;
8383         int ret;
8384
8385         list_for_each_entry(back, &rec->backrefs, list) {
8386                 /* Don't care about full backrefs (poor unloved backrefs) */
8387                 if (back->full_backref || !back->is_data)
8388                         continue;
8389
8390                 dback = to_data_backref(back);
8391
8392                 /* We found this one, we don't need to do a lookup */
8393                 if (dback->found_ref)
8394                         continue;
8395
8396                 key.objectid = dback->root;
8397                 key.type = BTRFS_ROOT_ITEM_KEY;
8398                 key.offset = (u64)-1;
8399
8400                 root = btrfs_read_fs_root(info, &key);
8401
8402                 /* No root, definitely a bad ref, skip */
8403                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8404                         continue;
8405                 /* Other err, exit */
8406                 if (IS_ERR(root))
8407                         return PTR_ERR(root);
8408
8409                 key.objectid = dback->owner;
8410                 key.type = BTRFS_EXTENT_DATA_KEY;
8411                 key.offset = dback->offset;
8412                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8413                 if (ret) {
8414                         btrfs_release_path(path);
8415                         if (ret < 0)
8416                                 return ret;
8417                         /* Didn't find it, we can carry on */
8418                         ret = 0;
8419                         continue;
8420                 }
8421
8422                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8423                                     struct btrfs_file_extent_item);
8424                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8425                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8426                 btrfs_release_path(path);
8427                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8428                 if (cache) {
8429                         struct extent_record *tmp;
8430                         tmp = container_of(cache, struct extent_record, cache);
8431
8432                         /*
8433                          * If we found an extent record for the bytenr for this
8434                          * particular backref then we can't add it to our
8435                          * current extent record.  We only want to add backrefs
8436                          * that don't have a corresponding extent item in the
8437                          * extent tree since they likely belong to this record
8438                          * and we need to fix it if it doesn't match bytenrs.
8439                          */
8440                         if  (tmp->found_rec)
8441                                 continue;
8442                 }
8443
8444                 dback->found_ref += 1;
8445                 dback->disk_bytenr = bytenr;
8446                 dback->bytes = bytes;
8447
8448                 /*
8449                  * Set this so the verify backref code knows not to trust the
8450                  * values in this backref.
8451                  */
8452                 back->broken = 1;
8453         }
8454
8455         return 0;
8456 }
8457
8458 /*
8459  * Record orphan data ref into corresponding root.
8460  *
8461  * Return 0 if the extent item contains data ref and recorded.
8462  * Return 1 if the extent item contains no useful data ref
8463  *   On that case, it may contains only shared_dataref or metadata backref
8464  *   or the file extent exists(this should be handled by the extent bytenr
8465  *   recovery routine)
8466  * Return <0 if something goes wrong.
8467  */
8468 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8469                                       struct extent_record *rec)
8470 {
8471         struct btrfs_key key;
8472         struct btrfs_root *dest_root;
8473         struct extent_backref *back;
8474         struct data_backref *dback;
8475         struct orphan_data_extent *orphan;
8476         struct btrfs_path path;
8477         int recorded_data_ref = 0;
8478         int ret = 0;
8479
8480         if (rec->metadata)
8481                 return 1;
8482         btrfs_init_path(&path);
8483         list_for_each_entry(back, &rec->backrefs, list) {
8484                 if (back->full_backref || !back->is_data ||
8485                     !back->found_extent_tree)
8486                         continue;
8487                 dback = to_data_backref(back);
8488                 if (dback->found_ref)
8489                         continue;
8490                 key.objectid = dback->root;
8491                 key.type = BTRFS_ROOT_ITEM_KEY;
8492                 key.offset = (u64)-1;
8493
8494                 dest_root = btrfs_read_fs_root(fs_info, &key);
8495
8496                 /* For non-exist root we just skip it */
8497                 if (IS_ERR(dest_root) || !dest_root)
8498                         continue;
8499
8500                 key.objectid = dback->owner;
8501                 key.type = BTRFS_EXTENT_DATA_KEY;
8502                 key.offset = dback->offset;
8503
8504                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8505                 btrfs_release_path(&path);
8506                 /*
8507                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8508                  * we need to record it for inode/file extent rebuild.
8509                  * For ret > 0, we record it only for file extent rebuild.
8510                  * For ret == 0, the file extent exists but only bytenr
8511                  * mismatch, let the original bytenr fix routine to handle,
8512                  * don't record it.
8513                  */
8514                 if (ret == 0)
8515                         continue;
8516                 ret = 0;
8517                 orphan = malloc(sizeof(*orphan));
8518                 if (!orphan) {
8519                         ret = -ENOMEM;
8520                         goto out;
8521                 }
8522                 INIT_LIST_HEAD(&orphan->list);
8523                 orphan->root = dback->root;
8524                 orphan->objectid = dback->owner;
8525                 orphan->offset = dback->offset;
8526                 orphan->disk_bytenr = rec->cache.start;
8527                 orphan->disk_len = rec->cache.size;
8528                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8529                 recorded_data_ref = 1;
8530         }
8531 out:
8532         btrfs_release_path(&path);
8533         if (!ret)
8534                 return !recorded_data_ref;
8535         else
8536                 return ret;
8537 }
8538
8539 /*
8540  * when an incorrect extent item is found, this will delete
8541  * all of the existing entries for it and recreate them
8542  * based on what the tree scan found.
8543  */
8544 static int fixup_extent_refs(struct btrfs_fs_info *info,
8545                              struct cache_tree *extent_cache,
8546                              struct extent_record *rec)
8547 {
8548         struct btrfs_trans_handle *trans = NULL;
8549         int ret;
8550         struct btrfs_path path;
8551         struct list_head *cur = rec->backrefs.next;
8552         struct cache_extent *cache;
8553         struct extent_backref *back;
8554         int allocated = 0;
8555         u64 flags = 0;
8556
8557         if (rec->flag_block_full_backref)
8558                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8559
8560         btrfs_init_path(&path);
8561         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8562                 /*
8563                  * Sometimes the backrefs themselves are so broken they don't
8564                  * get attached to any meaningful rec, so first go back and
8565                  * check any of our backrefs that we couldn't find and throw
8566                  * them into the list if we find the backref so that
8567                  * verify_backrefs can figure out what to do.
8568                  */
8569                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8570                 if (ret < 0)
8571                         goto out;
8572         }
8573
8574         /* step one, make sure all of the backrefs agree */
8575         ret = verify_backrefs(info, &path, rec);
8576         if (ret < 0)
8577                 goto out;
8578
8579         trans = btrfs_start_transaction(info->extent_root, 1);
8580         if (IS_ERR(trans)) {
8581                 ret = PTR_ERR(trans);
8582                 goto out;
8583         }
8584
8585         /* step two, delete all the existing records */
8586         ret = delete_extent_records(trans, info->extent_root, &path,
8587                                     rec->start, rec->max_size);
8588
8589         if (ret < 0)
8590                 goto out;
8591
8592         /* was this block corrupt?  If so, don't add references to it */
8593         cache = lookup_cache_extent(info->corrupt_blocks,
8594                                     rec->start, rec->max_size);
8595         if (cache) {
8596                 ret = 0;
8597                 goto out;
8598         }
8599
8600         /* step three, recreate all the refs we did find */
8601         while(cur != &rec->backrefs) {
8602                 back = to_extent_backref(cur);
8603                 cur = cur->next;
8604
8605                 /*
8606                  * if we didn't find any references, don't create a
8607                  * new extent record
8608                  */
8609                 if (!back->found_ref)
8610                         continue;
8611
8612                 rec->bad_full_backref = 0;
8613                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8614                 allocated = 1;
8615
8616                 if (ret)
8617                         goto out;
8618         }
8619 out:
8620         if (trans) {
8621                 int err = btrfs_commit_transaction(trans, info->extent_root);
8622                 if (!ret)
8623                         ret = err;
8624         }
8625
8626         btrfs_release_path(&path);
8627         return ret;
8628 }
8629
8630 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8631                               struct extent_record *rec)
8632 {
8633         struct btrfs_trans_handle *trans;
8634         struct btrfs_root *root = fs_info->extent_root;
8635         struct btrfs_path path;
8636         struct btrfs_extent_item *ei;
8637         struct btrfs_key key;
8638         u64 flags;
8639         int ret = 0;
8640
8641         key.objectid = rec->start;
8642         if (rec->metadata) {
8643                 key.type = BTRFS_METADATA_ITEM_KEY;
8644                 key.offset = rec->info_level;
8645         } else {
8646                 key.type = BTRFS_EXTENT_ITEM_KEY;
8647                 key.offset = rec->max_size;
8648         }
8649
8650         trans = btrfs_start_transaction(root, 0);
8651         if (IS_ERR(trans))
8652                 return PTR_ERR(trans);
8653
8654         btrfs_init_path(&path);
8655         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8656         if (ret < 0) {
8657                 btrfs_release_path(&path);
8658                 btrfs_commit_transaction(trans, root);
8659                 return ret;
8660         } else if (ret) {
8661                 fprintf(stderr, "Didn't find extent for %llu\n",
8662                         (unsigned long long)rec->start);
8663                 btrfs_release_path(&path);
8664                 btrfs_commit_transaction(trans, root);
8665                 return -ENOENT;
8666         }
8667
8668         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8669                             struct btrfs_extent_item);
8670         flags = btrfs_extent_flags(path.nodes[0], ei);
8671         if (rec->flag_block_full_backref) {
8672                 fprintf(stderr, "setting full backref on %llu\n",
8673                         (unsigned long long)key.objectid);
8674                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8675         } else {
8676                 fprintf(stderr, "clearing full backref on %llu\n",
8677                         (unsigned long long)key.objectid);
8678                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8679         }
8680         btrfs_set_extent_flags(path.nodes[0], ei, flags);
8681         btrfs_mark_buffer_dirty(path.nodes[0]);
8682         btrfs_release_path(&path);
8683         return btrfs_commit_transaction(trans, root);
8684 }
8685
8686 /* right now we only prune from the extent allocation tree */
8687 static int prune_one_block(struct btrfs_trans_handle *trans,
8688                            struct btrfs_fs_info *info,
8689                            struct btrfs_corrupt_block *corrupt)
8690 {
8691         int ret;
8692         struct btrfs_path path;
8693         struct extent_buffer *eb;
8694         u64 found;
8695         int slot;
8696         int nritems;
8697         int level = corrupt->level + 1;
8698
8699         btrfs_init_path(&path);
8700 again:
8701         /* we want to stop at the parent to our busted block */
8702         path.lowest_level = level;
8703
8704         ret = btrfs_search_slot(trans, info->extent_root,
8705                                 &corrupt->key, &path, -1, 1);
8706
8707         if (ret < 0)
8708                 goto out;
8709
8710         eb = path.nodes[level];
8711         if (!eb) {
8712                 ret = -ENOENT;
8713                 goto out;
8714         }
8715
8716         /*
8717          * hopefully the search gave us the block we want to prune,
8718          * lets try that first
8719          */
8720         slot = path.slots[level];
8721         found =  btrfs_node_blockptr(eb, slot);
8722         if (found == corrupt->cache.start)
8723                 goto del_ptr;
8724
8725         nritems = btrfs_header_nritems(eb);
8726
8727         /* the search failed, lets scan this node and hope we find it */
8728         for (slot = 0; slot < nritems; slot++) {
8729                 found =  btrfs_node_blockptr(eb, slot);
8730                 if (found == corrupt->cache.start)
8731                         goto del_ptr;
8732         }
8733         /*
8734          * we couldn't find the bad block.  TODO, search all the nodes for pointers
8735          * to this block
8736          */
8737         if (eb == info->extent_root->node) {
8738                 ret = -ENOENT;
8739                 goto out;
8740         } else {
8741                 level++;
8742                 btrfs_release_path(&path);
8743                 goto again;
8744         }
8745
8746 del_ptr:
8747         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8748         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8749
8750 out:
8751         btrfs_release_path(&path);
8752         return ret;
8753 }
8754
8755 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8756 {
8757         struct btrfs_trans_handle *trans = NULL;
8758         struct cache_extent *cache;
8759         struct btrfs_corrupt_block *corrupt;
8760
8761         while (1) {
8762                 cache = search_cache_extent(info->corrupt_blocks, 0);
8763                 if (!cache)
8764                         break;
8765                 if (!trans) {
8766                         trans = btrfs_start_transaction(info->extent_root, 1);
8767                         if (IS_ERR(trans))
8768                                 return PTR_ERR(trans);
8769                 }
8770                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8771                 prune_one_block(trans, info, corrupt);
8772                 remove_cache_extent(info->corrupt_blocks, cache);
8773         }
8774         if (trans)
8775                 return btrfs_commit_transaction(trans, info->extent_root);
8776         return 0;
8777 }
8778
8779 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8780 {
8781         struct btrfs_block_group_cache *cache;
8782         u64 start, end;
8783         int ret;
8784
8785         while (1) {
8786                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8787                                             &start, &end, EXTENT_DIRTY);
8788                 if (ret)
8789                         break;
8790                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8791                                    GFP_NOFS);
8792         }
8793
8794         start = 0;
8795         while (1) {
8796                 cache = btrfs_lookup_first_block_group(fs_info, start);
8797                 if (!cache)
8798                         break;
8799                 if (cache->cached)
8800                         cache->cached = 0;
8801                 start = cache->key.objectid + cache->key.offset;
8802         }
8803 }
8804
8805 static int check_extent_refs(struct btrfs_root *root,
8806                              struct cache_tree *extent_cache)
8807 {
8808         struct extent_record *rec;
8809         struct cache_extent *cache;
8810         int err = 0;
8811         int ret = 0;
8812         int fixed = 0;
8813         int had_dups = 0;
8814         int recorded = 0;
8815
8816         if (repair) {
8817                 /*
8818                  * if we're doing a repair, we have to make sure
8819                  * we don't allocate from the problem extents.
8820                  * In the worst case, this will be all the
8821                  * extents in the FS
8822                  */
8823                 cache = search_cache_extent(extent_cache, 0);
8824                 while(cache) {
8825                         rec = container_of(cache, struct extent_record, cache);
8826                         set_extent_dirty(root->fs_info->excluded_extents,
8827                                          rec->start,
8828                                          rec->start + rec->max_size - 1,
8829                                          GFP_NOFS);
8830                         cache = next_cache_extent(cache);
8831                 }
8832
8833                 /* pin down all the corrupted blocks too */
8834                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8835                 while(cache) {
8836                         set_extent_dirty(root->fs_info->excluded_extents,
8837                                          cache->start,
8838                                          cache->start + cache->size - 1,
8839                                          GFP_NOFS);
8840                         cache = next_cache_extent(cache);
8841                 }
8842                 prune_corrupt_blocks(root->fs_info);
8843                 reset_cached_block_groups(root->fs_info);
8844         }
8845
8846         reset_cached_block_groups(root->fs_info);
8847
8848         /*
8849          * We need to delete any duplicate entries we find first otherwise we
8850          * could mess up the extent tree when we have backrefs that actually
8851          * belong to a different extent item and not the weird duplicate one.
8852          */
8853         while (repair && !list_empty(&duplicate_extents)) {
8854                 rec = to_extent_record(duplicate_extents.next);
8855                 list_del_init(&rec->list);
8856
8857                 /* Sometimes we can find a backref before we find an actual
8858                  * extent, so we need to process it a little bit to see if there
8859                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8860                  * if this is a backref screwup.  If we need to delete stuff
8861                  * process_duplicates() will return 0, otherwise it will return
8862                  * 1 and we
8863                  */
8864                 if (process_duplicates(root, extent_cache, rec))
8865                         continue;
8866                 ret = delete_duplicate_records(root, rec);
8867                 if (ret < 0)
8868                         return ret;
8869                 /*
8870                  * delete_duplicate_records will return the number of entries
8871                  * deleted, so if it's greater than 0 then we know we actually
8872                  * did something and we need to remove.
8873                  */
8874                 if (ret)
8875                         had_dups = 1;
8876         }
8877
8878         if (had_dups)
8879                 return -EAGAIN;
8880
8881         while(1) {
8882                 int cur_err = 0;
8883
8884                 fixed = 0;
8885                 recorded = 0;
8886                 cache = search_cache_extent(extent_cache, 0);
8887                 if (!cache)
8888                         break;
8889                 rec = container_of(cache, struct extent_record, cache);
8890                 if (rec->num_duplicates) {
8891                         fprintf(stderr, "extent item %llu has multiple extent "
8892                                 "items\n", (unsigned long long)rec->start);
8893                         err = 1;
8894                         cur_err = 1;
8895                 }
8896
8897                 if (rec->refs != rec->extent_item_refs) {
8898                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
8899                                 (unsigned long long)rec->start,
8900                                 (unsigned long long)rec->nr);
8901                         fprintf(stderr, "extent item %llu, found %llu\n",
8902                                 (unsigned long long)rec->extent_item_refs,
8903                                 (unsigned long long)rec->refs);
8904                         ret = record_orphan_data_extents(root->fs_info, rec);
8905                         if (ret < 0)
8906                                 goto repair_abort;
8907                         if (ret == 0) {
8908                                 recorded = 1;
8909                         } else {
8910                                 /*
8911                                  * we can't use the extent to repair file
8912                                  * extent, let the fallback method handle it.
8913                                  */
8914                                 if (!fixed && repair) {
8915                                         ret = fixup_extent_refs(
8916                                                         root->fs_info,
8917                                                         extent_cache, rec);
8918                                         if (ret)
8919                                                 goto repair_abort;
8920                                         fixed = 1;
8921                                 }
8922                         }
8923                         err = 1;
8924                         cur_err = 1;
8925                 }
8926                 if (all_backpointers_checked(rec, 1)) {
8927                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8928                                 (unsigned long long)rec->start,
8929                                 (unsigned long long)rec->nr);
8930
8931                         if (!fixed && !recorded && repair) {
8932                                 ret = fixup_extent_refs(root->fs_info,
8933                                                         extent_cache, rec);
8934                                 if (ret)
8935                                         goto repair_abort;
8936                                 fixed = 1;
8937                         }
8938                         cur_err = 1;
8939                         err = 1;
8940                 }
8941                 if (!rec->owner_ref_checked) {
8942                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8943                                 (unsigned long long)rec->start,
8944                                 (unsigned long long)rec->nr);
8945                         if (!fixed && !recorded && repair) {
8946                                 ret = fixup_extent_refs(root->fs_info,
8947                                                         extent_cache, rec);
8948                                 if (ret)
8949                                         goto repair_abort;
8950                                 fixed = 1;
8951                         }
8952                         err = 1;
8953                         cur_err = 1;
8954                 }
8955                 if (rec->bad_full_backref) {
8956                         fprintf(stderr, "bad full backref, on [%llu]\n",
8957                                 (unsigned long long)rec->start);
8958                         if (repair) {
8959                                 ret = fixup_extent_flags(root->fs_info, rec);
8960                                 if (ret)
8961                                         goto repair_abort;
8962                                 fixed = 1;
8963                         }
8964                         err = 1;
8965                         cur_err = 1;
8966                 }
8967                 /*
8968                  * Although it's not a extent ref's problem, we reuse this
8969                  * routine for error reporting.
8970                  * No repair function yet.
8971                  */
8972                 if (rec->crossing_stripes) {
8973                         fprintf(stderr,
8974                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8975                                 rec->start, rec->start + rec->max_size);
8976                         err = 1;
8977                         cur_err = 1;
8978                 }
8979
8980                 if (rec->wrong_chunk_type) {
8981                         fprintf(stderr,
8982                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8983                                 rec->start, rec->start + rec->max_size);
8984                         err = 1;
8985                         cur_err = 1;
8986                 }
8987
8988                 remove_cache_extent(extent_cache, cache);
8989                 free_all_extent_backrefs(rec);
8990                 if (!init_extent_tree && repair && (!cur_err || fixed))
8991                         clear_extent_dirty(root->fs_info->excluded_extents,
8992                                            rec->start,
8993                                            rec->start + rec->max_size - 1,
8994                                            GFP_NOFS);
8995                 free(rec);
8996         }
8997 repair_abort:
8998         if (repair) {
8999                 if (ret && ret != -EAGAIN) {
9000                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9001                         exit(1);
9002                 } else if (!ret) {
9003                         struct btrfs_trans_handle *trans;
9004
9005                         root = root->fs_info->extent_root;
9006                         trans = btrfs_start_transaction(root, 1);
9007                         if (IS_ERR(trans)) {
9008                                 ret = PTR_ERR(trans);
9009                                 goto repair_abort;
9010                         }
9011
9012                         btrfs_fix_block_accounting(trans, root);
9013                         ret = btrfs_commit_transaction(trans, root);
9014                         if (ret)
9015                                 goto repair_abort;
9016                 }
9017                 if (err)
9018                         fprintf(stderr, "repaired damaged extent references\n");
9019                 return ret;
9020         }
9021         return err;
9022 }
9023
9024 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9025 {
9026         u64 stripe_size;
9027
9028         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9029                 stripe_size = length;
9030                 stripe_size /= num_stripes;
9031         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9032                 stripe_size = length * 2;
9033                 stripe_size /= num_stripes;
9034         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9035                 stripe_size = length;
9036                 stripe_size /= (num_stripes - 1);
9037         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9038                 stripe_size = length;
9039                 stripe_size /= (num_stripes - 2);
9040         } else {
9041                 stripe_size = length;
9042         }
9043         return stripe_size;
9044 }
9045
9046 /*
9047  * Check the chunk with its block group/dev list ref:
9048  * Return 0 if all refs seems valid.
9049  * Return 1 if part of refs seems valid, need later check for rebuild ref
9050  * like missing block group and needs to search extent tree to rebuild them.
9051  * Return -1 if essential refs are missing and unable to rebuild.
9052  */
9053 static int check_chunk_refs(struct chunk_record *chunk_rec,
9054                             struct block_group_tree *block_group_cache,
9055                             struct device_extent_tree *dev_extent_cache,
9056                             int silent)
9057 {
9058         struct cache_extent *block_group_item;
9059         struct block_group_record *block_group_rec;
9060         struct cache_extent *dev_extent_item;
9061         struct device_extent_record *dev_extent_rec;
9062         u64 devid;
9063         u64 offset;
9064         u64 length;
9065         int metadump_v2 = 0;
9066         int i;
9067         int ret = 0;
9068
9069         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9070                                                chunk_rec->offset,
9071                                                chunk_rec->length);
9072         if (block_group_item) {
9073                 block_group_rec = container_of(block_group_item,
9074                                                struct block_group_record,
9075                                                cache);
9076                 if (chunk_rec->length != block_group_rec->offset ||
9077                     chunk_rec->offset != block_group_rec->objectid ||
9078                     (!metadump_v2 &&
9079                      chunk_rec->type_flags != block_group_rec->flags)) {
9080                         if (!silent)
9081                                 fprintf(stderr,
9082                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9083                                         chunk_rec->objectid,
9084                                         chunk_rec->type,
9085                                         chunk_rec->offset,
9086                                         chunk_rec->length,
9087                                         chunk_rec->offset,
9088                                         chunk_rec->type_flags,
9089                                         block_group_rec->objectid,
9090                                         block_group_rec->type,
9091                                         block_group_rec->offset,
9092                                         block_group_rec->offset,
9093                                         block_group_rec->objectid,
9094                                         block_group_rec->flags);
9095                         ret = -1;
9096                 } else {
9097                         list_del_init(&block_group_rec->list);
9098                         chunk_rec->bg_rec = block_group_rec;
9099                 }
9100         } else {
9101                 if (!silent)
9102                         fprintf(stderr,
9103                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9104                                 chunk_rec->objectid,
9105                                 chunk_rec->type,
9106                                 chunk_rec->offset,
9107                                 chunk_rec->length,
9108                                 chunk_rec->offset,
9109                                 chunk_rec->type_flags);
9110                 ret = 1;
9111         }
9112
9113         if (metadump_v2)
9114                 return ret;
9115
9116         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9117                                     chunk_rec->num_stripes);
9118         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9119                 devid = chunk_rec->stripes[i].devid;
9120                 offset = chunk_rec->stripes[i].offset;
9121                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9122                                                        devid, offset, length);
9123                 if (dev_extent_item) {
9124                         dev_extent_rec = container_of(dev_extent_item,
9125                                                 struct device_extent_record,
9126                                                 cache);
9127                         if (dev_extent_rec->objectid != devid ||
9128                             dev_extent_rec->offset != offset ||
9129                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9130                             dev_extent_rec->length != length) {
9131                                 if (!silent)
9132                                         fprintf(stderr,
9133                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9134                                                 chunk_rec->objectid,
9135                                                 chunk_rec->type,
9136                                                 chunk_rec->offset,
9137                                                 chunk_rec->stripes[i].devid,
9138                                                 chunk_rec->stripes[i].offset,
9139                                                 dev_extent_rec->objectid,
9140                                                 dev_extent_rec->offset,
9141                                                 dev_extent_rec->length);
9142                                 ret = -1;
9143                         } else {
9144                                 list_move(&dev_extent_rec->chunk_list,
9145                                           &chunk_rec->dextents);
9146                         }
9147                 } else {
9148                         if (!silent)
9149                                 fprintf(stderr,
9150                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9151                                         chunk_rec->objectid,
9152                                         chunk_rec->type,
9153                                         chunk_rec->offset,
9154                                         chunk_rec->stripes[i].devid,
9155                                         chunk_rec->stripes[i].offset);
9156                         ret = -1;
9157                 }
9158         }
9159         return ret;
9160 }
9161
9162 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9163 int check_chunks(struct cache_tree *chunk_cache,
9164                  struct block_group_tree *block_group_cache,
9165                  struct device_extent_tree *dev_extent_cache,
9166                  struct list_head *good, struct list_head *bad,
9167                  struct list_head *rebuild, int silent)
9168 {
9169         struct cache_extent *chunk_item;
9170         struct chunk_record *chunk_rec;
9171         struct block_group_record *bg_rec;
9172         struct device_extent_record *dext_rec;
9173         int err;
9174         int ret = 0;
9175
9176         chunk_item = first_cache_extent(chunk_cache);
9177         while (chunk_item) {
9178                 chunk_rec = container_of(chunk_item, struct chunk_record,
9179                                          cache);
9180                 err = check_chunk_refs(chunk_rec, block_group_cache,
9181                                        dev_extent_cache, silent);
9182                 if (err < 0)
9183                         ret = err;
9184                 if (err == 0 && good)
9185                         list_add_tail(&chunk_rec->list, good);
9186                 if (err > 0 && rebuild)
9187                         list_add_tail(&chunk_rec->list, rebuild);
9188                 if (err < 0 && bad)
9189                         list_add_tail(&chunk_rec->list, bad);
9190                 chunk_item = next_cache_extent(chunk_item);
9191         }
9192
9193         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9194                 if (!silent)
9195                         fprintf(stderr,
9196                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9197                                 bg_rec->objectid,
9198                                 bg_rec->offset,
9199                                 bg_rec->flags);
9200                 if (!ret)
9201                         ret = 1;
9202         }
9203
9204         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9205                             chunk_list) {
9206                 if (!silent)
9207                         fprintf(stderr,
9208                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9209                                 dext_rec->objectid,
9210                                 dext_rec->offset,
9211                                 dext_rec->length);
9212                 if (!ret)
9213                         ret = 1;
9214         }
9215         return ret;
9216 }
9217
9218
9219 static int check_device_used(struct device_record *dev_rec,
9220                              struct device_extent_tree *dext_cache)
9221 {
9222         struct cache_extent *cache;
9223         struct device_extent_record *dev_extent_rec;
9224         u64 total_byte = 0;
9225
9226         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9227         while (cache) {
9228                 dev_extent_rec = container_of(cache,
9229                                               struct device_extent_record,
9230                                               cache);
9231                 if (dev_extent_rec->objectid != dev_rec->devid)
9232                         break;
9233
9234                 list_del_init(&dev_extent_rec->device_list);
9235                 total_byte += dev_extent_rec->length;
9236                 cache = next_cache_extent(cache);
9237         }
9238
9239         if (total_byte != dev_rec->byte_used) {
9240                 fprintf(stderr,
9241                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9242                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9243                         dev_rec->type, dev_rec->offset);
9244                 return -1;
9245         } else {
9246                 return 0;
9247         }
9248 }
9249
9250 /* check btrfs_dev_item -> btrfs_dev_extent */
9251 static int check_devices(struct rb_root *dev_cache,
9252                          struct device_extent_tree *dev_extent_cache)
9253 {
9254         struct rb_node *dev_node;
9255         struct device_record *dev_rec;
9256         struct device_extent_record *dext_rec;
9257         int err;
9258         int ret = 0;
9259
9260         dev_node = rb_first(dev_cache);
9261         while (dev_node) {
9262                 dev_rec = container_of(dev_node, struct device_record, node);
9263                 err = check_device_used(dev_rec, dev_extent_cache);
9264                 if (err)
9265                         ret = err;
9266
9267                 dev_node = rb_next(dev_node);
9268         }
9269         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9270                             device_list) {
9271                 fprintf(stderr,
9272                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9273                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9274                 if (!ret)
9275                         ret = 1;
9276         }
9277         return ret;
9278 }
9279
9280 static int add_root_item_to_list(struct list_head *head,
9281                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9282                                   u8 level, u8 drop_level,
9283                                   int level_size, struct btrfs_key *drop_key)
9284 {
9285
9286         struct root_item_record *ri_rec;
9287         ri_rec = malloc(sizeof(*ri_rec));
9288         if (!ri_rec)
9289                 return -ENOMEM;
9290         ri_rec->bytenr = bytenr;
9291         ri_rec->objectid = objectid;
9292         ri_rec->level = level;
9293         ri_rec->level_size = level_size;
9294         ri_rec->drop_level = drop_level;
9295         ri_rec->last_snapshot = last_snapshot;
9296         if (drop_key)
9297                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9298         list_add_tail(&ri_rec->list, head);
9299
9300         return 0;
9301 }
9302
9303 static void free_root_item_list(struct list_head *list)
9304 {
9305         struct root_item_record *ri_rec;
9306
9307         while (!list_empty(list)) {
9308                 ri_rec = list_first_entry(list, struct root_item_record,
9309                                           list);
9310                 list_del_init(&ri_rec->list);
9311                 free(ri_rec);
9312         }
9313 }
9314
9315 static int deal_root_from_list(struct list_head *list,
9316                                struct btrfs_root *root,
9317                                struct block_info *bits,
9318                                int bits_nr,
9319                                struct cache_tree *pending,
9320                                struct cache_tree *seen,
9321                                struct cache_tree *reada,
9322                                struct cache_tree *nodes,
9323                                struct cache_tree *extent_cache,
9324                                struct cache_tree *chunk_cache,
9325                                struct rb_root *dev_cache,
9326                                struct block_group_tree *block_group_cache,
9327                                struct device_extent_tree *dev_extent_cache)
9328 {
9329         int ret = 0;
9330         u64 last;
9331
9332         while (!list_empty(list)) {
9333                 struct root_item_record *rec;
9334                 struct extent_buffer *buf;
9335                 rec = list_entry(list->next,
9336                                  struct root_item_record, list);
9337                 last = 0;
9338                 buf = read_tree_block(root->fs_info->tree_root,
9339                                       rec->bytenr, rec->level_size, 0);
9340                 if (!extent_buffer_uptodate(buf)) {
9341                         free_extent_buffer(buf);
9342                         ret = -EIO;
9343                         break;
9344                 }
9345                 ret = add_root_to_pending(buf, extent_cache, pending,
9346                                     seen, nodes, rec->objectid);
9347                 if (ret < 0)
9348                         break;
9349                 /*
9350                  * To rebuild extent tree, we need deal with snapshot
9351                  * one by one, otherwise we deal with node firstly which
9352                  * can maximize readahead.
9353                  */
9354                 while (1) {
9355                         ret = run_next_block(root, bits, bits_nr, &last,
9356                                              pending, seen, reada, nodes,
9357                                              extent_cache, chunk_cache,
9358                                              dev_cache, block_group_cache,
9359                                              dev_extent_cache, rec);
9360                         if (ret != 0)
9361                                 break;
9362                 }
9363                 free_extent_buffer(buf);
9364                 list_del(&rec->list);
9365                 free(rec);
9366                 if (ret < 0)
9367                         break;
9368         }
9369         while (ret >= 0) {
9370                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9371                                      reada, nodes, extent_cache, chunk_cache,
9372                                      dev_cache, block_group_cache,
9373                                      dev_extent_cache, NULL);
9374                 if (ret != 0) {
9375                         if (ret > 0)
9376                                 ret = 0;
9377                         break;
9378                 }
9379         }
9380         return ret;
9381 }
9382
9383 static int check_chunks_and_extents(struct btrfs_root *root)
9384 {
9385         struct rb_root dev_cache;
9386         struct cache_tree chunk_cache;
9387         struct block_group_tree block_group_cache;
9388         struct device_extent_tree dev_extent_cache;
9389         struct cache_tree extent_cache;
9390         struct cache_tree seen;
9391         struct cache_tree pending;
9392         struct cache_tree reada;
9393         struct cache_tree nodes;
9394         struct extent_io_tree excluded_extents;
9395         struct cache_tree corrupt_blocks;
9396         struct btrfs_path path;
9397         struct btrfs_key key;
9398         struct btrfs_key found_key;
9399         int ret, err = 0;
9400         struct block_info *bits;
9401         int bits_nr;
9402         struct extent_buffer *leaf;
9403         int slot;
9404         struct btrfs_root_item ri;
9405         struct list_head dropping_trees;
9406         struct list_head normal_trees;
9407         struct btrfs_root *root1;
9408         u64 objectid;
9409         u32 level_size;
9410         u8 level;
9411
9412         dev_cache = RB_ROOT;
9413         cache_tree_init(&chunk_cache);
9414         block_group_tree_init(&block_group_cache);
9415         device_extent_tree_init(&dev_extent_cache);
9416
9417         cache_tree_init(&extent_cache);
9418         cache_tree_init(&seen);
9419         cache_tree_init(&pending);
9420         cache_tree_init(&nodes);
9421         cache_tree_init(&reada);
9422         cache_tree_init(&corrupt_blocks);
9423         extent_io_tree_init(&excluded_extents);
9424         INIT_LIST_HEAD(&dropping_trees);
9425         INIT_LIST_HEAD(&normal_trees);
9426
9427         if (repair) {
9428                 root->fs_info->excluded_extents = &excluded_extents;
9429                 root->fs_info->fsck_extent_cache = &extent_cache;
9430                 root->fs_info->free_extent_hook = free_extent_hook;
9431                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9432         }
9433
9434         bits_nr = 1024;
9435         bits = malloc(bits_nr * sizeof(struct block_info));
9436         if (!bits) {
9437                 perror("malloc");
9438                 exit(1);
9439         }
9440
9441         if (ctx.progress_enabled) {
9442                 ctx.tp = TASK_EXTENTS;
9443                 task_start(ctx.info);
9444         }
9445
9446 again:
9447         root1 = root->fs_info->tree_root;
9448         level = btrfs_header_level(root1->node);
9449         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9450                                     root1->node->start, 0, level, 0,
9451                                     root1->nodesize, NULL);
9452         if (ret < 0)
9453                 goto out;
9454         root1 = root->fs_info->chunk_root;
9455         level = btrfs_header_level(root1->node);
9456         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9457                                     root1->node->start, 0, level, 0,
9458                                     root1->nodesize, NULL);
9459         if (ret < 0)
9460                 goto out;
9461         btrfs_init_path(&path);
9462         key.offset = 0;
9463         key.objectid = 0;
9464         key.type = BTRFS_ROOT_ITEM_KEY;
9465         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9466                                         &key, &path, 0, 0);
9467         if (ret < 0)
9468                 goto out;
9469         while(1) {
9470                 leaf = path.nodes[0];
9471                 slot = path.slots[0];
9472                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9473                         ret = btrfs_next_leaf(root, &path);
9474                         if (ret != 0)
9475                                 break;
9476                         leaf = path.nodes[0];
9477                         slot = path.slots[0];
9478                 }
9479                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9480                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9481                         unsigned long offset;
9482                         u64 last_snapshot;
9483
9484                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9485                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9486                         last_snapshot = btrfs_root_last_snapshot(&ri);
9487                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9488                                 level = btrfs_root_level(&ri);
9489                                 level_size = root->nodesize;
9490                                 ret = add_root_item_to_list(&normal_trees,
9491                                                 found_key.objectid,
9492                                                 btrfs_root_bytenr(&ri),
9493                                                 last_snapshot, level,
9494                                                 0, level_size, NULL);
9495                                 if (ret < 0)
9496                                         goto out;
9497                         } else {
9498                                 level = btrfs_root_level(&ri);
9499                                 level_size = root->nodesize;
9500                                 objectid = found_key.objectid;
9501                                 btrfs_disk_key_to_cpu(&found_key,
9502                                                       &ri.drop_progress);
9503                                 ret = add_root_item_to_list(&dropping_trees,
9504                                                 objectid,
9505                                                 btrfs_root_bytenr(&ri),
9506                                                 last_snapshot, level,
9507                                                 ri.drop_level,
9508                                                 level_size, &found_key);
9509                                 if (ret < 0)
9510                                         goto out;
9511                         }
9512                 }
9513                 path.slots[0]++;
9514         }
9515         btrfs_release_path(&path);
9516
9517         /*
9518          * check_block can return -EAGAIN if it fixes something, please keep
9519          * this in mind when dealing with return values from these functions, if
9520          * we get -EAGAIN we want to fall through and restart the loop.
9521          */
9522         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9523                                   &seen, &reada, &nodes, &extent_cache,
9524                                   &chunk_cache, &dev_cache, &block_group_cache,
9525                                   &dev_extent_cache);
9526         if (ret < 0) {
9527                 if (ret == -EAGAIN)
9528                         goto loop;
9529                 goto out;
9530         }
9531         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9532                                   &pending, &seen, &reada, &nodes,
9533                                   &extent_cache, &chunk_cache, &dev_cache,
9534                                   &block_group_cache, &dev_extent_cache);
9535         if (ret < 0) {
9536                 if (ret == -EAGAIN)
9537                         goto loop;
9538                 goto out;
9539         }
9540
9541         ret = check_chunks(&chunk_cache, &block_group_cache,
9542                            &dev_extent_cache, NULL, NULL, NULL, 0);
9543         if (ret) {
9544                 if (ret == -EAGAIN)
9545                         goto loop;
9546                 err = ret;
9547         }
9548
9549         ret = check_extent_refs(root, &extent_cache);
9550         if (ret < 0) {
9551                 if (ret == -EAGAIN)
9552                         goto loop;
9553                 goto out;
9554         }
9555
9556         ret = check_devices(&dev_cache, &dev_extent_cache);
9557         if (ret && err)
9558                 ret = err;
9559
9560 out:
9561         task_stop(ctx.info);
9562         if (repair) {
9563                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9564                 extent_io_tree_cleanup(&excluded_extents);
9565                 root->fs_info->fsck_extent_cache = NULL;
9566                 root->fs_info->free_extent_hook = NULL;
9567                 root->fs_info->corrupt_blocks = NULL;
9568                 root->fs_info->excluded_extents = NULL;
9569         }
9570         free(bits);
9571         free_chunk_cache_tree(&chunk_cache);
9572         free_device_cache_tree(&dev_cache);
9573         free_block_group_tree(&block_group_cache);
9574         free_device_extent_tree(&dev_extent_cache);
9575         free_extent_cache_tree(&seen);
9576         free_extent_cache_tree(&pending);
9577         free_extent_cache_tree(&reada);
9578         free_extent_cache_tree(&nodes);
9579         return ret;
9580 loop:
9581         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9582         free_extent_cache_tree(&seen);
9583         free_extent_cache_tree(&pending);
9584         free_extent_cache_tree(&reada);
9585         free_extent_cache_tree(&nodes);
9586         free_chunk_cache_tree(&chunk_cache);
9587         free_block_group_tree(&block_group_cache);
9588         free_device_cache_tree(&dev_cache);
9589         free_device_extent_tree(&dev_extent_cache);
9590         free_extent_record_cache(root->fs_info, &extent_cache);
9591         free_root_item_list(&normal_trees);
9592         free_root_item_list(&dropping_trees);
9593         extent_io_tree_cleanup(&excluded_extents);
9594         goto again;
9595 }
9596
9597 /*
9598  * Check backrefs of a tree block given by @bytenr or @eb.
9599  *
9600  * @root:       the root containing the @bytenr or @eb
9601  * @eb:         tree block extent buffer, can be NULL
9602  * @bytenr:     bytenr of the tree block to search
9603  * @level:      tree level of the tree block
9604  * @owner:      owner of the tree block
9605  *
9606  * Return >0 for any error found and output error message
9607  * Return 0 for no error found
9608  */
9609 static int check_tree_block_ref(struct btrfs_root *root,
9610                                 struct extent_buffer *eb, u64 bytenr,
9611                                 int level, u64 owner)
9612 {
9613         struct btrfs_key key;
9614         struct btrfs_root *extent_root = root->fs_info->extent_root;
9615         struct btrfs_path path;
9616         struct btrfs_extent_item *ei;
9617         struct btrfs_extent_inline_ref *iref;
9618         struct extent_buffer *leaf;
9619         unsigned long end;
9620         unsigned long ptr;
9621         int slot;
9622         int skinny_level;
9623         int type;
9624         u32 nodesize = root->nodesize;
9625         u32 item_size;
9626         u64 offset;
9627         int found_ref = 0;
9628         int err = 0;
9629         int ret;
9630
9631         btrfs_init_path(&path);
9632         key.objectid = bytenr;
9633         if (btrfs_fs_incompat(root->fs_info,
9634                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9635                 key.type = BTRFS_METADATA_ITEM_KEY;
9636         else
9637                 key.type = BTRFS_EXTENT_ITEM_KEY;
9638         key.offset = (u64)-1;
9639
9640         /* Search for the backref in extent tree */
9641         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9642         if (ret < 0) {
9643                 err |= BACKREF_MISSING;
9644                 goto out;
9645         }
9646         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9647         if (ret) {
9648                 err |= BACKREF_MISSING;
9649                 goto out;
9650         }
9651
9652         leaf = path.nodes[0];
9653         slot = path.slots[0];
9654         btrfs_item_key_to_cpu(leaf, &key, slot);
9655
9656         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9657
9658         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9659                 skinny_level = (int)key.offset;
9660                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9661         } else {
9662                 struct btrfs_tree_block_info *info;
9663
9664                 info = (struct btrfs_tree_block_info *)(ei + 1);
9665                 skinny_level = btrfs_tree_block_level(leaf, info);
9666                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9667         }
9668
9669         if (eb) {
9670                 u64 header_gen;
9671                 u64 extent_gen;
9672
9673                 if (!(btrfs_extent_flags(leaf, ei) &
9674                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9675                         error(
9676                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
9677                                 key.objectid, nodesize,
9678                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
9679                         err = BACKREF_MISMATCH;
9680                 }
9681                 header_gen = btrfs_header_generation(eb);
9682                 extent_gen = btrfs_extent_generation(leaf, ei);
9683                 if (header_gen != extent_gen) {
9684                         error(
9685         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
9686                                 key.objectid, nodesize, header_gen,
9687                                 extent_gen);
9688                         err = BACKREF_MISMATCH;
9689                 }
9690                 if (level != skinny_level) {
9691                         error(
9692                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
9693                                 key.objectid, nodesize, level, skinny_level);
9694                         err = BACKREF_MISMATCH;
9695                 }
9696                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
9697                         error(
9698                         "extent[%llu %u] is referred by other roots than %llu",
9699                                 key.objectid, nodesize, root->objectid);
9700                         err = BACKREF_MISMATCH;
9701                 }
9702         }
9703
9704         /*
9705          * Iterate the extent/metadata item to find the exact backref
9706          */
9707         item_size = btrfs_item_size_nr(leaf, slot);
9708         ptr = (unsigned long)iref;
9709         end = (unsigned long)ei + item_size;
9710         while (ptr < end) {
9711                 iref = (struct btrfs_extent_inline_ref *)ptr;
9712                 type = btrfs_extent_inline_ref_type(leaf, iref);
9713                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9714
9715                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9716                         (offset == root->objectid || offset == owner)) {
9717                         found_ref = 1;
9718                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9719                         /* Check if the backref points to valid referencer */
9720                         found_ref = !check_tree_block_ref(root, NULL, offset,
9721                                                           level + 1, owner);
9722                 }
9723
9724                 if (found_ref)
9725                         break;
9726                 ptr += btrfs_extent_inline_ref_size(type);
9727         }
9728
9729         /*
9730          * Inlined extent item doesn't have what we need, check
9731          * TREE_BLOCK_REF_KEY
9732          */
9733         if (!found_ref) {
9734                 btrfs_release_path(&path);
9735                 key.objectid = bytenr;
9736                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9737                 key.offset = root->objectid;
9738
9739                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9740                 if (!ret)
9741                         found_ref = 1;
9742         }
9743         if (!found_ref)
9744                 err |= BACKREF_MISSING;
9745 out:
9746         btrfs_release_path(&path);
9747         if (eb && (err & BACKREF_MISSING))
9748                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9749                         bytenr, nodesize, owner, level);
9750         return err;
9751 }
9752
9753 /*
9754  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9755  *
9756  * Return >0 any error found and output error message
9757  * Return 0 for no error found
9758  */
9759 static int check_extent_data_item(struct btrfs_root *root,
9760                                   struct extent_buffer *eb, int slot)
9761 {
9762         struct btrfs_file_extent_item *fi;
9763         struct btrfs_path path;
9764         struct btrfs_root *extent_root = root->fs_info->extent_root;
9765         struct btrfs_key fi_key;
9766         struct btrfs_key dbref_key;
9767         struct extent_buffer *leaf;
9768         struct btrfs_extent_item *ei;
9769         struct btrfs_extent_inline_ref *iref;
9770         struct btrfs_extent_data_ref *dref;
9771         u64 owner;
9772         u64 file_extent_gen;
9773         u64 disk_bytenr;
9774         u64 disk_num_bytes;
9775         u64 extent_num_bytes;
9776         u64 extent_flags;
9777         u64 extent_gen;
9778         u32 item_size;
9779         unsigned long end;
9780         unsigned long ptr;
9781         int type;
9782         u64 ref_root;
9783         int found_dbackref = 0;
9784         int err = 0;
9785         int ret;
9786
9787         btrfs_item_key_to_cpu(eb, &fi_key, slot);
9788         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9789         file_extent_gen = btrfs_file_extent_generation(eb, fi);
9790
9791         /* Nothing to check for hole and inline data extents */
9792         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9793             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9794                 return 0;
9795
9796         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9797         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9798         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9799
9800         /* Check unaligned disk_num_bytes and num_bytes */
9801         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9802                 error(
9803 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9804                         fi_key.objectid, fi_key.offset, disk_num_bytes,
9805                         root->sectorsize);
9806                 err |= BYTES_UNALIGNED;
9807         } else {
9808                 data_bytes_allocated += disk_num_bytes;
9809         }
9810         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9811                 error(
9812 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9813                         fi_key.objectid, fi_key.offset, extent_num_bytes,
9814                         root->sectorsize);
9815                 err |= BYTES_UNALIGNED;
9816         } else {
9817                 data_bytes_referenced += extent_num_bytes;
9818         }
9819         owner = btrfs_header_owner(eb);
9820
9821         /* Check the extent item of the file extent in extent tree */
9822         btrfs_init_path(&path);
9823         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9824         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9825         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9826
9827         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9828         if (ret) {
9829                 err |= BACKREF_MISSING;
9830                 goto error;
9831         }
9832
9833         leaf = path.nodes[0];
9834         slot = path.slots[0];
9835         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9836
9837         extent_flags = btrfs_extent_flags(leaf, ei);
9838         extent_gen = btrfs_extent_generation(leaf, ei);
9839
9840         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9841                 error(
9842                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9843                     disk_bytenr, disk_num_bytes,
9844                     BTRFS_EXTENT_FLAG_DATA);
9845                 err |= BACKREF_MISMATCH;
9846         }
9847
9848         if (file_extent_gen < extent_gen) {
9849                 error(
9850 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9851                         disk_bytenr, disk_num_bytes, file_extent_gen,
9852                         extent_gen);
9853                 err |= BACKREF_MISMATCH;
9854         }
9855
9856         /* Check data backref inside that extent item */
9857         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9858         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9859         ptr = (unsigned long)iref;
9860         end = (unsigned long)ei + item_size;
9861         while (ptr < end) {
9862                 iref = (struct btrfs_extent_inline_ref *)ptr;
9863                 type = btrfs_extent_inline_ref_type(leaf, iref);
9864                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9865
9866                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9867                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
9868                         if (ref_root == owner || ref_root == root->objectid)
9869                                 found_dbackref = 1;
9870                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9871                         found_dbackref = !check_tree_block_ref(root, NULL,
9872                                 btrfs_extent_inline_ref_offset(leaf, iref),
9873                                 0, owner);
9874                 }
9875
9876                 if (found_dbackref)
9877                         break;
9878                 ptr += btrfs_extent_inline_ref_size(type);
9879         }
9880
9881         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9882         if (!found_dbackref) {
9883                 btrfs_release_path(&path);
9884
9885                 btrfs_init_path(&path);
9886                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9887                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9888                 dbref_key.offset = hash_extent_data_ref(root->objectid,
9889                                 fi_key.objectid, fi_key.offset);
9890
9891                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9892                                         &dbref_key, &path, 0, 0);
9893                 if (!ret)
9894                         found_dbackref = 1;
9895         }
9896
9897         if (!found_dbackref)
9898                 err |= BACKREF_MISSING;
9899 error:
9900         btrfs_release_path(&path);
9901         if (err & BACKREF_MISSING) {
9902                 error("data extent[%llu %llu] backref lost",
9903                       disk_bytenr, disk_num_bytes);
9904         }
9905         return err;
9906 }
9907
9908 /*
9909  * Get real tree block level for the case like shared block
9910  * Return >= 0 as tree level
9911  * Return <0 for error
9912  */
9913 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9914 {
9915         struct extent_buffer *eb;
9916         struct btrfs_path path;
9917         struct btrfs_key key;
9918         struct btrfs_extent_item *ei;
9919         u64 flags;
9920         u64 transid;
9921         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9922         u8 backref_level;
9923         u8 header_level;
9924         int ret;
9925
9926         /* Search extent tree for extent generation and level */
9927         key.objectid = bytenr;
9928         key.type = BTRFS_METADATA_ITEM_KEY;
9929         key.offset = (u64)-1;
9930
9931         btrfs_init_path(&path);
9932         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9933         if (ret < 0)
9934                 goto release_out;
9935         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9936         if (ret < 0)
9937                 goto release_out;
9938         if (ret > 0) {
9939                 ret = -ENOENT;
9940                 goto release_out;
9941         }
9942
9943         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9944         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9945                             struct btrfs_extent_item);
9946         flags = btrfs_extent_flags(path.nodes[0], ei);
9947         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9948                 ret = -ENOENT;
9949                 goto release_out;
9950         }
9951
9952         /* Get transid for later read_tree_block() check */
9953         transid = btrfs_extent_generation(path.nodes[0], ei);
9954
9955         /* Get backref level as one source */
9956         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9957                 backref_level = key.offset;
9958         } else {
9959                 struct btrfs_tree_block_info *info;
9960
9961                 info = (struct btrfs_tree_block_info *)(ei + 1);
9962                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9963         }
9964         btrfs_release_path(&path);
9965
9966         /* Get level from tree block as an alternative source */
9967         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9968         if (!extent_buffer_uptodate(eb)) {
9969                 free_extent_buffer(eb);
9970                 return -EIO;
9971         }
9972         header_level = btrfs_header_level(eb);
9973         free_extent_buffer(eb);
9974
9975         if (header_level != backref_level)
9976                 return -EIO;
9977         return header_level;
9978
9979 release_out:
9980         btrfs_release_path(&path);
9981         return ret;
9982 }
9983
9984 /*
9985  * Check if a tree block backref is valid (points to a valid tree block)
9986  * if level == -1, level will be resolved
9987  * Return >0 for any error found and print error message
9988  */
9989 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9990                                     u64 bytenr, int level)
9991 {
9992         struct btrfs_root *root;
9993         struct btrfs_key key;
9994         struct btrfs_path path;
9995         struct extent_buffer *eb;
9996         struct extent_buffer *node;
9997         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9998         int err = 0;
9999         int ret;
10000
10001         /* Query level for level == -1 special case */
10002         if (level == -1)
10003                 level = query_tree_block_level(fs_info, bytenr);
10004         if (level < 0) {
10005                 err |= REFERENCER_MISSING;
10006                 goto out;
10007         }
10008
10009         key.objectid = root_id;
10010         key.type = BTRFS_ROOT_ITEM_KEY;
10011         key.offset = (u64)-1;
10012
10013         root = btrfs_read_fs_root(fs_info, &key);
10014         if (IS_ERR(root)) {
10015                 err |= REFERENCER_MISSING;
10016                 goto out;
10017         }
10018
10019         /* Read out the tree block to get item/node key */
10020         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10021         if (!extent_buffer_uptodate(eb)) {
10022                 err |= REFERENCER_MISSING;
10023                 free_extent_buffer(eb);
10024                 goto out;
10025         }
10026
10027         /* Empty tree, no need to check key */
10028         if (!btrfs_header_nritems(eb) && !level) {
10029                 free_extent_buffer(eb);
10030                 goto out;
10031         }
10032
10033         if (level)
10034                 btrfs_node_key_to_cpu(eb, &key, 0);
10035         else
10036                 btrfs_item_key_to_cpu(eb, &key, 0);
10037
10038         free_extent_buffer(eb);
10039
10040         btrfs_init_path(&path);
10041         path.lowest_level = level;
10042         /* Search with the first key, to ensure we can reach it */
10043         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10044         if (ret < 0) {
10045                 err |= REFERENCER_MISSING;
10046                 goto release_out;
10047         }
10048
10049         node = path.nodes[level];
10050         if (btrfs_header_bytenr(node) != bytenr) {
10051                 error(
10052         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10053                         bytenr, nodesize, bytenr,
10054                         btrfs_header_bytenr(node));
10055                 err |= REFERENCER_MISMATCH;
10056         }
10057         if (btrfs_header_level(node) != level) {
10058                 error(
10059         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10060                         bytenr, nodesize, level,
10061                         btrfs_header_level(node));
10062                 err |= REFERENCER_MISMATCH;
10063         }
10064
10065 release_out:
10066         btrfs_release_path(&path);
10067 out:
10068         if (err & REFERENCER_MISSING) {
10069                 if (level < 0)
10070                         error("extent [%llu %d] lost referencer (owner: %llu)",
10071                                 bytenr, nodesize, root_id);
10072                 else
10073                         error(
10074                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10075                                 bytenr, nodesize, root_id, level);
10076         }
10077
10078         return err;
10079 }
10080
10081 /*
10082  * Check referencer for shared block backref
10083  * If level == -1, this function will resolve the level.
10084  */
10085 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10086                                      u64 parent, u64 bytenr, int level)
10087 {
10088         struct extent_buffer *eb;
10089         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10090         u32 nr;
10091         int found_parent = 0;
10092         int i;
10093
10094         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10095         if (!extent_buffer_uptodate(eb))
10096                 goto out;
10097
10098         if (level == -1)
10099                 level = query_tree_block_level(fs_info, bytenr);
10100         if (level < 0)
10101                 goto out;
10102
10103         if (level + 1 != btrfs_header_level(eb))
10104                 goto out;
10105
10106         nr = btrfs_header_nritems(eb);
10107         for (i = 0; i < nr; i++) {
10108                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10109                         found_parent = 1;
10110                         break;
10111                 }
10112         }
10113 out:
10114         free_extent_buffer(eb);
10115         if (!found_parent) {
10116                 error(
10117         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10118                         bytenr, nodesize, parent, level);
10119                 return REFERENCER_MISSING;
10120         }
10121         return 0;
10122 }
10123
10124 /*
10125  * Check referencer for normal (inlined) data ref
10126  * If len == 0, it will be resolved by searching in extent tree
10127  */
10128 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10129                                      u64 root_id, u64 objectid, u64 offset,
10130                                      u64 bytenr, u64 len, u32 count)
10131 {
10132         struct btrfs_root *root;
10133         struct btrfs_root *extent_root = fs_info->extent_root;
10134         struct btrfs_key key;
10135         struct btrfs_path path;
10136         struct extent_buffer *leaf;
10137         struct btrfs_file_extent_item *fi;
10138         u32 found_count = 0;
10139         int slot;
10140         int ret = 0;
10141
10142         if (!len) {
10143                 key.objectid = bytenr;
10144                 key.type = BTRFS_EXTENT_ITEM_KEY;
10145                 key.offset = (u64)-1;
10146
10147                 btrfs_init_path(&path);
10148                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10149                 if (ret < 0)
10150                         goto out;
10151                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10152                 if (ret)
10153                         goto out;
10154                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10155                 if (key.objectid != bytenr ||
10156                     key.type != BTRFS_EXTENT_ITEM_KEY)
10157                         goto out;
10158                 len = key.offset;
10159                 btrfs_release_path(&path);
10160         }
10161         key.objectid = root_id;
10162         key.type = BTRFS_ROOT_ITEM_KEY;
10163         key.offset = (u64)-1;
10164         btrfs_init_path(&path);
10165
10166         root = btrfs_read_fs_root(fs_info, &key);
10167         if (IS_ERR(root))
10168                 goto out;
10169
10170         key.objectid = objectid;
10171         key.type = BTRFS_EXTENT_DATA_KEY;
10172         /*
10173          * It can be nasty as data backref offset is
10174          * file offset - file extent offset, which is smaller or
10175          * equal to original backref offset.  The only special case is
10176          * overflow.  So we need to special check and do further search.
10177          */
10178         key.offset = offset & (1ULL << 63) ? 0 : offset;
10179
10180         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10181         if (ret < 0)
10182                 goto out;
10183
10184         /*
10185          * Search afterwards to get correct one
10186          * NOTE: As we must do a comprehensive check on the data backref to
10187          * make sure the dref count also matches, we must iterate all file
10188          * extents for that inode.
10189          */
10190         while (1) {
10191                 leaf = path.nodes[0];
10192                 slot = path.slots[0];
10193
10194                 btrfs_item_key_to_cpu(leaf, &key, slot);
10195                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10196                         break;
10197                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10198                 /*
10199                  * Except normal disk bytenr and disk num bytes, we still
10200                  * need to do extra check on dbackref offset as
10201                  * dbackref offset = file_offset - file_extent_offset
10202                  */
10203                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10204                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10205                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10206                     offset)
10207                         found_count++;
10208
10209                 ret = btrfs_next_item(root, &path);
10210                 if (ret)
10211                         break;
10212         }
10213 out:
10214         btrfs_release_path(&path);
10215         if (found_count != count) {
10216                 error(
10217 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10218                         bytenr, len, root_id, objectid, offset, count, found_count);
10219                 return REFERENCER_MISSING;
10220         }
10221         return 0;
10222 }
10223
10224 /*
10225  * Check if the referencer of a shared data backref exists
10226  */
10227 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10228                                      u64 parent, u64 bytenr)
10229 {
10230         struct extent_buffer *eb;
10231         struct btrfs_key key;
10232         struct btrfs_file_extent_item *fi;
10233         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10234         u32 nr;
10235         int found_parent = 0;
10236         int i;
10237
10238         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10239         if (!extent_buffer_uptodate(eb))
10240                 goto out;
10241
10242         nr = btrfs_header_nritems(eb);
10243         for (i = 0; i < nr; i++) {
10244                 btrfs_item_key_to_cpu(eb, &key, i);
10245                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10246                         continue;
10247
10248                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10249                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10250                         continue;
10251
10252                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10253                         found_parent = 1;
10254                         break;
10255                 }
10256         }
10257
10258 out:
10259         free_extent_buffer(eb);
10260         if (!found_parent) {
10261                 error("shared extent %llu referencer lost (parent: %llu)",
10262                         bytenr, parent);
10263                 return REFERENCER_MISSING;
10264         }
10265         return 0;
10266 }
10267
10268 /*
10269  * This function will check a given extent item, including its backref and
10270  * itself (like crossing stripe boundary and type)
10271  *
10272  * Since we don't use extent_record anymore, introduce new error bit
10273  */
10274 static int check_extent_item(struct btrfs_fs_info *fs_info,
10275                              struct extent_buffer *eb, int slot)
10276 {
10277         struct btrfs_extent_item *ei;
10278         struct btrfs_extent_inline_ref *iref;
10279         struct btrfs_extent_data_ref *dref;
10280         unsigned long end;
10281         unsigned long ptr;
10282         int type;
10283         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10284         u32 item_size = btrfs_item_size_nr(eb, slot);
10285         u64 flags;
10286         u64 offset;
10287         int metadata = 0;
10288         int level;
10289         struct btrfs_key key;
10290         int ret;
10291         int err = 0;
10292
10293         btrfs_item_key_to_cpu(eb, &key, slot);
10294         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10295                 bytes_used += key.offset;
10296         else
10297                 bytes_used += nodesize;
10298
10299         if (item_size < sizeof(*ei)) {
10300                 /*
10301                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10302                  * old thing when on disk format is still un-determined.
10303                  * No need to care about it anymore
10304                  */
10305                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10306                 return -ENOTTY;
10307         }
10308
10309         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10310         flags = btrfs_extent_flags(eb, ei);
10311
10312         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10313                 metadata = 1;
10314         if (metadata && check_crossing_stripes(global_info, key.objectid,
10315                                                eb->len)) {
10316                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10317                       key.objectid, key.objectid + nodesize);
10318                 err |= CROSSING_STRIPE_BOUNDARY;
10319         }
10320
10321         ptr = (unsigned long)(ei + 1);
10322
10323         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10324                 /* Old EXTENT_ITEM metadata */
10325                 struct btrfs_tree_block_info *info;
10326
10327                 info = (struct btrfs_tree_block_info *)ptr;
10328                 level = btrfs_tree_block_level(eb, info);
10329                 ptr += sizeof(struct btrfs_tree_block_info);
10330         } else {
10331                 /* New METADATA_ITEM */
10332                 level = key.offset;
10333         }
10334         end = (unsigned long)ei + item_size;
10335
10336         if (ptr >= end) {
10337                 err |= ITEM_SIZE_MISMATCH;
10338                 goto out;
10339         }
10340
10341         /* Now check every backref in this extent item */
10342 next:
10343         iref = (struct btrfs_extent_inline_ref *)ptr;
10344         type = btrfs_extent_inline_ref_type(eb, iref);
10345         offset = btrfs_extent_inline_ref_offset(eb, iref);
10346         switch (type) {
10347         case BTRFS_TREE_BLOCK_REF_KEY:
10348                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10349                                                level);
10350                 err |= ret;
10351                 break;
10352         case BTRFS_SHARED_BLOCK_REF_KEY:
10353                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10354                                                  level);
10355                 err |= ret;
10356                 break;
10357         case BTRFS_EXTENT_DATA_REF_KEY:
10358                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10359                 ret = check_extent_data_backref(fs_info,
10360                                 btrfs_extent_data_ref_root(eb, dref),
10361                                 btrfs_extent_data_ref_objectid(eb, dref),
10362                                 btrfs_extent_data_ref_offset(eb, dref),
10363                                 key.objectid, key.offset,
10364                                 btrfs_extent_data_ref_count(eb, dref));
10365                 err |= ret;
10366                 break;
10367         case BTRFS_SHARED_DATA_REF_KEY:
10368                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10369                 err |= ret;
10370                 break;
10371         default:
10372                 error("extent[%llu %d %llu] has unknown ref type: %d",
10373                         key.objectid, key.type, key.offset, type);
10374                 err |= UNKNOWN_TYPE;
10375                 goto out;
10376         }
10377
10378         ptr += btrfs_extent_inline_ref_size(type);
10379         if (ptr < end)
10380                 goto next;
10381
10382 out:
10383         return err;
10384 }
10385
10386 /*
10387  * Check if a dev extent item is referred correctly by its chunk
10388  */
10389 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10390                                  struct extent_buffer *eb, int slot)
10391 {
10392         struct btrfs_root *chunk_root = fs_info->chunk_root;
10393         struct btrfs_dev_extent *ptr;
10394         struct btrfs_path path;
10395         struct btrfs_key chunk_key;
10396         struct btrfs_key devext_key;
10397         struct btrfs_chunk *chunk;
10398         struct extent_buffer *l;
10399         int num_stripes;
10400         u64 length;
10401         int i;
10402         int found_chunk = 0;
10403         int ret;
10404
10405         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10406         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10407         length = btrfs_dev_extent_length(eb, ptr);
10408
10409         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10410         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10411         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10412
10413         btrfs_init_path(&path);
10414         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10415         if (ret)
10416                 goto out;
10417
10418         l = path.nodes[0];
10419         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10420         if (btrfs_chunk_length(l, chunk) != length)
10421                 goto out;
10422
10423         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10424         for (i = 0; i < num_stripes; i++) {
10425                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10426                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10427
10428                 if (devid == devext_key.objectid &&
10429                     offset == devext_key.offset) {
10430                         found_chunk = 1;
10431                         break;
10432                 }
10433         }
10434 out:
10435         btrfs_release_path(&path);
10436         if (!found_chunk) {
10437                 error(
10438                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10439                         devext_key.objectid, devext_key.offset, length);
10440                 return REFERENCER_MISSING;
10441         }
10442         return 0;
10443 }
10444
10445 /*
10446  * Check if the used space is correct with the dev item
10447  */
10448 static int check_dev_item(struct btrfs_fs_info *fs_info,
10449                           struct extent_buffer *eb, int slot)
10450 {
10451         struct btrfs_root *dev_root = fs_info->dev_root;
10452         struct btrfs_dev_item *dev_item;
10453         struct btrfs_path path;
10454         struct btrfs_key key;
10455         struct btrfs_dev_extent *ptr;
10456         u64 dev_id;
10457         u64 used;
10458         u64 total = 0;
10459         int ret;
10460
10461         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10462         dev_id = btrfs_device_id(eb, dev_item);
10463         used = btrfs_device_bytes_used(eb, dev_item);
10464
10465         key.objectid = dev_id;
10466         key.type = BTRFS_DEV_EXTENT_KEY;
10467         key.offset = 0;
10468
10469         btrfs_init_path(&path);
10470         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10471         if (ret < 0) {
10472                 btrfs_item_key_to_cpu(eb, &key, slot);
10473                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10474                         key.objectid, key.type, key.offset);
10475                 btrfs_release_path(&path);
10476                 return REFERENCER_MISSING;
10477         }
10478
10479         /* Iterate dev_extents to calculate the used space of a device */
10480         while (1) {
10481                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10482
10483                 if (key.objectid > dev_id)
10484                         break;
10485                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10486                         goto next;
10487
10488                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10489                                      struct btrfs_dev_extent);
10490                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10491 next:
10492                 ret = btrfs_next_item(dev_root, &path);
10493                 if (ret)
10494                         break;
10495         }
10496         btrfs_release_path(&path);
10497
10498         if (used != total) {
10499                 btrfs_item_key_to_cpu(eb, &key, slot);
10500                 error(
10501 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10502                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10503                         BTRFS_DEV_EXTENT_KEY, dev_id);
10504                 return ACCOUNTING_MISMATCH;
10505         }
10506         return 0;
10507 }
10508
10509 /*
10510  * Check a block group item with its referener (chunk) and its used space
10511  * with extent/metadata item
10512  */
10513 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10514                                   struct extent_buffer *eb, int slot)
10515 {
10516         struct btrfs_root *extent_root = fs_info->extent_root;
10517         struct btrfs_root *chunk_root = fs_info->chunk_root;
10518         struct btrfs_block_group_item *bi;
10519         struct btrfs_block_group_item bg_item;
10520         struct btrfs_path path;
10521         struct btrfs_key bg_key;
10522         struct btrfs_key chunk_key;
10523         struct btrfs_key extent_key;
10524         struct btrfs_chunk *chunk;
10525         struct extent_buffer *leaf;
10526         struct btrfs_extent_item *ei;
10527         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10528         u64 flags;
10529         u64 bg_flags;
10530         u64 used;
10531         u64 total = 0;
10532         int ret;
10533         int err = 0;
10534
10535         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10536         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10537         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10538         used = btrfs_block_group_used(&bg_item);
10539         bg_flags = btrfs_block_group_flags(&bg_item);
10540
10541         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10542         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10543         chunk_key.offset = bg_key.objectid;
10544
10545         btrfs_init_path(&path);
10546         /* Search for the referencer chunk */
10547         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10548         if (ret) {
10549                 error(
10550                 "block group[%llu %llu] did not find the related chunk item",
10551                         bg_key.objectid, bg_key.offset);
10552                 err |= REFERENCER_MISSING;
10553         } else {
10554                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10555                                         struct btrfs_chunk);
10556                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10557                                                 bg_key.offset) {
10558                         error(
10559         "block group[%llu %llu] related chunk item length does not match",
10560                                 bg_key.objectid, bg_key.offset);
10561                         err |= REFERENCER_MISMATCH;
10562                 }
10563         }
10564         btrfs_release_path(&path);
10565
10566         /* Search from the block group bytenr */
10567         extent_key.objectid = bg_key.objectid;
10568         extent_key.type = 0;
10569         extent_key.offset = 0;
10570
10571         btrfs_init_path(&path);
10572         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10573         if (ret < 0)
10574                 goto out;
10575
10576         /* Iterate extent tree to account used space */
10577         while (1) {
10578                 leaf = path.nodes[0];
10579                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10580                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10581                         break;
10582
10583                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10584                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10585                         goto next;
10586                 if (extent_key.objectid < bg_key.objectid)
10587                         goto next;
10588
10589                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10590                         total += nodesize;
10591                 else
10592                         total += extent_key.offset;
10593
10594                 ei = btrfs_item_ptr(leaf, path.slots[0],
10595                                     struct btrfs_extent_item);
10596                 flags = btrfs_extent_flags(leaf, ei);
10597                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10598                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10599                                 error(
10600                         "bad extent[%llu, %llu) type mismatch with chunk",
10601                                         extent_key.objectid,
10602                                         extent_key.objectid + extent_key.offset);
10603                                 err |= CHUNK_TYPE_MISMATCH;
10604                         }
10605                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10606                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10607                                     BTRFS_BLOCK_GROUP_METADATA))) {
10608                                 error(
10609                         "bad extent[%llu, %llu) type mismatch with chunk",
10610                                         extent_key.objectid,
10611                                         extent_key.objectid + nodesize);
10612                                 err |= CHUNK_TYPE_MISMATCH;
10613                         }
10614                 }
10615 next:
10616                 ret = btrfs_next_item(extent_root, &path);
10617                 if (ret)
10618                         break;
10619         }
10620
10621 out:
10622         btrfs_release_path(&path);
10623
10624         if (total != used) {
10625                 error(
10626                 "block group[%llu %llu] used %llu but extent items used %llu",
10627                         bg_key.objectid, bg_key.offset, used, total);
10628                 err |= ACCOUNTING_MISMATCH;
10629         }
10630         return err;
10631 }
10632
10633 /*
10634  * Check a chunk item.
10635  * Including checking all referred dev_extents and block group
10636  */
10637 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10638                             struct extent_buffer *eb, int slot)
10639 {
10640         struct btrfs_root *extent_root = fs_info->extent_root;
10641         struct btrfs_root *dev_root = fs_info->dev_root;
10642         struct btrfs_path path;
10643         struct btrfs_key chunk_key;
10644         struct btrfs_key bg_key;
10645         struct btrfs_key devext_key;
10646         struct btrfs_chunk *chunk;
10647         struct extent_buffer *leaf;
10648         struct btrfs_block_group_item *bi;
10649         struct btrfs_block_group_item bg_item;
10650         struct btrfs_dev_extent *ptr;
10651         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10652         u64 length;
10653         u64 chunk_end;
10654         u64 type;
10655         u64 profile;
10656         int num_stripes;
10657         u64 offset;
10658         u64 objectid;
10659         int i;
10660         int ret;
10661         int err = 0;
10662
10663         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
10664         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
10665         length = btrfs_chunk_length(eb, chunk);
10666         chunk_end = chunk_key.offset + length;
10667         if (!IS_ALIGNED(length, sectorsize)) {
10668                 error("chunk[%llu %llu) not aligned to %u",
10669                         chunk_key.offset, chunk_end, sectorsize);
10670                 err |= BYTES_UNALIGNED;
10671                 goto out;
10672         }
10673
10674         type = btrfs_chunk_type(eb, chunk);
10675         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
10676         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10677                 error("chunk[%llu %llu) has no chunk type",
10678                         chunk_key.offset, chunk_end);
10679                 err |= UNKNOWN_TYPE;
10680         }
10681         if (profile && (profile & (profile - 1))) {
10682                 error("chunk[%llu %llu) multiple profiles detected: %llx",
10683                         chunk_key.offset, chunk_end, profile);
10684                 err |= UNKNOWN_TYPE;
10685         }
10686
10687         bg_key.objectid = chunk_key.offset;
10688         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10689         bg_key.offset = length;
10690
10691         btrfs_init_path(&path);
10692         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
10693         if (ret) {
10694                 error(
10695                 "chunk[%llu %llu) did not find the related block group item",
10696                         chunk_key.offset, chunk_end);
10697                 err |= REFERENCER_MISSING;
10698         } else{
10699                 leaf = path.nodes[0];
10700                 bi = btrfs_item_ptr(leaf, path.slots[0],
10701                                     struct btrfs_block_group_item);
10702                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
10703                                    sizeof(bg_item));
10704                 if (btrfs_block_group_flags(&bg_item) != type) {
10705                         error(
10706 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
10707                                 chunk_key.offset, chunk_end, type,
10708                                 btrfs_block_group_flags(&bg_item));
10709                         err |= REFERENCER_MISSING;
10710                 }
10711         }
10712
10713         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
10714         for (i = 0; i < num_stripes; i++) {
10715                 btrfs_release_path(&path);
10716                 btrfs_init_path(&path);
10717                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10718                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10719                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10720
10721                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10722                                         0, 0);
10723                 if (ret)
10724                         goto not_match_dev;
10725
10726                 leaf = path.nodes[0];
10727                 ptr = btrfs_item_ptr(leaf, path.slots[0],
10728                                      struct btrfs_dev_extent);
10729                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10730                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
10731                 if (objectid != chunk_key.objectid ||
10732                     offset != chunk_key.offset ||
10733                     btrfs_dev_extent_length(leaf, ptr) != length)
10734                         goto not_match_dev;
10735                 continue;
10736 not_match_dev:
10737                 err |= BACKREF_MISSING;
10738                 error(
10739                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10740                         chunk_key.objectid, chunk_end, i);
10741                 continue;
10742         }
10743         btrfs_release_path(&path);
10744 out:
10745         return err;
10746 }
10747
10748 /*
10749  * Main entry function to check known items and update related accounting info
10750  */
10751 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10752 {
10753         struct btrfs_fs_info *fs_info = root->fs_info;
10754         struct btrfs_key key;
10755         int slot = 0;
10756         int type;
10757         struct btrfs_extent_data_ref *dref;
10758         int ret;
10759         int err = 0;
10760
10761 next:
10762         btrfs_item_key_to_cpu(eb, &key, slot);
10763         type = key.type;
10764
10765         switch (type) {
10766         case BTRFS_EXTENT_DATA_KEY:
10767                 ret = check_extent_data_item(root, eb, slot);
10768                 err |= ret;
10769                 break;
10770         case BTRFS_BLOCK_GROUP_ITEM_KEY:
10771                 ret = check_block_group_item(fs_info, eb, slot);
10772                 err |= ret;
10773                 break;
10774         case BTRFS_DEV_ITEM_KEY:
10775                 ret = check_dev_item(fs_info, eb, slot);
10776                 err |= ret;
10777                 break;
10778         case BTRFS_CHUNK_ITEM_KEY:
10779                 ret = check_chunk_item(fs_info, eb, slot);
10780                 err |= ret;
10781                 break;
10782         case BTRFS_DEV_EXTENT_KEY:
10783                 ret = check_dev_extent_item(fs_info, eb, slot);
10784                 err |= ret;
10785                 break;
10786         case BTRFS_EXTENT_ITEM_KEY:
10787         case BTRFS_METADATA_ITEM_KEY:
10788                 ret = check_extent_item(fs_info, eb, slot);
10789                 err |= ret;
10790                 break;
10791         case BTRFS_EXTENT_CSUM_KEY:
10792                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
10793                 break;
10794         case BTRFS_TREE_BLOCK_REF_KEY:
10795                 ret = check_tree_block_backref(fs_info, key.offset,
10796                                                key.objectid, -1);
10797                 err |= ret;
10798                 break;
10799         case BTRFS_EXTENT_DATA_REF_KEY:
10800                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10801                 ret = check_extent_data_backref(fs_info,
10802                                 btrfs_extent_data_ref_root(eb, dref),
10803                                 btrfs_extent_data_ref_objectid(eb, dref),
10804                                 btrfs_extent_data_ref_offset(eb, dref),
10805                                 key.objectid, 0,
10806                                 btrfs_extent_data_ref_count(eb, dref));
10807                 err |= ret;
10808                 break;
10809         case BTRFS_SHARED_BLOCK_REF_KEY:
10810                 ret = check_shared_block_backref(fs_info, key.offset,
10811                                                  key.objectid, -1);
10812                 err |= ret;
10813                 break;
10814         case BTRFS_SHARED_DATA_REF_KEY:
10815                 ret = check_shared_data_backref(fs_info, key.offset,
10816                                                 key.objectid);
10817                 err |= ret;
10818                 break;
10819         default:
10820                 break;
10821         }
10822
10823         if (++slot < btrfs_header_nritems(eb))
10824                 goto next;
10825
10826         return err;
10827 }
10828
10829 /*
10830  * Helper function for later fs/subvol tree check.  To determine if a tree
10831  * block should be checked.
10832  * This function will ensure only the direct referencer with lowest rootid to
10833  * check a fs/subvolume tree block.
10834  *
10835  * Backref check at extent tree would detect errors like missing subvolume
10836  * tree, so we can do aggressive check to reduce duplicated checks.
10837  */
10838 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10839 {
10840         struct btrfs_root *extent_root = root->fs_info->extent_root;
10841         struct btrfs_key key;
10842         struct btrfs_path path;
10843         struct extent_buffer *leaf;
10844         int slot;
10845         struct btrfs_extent_item *ei;
10846         unsigned long ptr;
10847         unsigned long end;
10848         int type;
10849         u32 item_size;
10850         u64 offset;
10851         struct btrfs_extent_inline_ref *iref;
10852         int ret;
10853
10854         btrfs_init_path(&path);
10855         key.objectid = btrfs_header_bytenr(eb);
10856         key.type = BTRFS_METADATA_ITEM_KEY;
10857         key.offset = (u64)-1;
10858
10859         /*
10860          * Any failure in backref resolving means we can't determine
10861          * whom the tree block belongs to.
10862          * So in that case, we need to check that tree block
10863          */
10864         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10865         if (ret < 0)
10866                 goto need_check;
10867
10868         ret = btrfs_previous_extent_item(extent_root, &path,
10869                                          btrfs_header_bytenr(eb));
10870         if (ret)
10871                 goto need_check;
10872
10873         leaf = path.nodes[0];
10874         slot = path.slots[0];
10875         btrfs_item_key_to_cpu(leaf, &key, slot);
10876         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10877
10878         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10879                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10880         } else {
10881                 struct btrfs_tree_block_info *info;
10882
10883                 info = (struct btrfs_tree_block_info *)(ei + 1);
10884                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10885         }
10886
10887         item_size = btrfs_item_size_nr(leaf, slot);
10888         ptr = (unsigned long)iref;
10889         end = (unsigned long)ei + item_size;
10890         while (ptr < end) {
10891                 iref = (struct btrfs_extent_inline_ref *)ptr;
10892                 type = btrfs_extent_inline_ref_type(leaf, iref);
10893                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10894
10895                 /*
10896                  * We only check the tree block if current root is
10897                  * the lowest referencer of it.
10898                  */
10899                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10900                     offset < root->objectid) {
10901                         btrfs_release_path(&path);
10902                         return 0;
10903                 }
10904
10905                 ptr += btrfs_extent_inline_ref_size(type);
10906         }
10907         /*
10908          * Normally we should also check keyed tree block ref, but that may be
10909          * very time consuming.  Inlined ref should already make us skip a lot
10910          * of refs now.  So skip search keyed tree block ref.
10911          */
10912
10913 need_check:
10914         btrfs_release_path(&path);
10915         return 1;
10916 }
10917
10918 /*
10919  * Traversal function for tree block. We will do:
10920  * 1) Skip shared fs/subvolume tree blocks
10921  * 2) Update related bytes accounting
10922  * 3) Pre-order traversal
10923  */
10924 static int traverse_tree_block(struct btrfs_root *root,
10925                                 struct extent_buffer *node)
10926 {
10927         struct extent_buffer *eb;
10928         struct btrfs_key key;
10929         struct btrfs_key drop_key;
10930         int level;
10931         u64 nr;
10932         int i;
10933         int err = 0;
10934         int ret;
10935
10936         /*
10937          * Skip shared fs/subvolume tree block, in that case they will
10938          * be checked by referencer with lowest rootid
10939          */
10940         if (is_fstree(root->objectid) && !should_check(root, node))
10941                 return 0;
10942
10943         /* Update bytes accounting */
10944         total_btree_bytes += node->len;
10945         if (fs_root_objectid(btrfs_header_owner(node)))
10946                 total_fs_tree_bytes += node->len;
10947         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10948                 total_extent_tree_bytes += node->len;
10949         if (!found_old_backref &&
10950             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10951             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10952             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10953                 found_old_backref = 1;
10954
10955         /* pre-order tranversal, check itself first */
10956         level = btrfs_header_level(node);
10957         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10958                                    btrfs_header_level(node),
10959                                    btrfs_header_owner(node));
10960         err |= ret;
10961         if (err)
10962                 error(
10963         "check %s failed root %llu bytenr %llu level %d, force continue check",
10964                         level ? "node":"leaf", root->objectid,
10965                         btrfs_header_bytenr(node), btrfs_header_level(node));
10966
10967         if (!level) {
10968                 btree_space_waste += btrfs_leaf_free_space(root, node);
10969                 ret = check_leaf_items(root, node);
10970                 err |= ret;
10971                 return err;
10972         }
10973
10974         nr = btrfs_header_nritems(node);
10975         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10976         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10977                 sizeof(struct btrfs_key_ptr);
10978
10979         /* Then check all its children */
10980         for (i = 0; i < nr; i++) {
10981                 u64 blocknr = btrfs_node_blockptr(node, i);
10982
10983                 btrfs_node_key_to_cpu(node, &key, i);
10984                 if (level == root->root_item.drop_level &&
10985                     is_dropped_key(&key, &drop_key))
10986                         continue;
10987
10988                 /*
10989                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10990                  * to call the function itself.
10991                  */
10992                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10993                 if (extent_buffer_uptodate(eb)) {
10994                         ret = traverse_tree_block(root, eb);
10995                         err |= ret;
10996                 }
10997                 free_extent_buffer(eb);
10998         }
10999
11000         return err;
11001 }
11002
11003 /*
11004  * Low memory usage version check_chunks_and_extents.
11005  */
11006 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11007 {
11008         struct btrfs_path path;
11009         struct btrfs_key key;
11010         struct btrfs_root *root1;
11011         struct btrfs_root *cur_root;
11012         int err = 0;
11013         int ret;
11014
11015         root1 = root->fs_info->chunk_root;
11016         ret = traverse_tree_block(root1, root1->node);
11017         err |= ret;
11018
11019         root1 = root->fs_info->tree_root;
11020         ret = traverse_tree_block(root1, root1->node);
11021         err |= ret;
11022
11023         btrfs_init_path(&path);
11024         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11025         key.offset = 0;
11026         key.type = BTRFS_ROOT_ITEM_KEY;
11027
11028         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11029         if (ret) {
11030                 error("cannot find extent treet in tree_root");
11031                 goto out;
11032         }
11033
11034         while (1) {
11035                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11036                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11037                         goto next;
11038                 key.offset = (u64)-1;
11039
11040                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11041                 if (IS_ERR(cur_root) || !cur_root) {
11042                         error("failed to read tree: %lld", key.objectid);
11043                         goto next;
11044                 }
11045
11046                 ret = traverse_tree_block(cur_root, cur_root->node);
11047                 err |= ret;
11048
11049 next:
11050                 ret = btrfs_next_item(root1, &path);
11051                 if (ret)
11052                         goto out;
11053         }
11054
11055 out:
11056         btrfs_release_path(&path);
11057         return err;
11058 }
11059
11060 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11061                            struct btrfs_root *root, int overwrite)
11062 {
11063         struct extent_buffer *c;
11064         struct extent_buffer *old = root->node;
11065         int level;
11066         int ret;
11067         struct btrfs_disk_key disk_key = {0,0,0};
11068
11069         level = 0;
11070
11071         if (overwrite) {
11072                 c = old;
11073                 extent_buffer_get(c);
11074                 goto init;
11075         }
11076         c = btrfs_alloc_free_block(trans, root,
11077                                    root->nodesize,
11078                                    root->root_key.objectid,
11079                                    &disk_key, level, 0, 0);
11080         if (IS_ERR(c)) {
11081                 c = old;
11082                 extent_buffer_get(c);
11083                 overwrite = 1;
11084         }
11085 init:
11086         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11087         btrfs_set_header_level(c, level);
11088         btrfs_set_header_bytenr(c, c->start);
11089         btrfs_set_header_generation(c, trans->transid);
11090         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11091         btrfs_set_header_owner(c, root->root_key.objectid);
11092
11093         write_extent_buffer(c, root->fs_info->fsid,
11094                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11095
11096         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11097                             btrfs_header_chunk_tree_uuid(c),
11098                             BTRFS_UUID_SIZE);
11099
11100         btrfs_mark_buffer_dirty(c);
11101         /*
11102          * this case can happen in the following case:
11103          *
11104          * 1.overwrite previous root.
11105          *
11106          * 2.reinit reloc data root, this is because we skip pin
11107          * down reloc data tree before which means we can allocate
11108          * same block bytenr here.
11109          */
11110         if (old->start == c->start) {
11111                 btrfs_set_root_generation(&root->root_item,
11112                                           trans->transid);
11113                 root->root_item.level = btrfs_header_level(root->node);
11114                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11115                                         &root->root_key, &root->root_item);
11116                 if (ret) {
11117                         free_extent_buffer(c);
11118                         return ret;
11119                 }
11120         }
11121         free_extent_buffer(old);
11122         root->node = c;
11123         add_root_to_dirty_list(root);
11124         return 0;
11125 }
11126
/*
 * Pin the extent of tree block @eb and recurse into the blocks it refers to.
 *
 * In a leaf only root items are followed, skipping the extent root and the
 * reloc roots; the root block of each remaining tree is read and pinned with
 * @tree_root cleared.  In a node every child pointer is followed, except that
 * the children of a level 1 node are pinned without being read when
 * @tree_root is 0.
 *
 * Returns 0 on success, -EIO if a block can't be read, or the error of a
 * nested call.
 */
static int pin_down_tree_blocks(struct fs_info_placeholder_unused *unused_decl_guard);
11206
11207 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11208 {
11209         int ret;
11210
11211         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11212         if (ret)
11213                 return ret;
11214
11215         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11216 }
11217
11218 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11219 {
11220         struct btrfs_block_group_cache *cache;
11221         struct btrfs_path path;
11222         struct extent_buffer *leaf;
11223         struct btrfs_chunk *chunk;
11224         struct btrfs_key key;
11225         int ret;
11226         u64 start;
11227
11228         btrfs_init_path(&path);
11229         key.objectid = 0;
11230         key.type = BTRFS_CHUNK_ITEM_KEY;
11231         key.offset = 0;
11232         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11233         if (ret < 0) {
11234                 btrfs_release_path(&path);
11235                 return ret;
11236         }
11237
11238         /*
11239          * We do this in case the block groups were screwed up and had alloc
11240          * bits that aren't actually set on the chunks.  This happens with
11241          * restored images every time and could happen in real life I guess.
11242          */
11243         fs_info->avail_data_alloc_bits = 0;
11244         fs_info->avail_metadata_alloc_bits = 0;
11245         fs_info->avail_system_alloc_bits = 0;
11246
11247         /* First we need to create the in-memory block groups */
11248         while (1) {
11249                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11250                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11251                         if (ret < 0) {
11252                                 btrfs_release_path(&path);
11253                                 return ret;
11254                         }
11255                         if (ret) {
11256                                 ret = 0;
11257                                 break;
11258                         }
11259                 }
11260                 leaf = path.nodes[0];
11261                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11262                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11263                         path.slots[0]++;
11264                         continue;
11265                 }
11266
11267                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11268                 btrfs_add_block_group(fs_info, 0,
11269                                       btrfs_chunk_type(leaf, chunk),
11270                                       key.objectid, key.offset,
11271                                       btrfs_chunk_length(leaf, chunk));
11272                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11273                                  key.offset + btrfs_chunk_length(leaf, chunk),
11274                                  GFP_NOFS);
11275                 path.slots[0]++;
11276         }
11277         start = 0;
11278         while (1) {
11279                 cache = btrfs_lookup_first_block_group(fs_info, start);
11280                 if (!cache)
11281                         break;
11282                 cache->cached = 1;
11283                 start = cache->key.objectid + cache->key.offset;
11284         }
11285
11286         btrfs_release_path(&path);
11287         return 0;
11288 }
11289
11290 static int reset_balance(struct btrfs_trans_handle *trans,
11291                          struct btrfs_fs_info *fs_info)
11292 {
11293         struct btrfs_root *root = fs_info->tree_root;
11294         struct btrfs_path path;
11295         struct extent_buffer *leaf;
11296         struct btrfs_key key;
11297         int del_slot, del_nr = 0;
11298         int ret;
11299         int found = 0;
11300
11301         btrfs_init_path(&path);
11302         key.objectid = BTRFS_BALANCE_OBJECTID;
11303         key.type = BTRFS_BALANCE_ITEM_KEY;
11304         key.offset = 0;
11305         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11306         if (ret) {
11307                 if (ret > 0)
11308                         ret = 0;
11309                 if (!ret)
11310                         goto reinit_data_reloc;
11311                 else
11312                         goto out;
11313         }
11314
11315         ret = btrfs_del_item(trans, root, &path);
11316         if (ret)
11317                 goto out;
11318         btrfs_release_path(&path);
11319
11320         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11321         key.type = BTRFS_ROOT_ITEM_KEY;
11322         key.offset = 0;
11323         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11324         if (ret < 0)
11325                 goto out;
11326         while (1) {
11327                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11328                         if (!found)
11329                                 break;
11330
11331                         if (del_nr) {
11332                                 ret = btrfs_del_items(trans, root, &path,
11333                                                       del_slot, del_nr);
11334                                 del_nr = 0;
11335                                 if (ret)
11336                                         goto out;
11337                         }
11338                         key.offset++;
11339                         btrfs_release_path(&path);
11340
11341                         found = 0;
11342                         ret = btrfs_search_slot(trans, root, &key, &path,
11343                                                 -1, 1);
11344                         if (ret < 0)
11345                                 goto out;
11346                         continue;
11347                 }
11348                 found = 1;
11349                 leaf = path.nodes[0];
11350                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11351                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11352                         break;
11353                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11354                         path.slots[0]++;
11355                         continue;
11356                 }
11357                 if (!del_nr) {
11358                         del_slot = path.slots[0];
11359                         del_nr = 1;
11360                 } else {
11361                         del_nr++;
11362                 }
11363                 path.slots[0]++;
11364         }
11365
11366         if (del_nr) {
11367                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11368                 if (ret)
11369                         goto out;
11370         }
11371         btrfs_release_path(&path);
11372
11373 reinit_data_reloc:
11374         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11375         key.type = BTRFS_ROOT_ITEM_KEY;
11376         key.offset = (u64)-1;
11377         root = btrfs_read_fs_root(fs_info, &key);
11378         if (IS_ERR(root)) {
11379                 fprintf(stderr, "Error reading data reloc tree\n");
11380                 ret = PTR_ERR(root);
11381                 goto out;
11382         }
11383         record_root_in_trans(trans, root);
11384         ret = btrfs_fsck_reinit_root(trans, root, 0);
11385         if (ret)
11386                 goto out;
11387         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11388 out:
11389         btrfs_release_path(&path);
11390         return ret;
11391 }
11392
11393 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11394                               struct btrfs_fs_info *fs_info)
11395 {
11396         u64 start = 0;
11397         int ret;
11398
11399         /*
11400          * The only reason we don't do this is because right now we're just
11401          * walking the trees we find and pinning down their bytes, we don't look
11402          * at any of the leaves.  In order to do mixed groups we'd have to check
11403          * the leaves of any fs roots and pin down the bytes for any file
11404          * extents we find.  Not hard but why do it if we don't have to?
11405          */
11406         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
11407                 fprintf(stderr, "We don't support re-initing the extent tree "
11408                         "for mixed block groups yet, please notify a btrfs "
11409                         "developer you want to do this so they can add this "
11410                         "functionality.\n");
11411                 return -EINVAL;
11412         }
11413
11414         /*
11415          * first we need to walk all of the trees except the extent tree and pin
11416          * down the bytes that are in use so we don't overwrite any existing
11417          * metadata.
11418          */
11419         ret = pin_metadata_blocks(fs_info);
11420         if (ret) {
11421                 fprintf(stderr, "error pinning down used bytes\n");
11422                 return ret;
11423         }
11424
11425         /*
11426          * Need to drop all the block groups since we're going to recreate all
11427          * of them again.
11428          */
11429         btrfs_free_block_groups(fs_info);
11430         ret = reset_block_groups(fs_info);
11431         if (ret) {
11432                 fprintf(stderr, "error resetting the block groups\n");
11433                 return ret;
11434         }
11435
11436         /* Ok we can allocate now, reinit the extent root */
11437         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11438         if (ret) {
11439                 fprintf(stderr, "extent root initialization failed\n");
11440                 /*
11441                  * When the transaction code is updated we should end the
11442                  * transaction, but for now progs only knows about commit so
11443                  * just return an error.
11444                  */
11445                 return ret;
11446         }
11447
11448         /*
11449          * Now we have all the in-memory block groups setup so we can make
11450          * allocations properly, and the metadata we care about is safe since we
11451          * pinned all of it above.
11452          */
11453         while (1) {
11454                 struct btrfs_block_group_cache *cache;
11455
11456                 cache = btrfs_lookup_first_block_group(fs_info, start);
11457                 if (!cache)
11458                         break;
11459                 start = cache->key.objectid + cache->key.offset;
11460                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11461                                         &cache->key, &cache->item,
11462                                         sizeof(cache->item));
11463                 if (ret) {
11464                         fprintf(stderr, "Error adding block group\n");
11465                         return ret;
11466                 }
11467                 btrfs_extent_post_op(trans, fs_info->extent_root);
11468         }
11469
11470         ret = reset_balance(trans, fs_info);
11471         if (ret)
11472                 fprintf(stderr, "error resetting the pending balance\n");
11473
11474         return ret;
11475 }
11476
11477 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11478 {
11479         struct btrfs_path path;
11480         struct btrfs_trans_handle *trans;
11481         struct btrfs_key key;
11482         int ret;
11483
11484         printf("Recowing metadata block %llu\n", eb->start);
11485         key.objectid = btrfs_header_owner(eb);
11486         key.type = BTRFS_ROOT_ITEM_KEY;
11487         key.offset = (u64)-1;
11488
11489         root = btrfs_read_fs_root(root->fs_info, &key);
11490         if (IS_ERR(root)) {
11491                 fprintf(stderr, "Couldn't find owner root %llu\n",
11492                         key.objectid);
11493                 return PTR_ERR(root);
11494         }
11495
11496         trans = btrfs_start_transaction(root, 1);
11497         if (IS_ERR(trans))
11498                 return PTR_ERR(trans);
11499
11500         btrfs_init_path(&path);
11501         path.lowest_level = btrfs_header_level(eb);
11502         if (path.lowest_level)
11503                 btrfs_node_key_to_cpu(eb, &key, 0);
11504         else
11505                 btrfs_item_key_to_cpu(eb, &key, 0);
11506
11507         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11508         btrfs_commit_transaction(trans, root);
11509         btrfs_release_path(&path);
11510         return ret;
11511 }
11512
11513 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11514 {
11515         struct btrfs_path path;
11516         struct btrfs_trans_handle *trans;
11517         struct btrfs_key key;
11518         int ret;
11519
11520         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11521                bad->key.type, bad->key.offset);
11522         key.objectid = bad->root_id;
11523         key.type = BTRFS_ROOT_ITEM_KEY;
11524         key.offset = (u64)-1;
11525
11526         root = btrfs_read_fs_root(root->fs_info, &key);
11527         if (IS_ERR(root)) {
11528                 fprintf(stderr, "Couldn't find owner root %llu\n",
11529                         key.objectid);
11530                 return PTR_ERR(root);
11531         }
11532
11533         trans = btrfs_start_transaction(root, 1);
11534         if (IS_ERR(trans))
11535                 return PTR_ERR(trans);
11536
11537         btrfs_init_path(&path);
11538         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11539         if (ret) {
11540                 if (ret > 0)
11541                         ret = 0;
11542                 goto out;
11543         }
11544         ret = btrfs_del_item(trans, root, &path);
11545 out:
11546         btrfs_commit_transaction(trans, root);
11547         btrfs_release_path(&path);
11548         return ret;
11549 }
11550
11551 static int zero_log_tree(struct btrfs_root *root)
11552 {
11553         struct btrfs_trans_handle *trans;
11554         int ret;
11555
11556         trans = btrfs_start_transaction(root, 1);
11557         if (IS_ERR(trans)) {
11558                 ret = PTR_ERR(trans);
11559                 return ret;
11560         }
11561         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11562         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11563         ret = btrfs_commit_transaction(trans, root);
11564         return ret;
11565 }
11566
11567 static int populate_csum(struct btrfs_trans_handle *trans,
11568                          struct btrfs_root *csum_root, char *buf, u64 start,
11569                          u64 len)
11570 {
11571         u64 offset = 0;
11572         u64 sectorsize;
11573         int ret = 0;
11574
11575         while (offset < len) {
11576                 sectorsize = csum_root->sectorsize;
11577                 ret = read_extent_data(csum_root, buf, start + offset,
11578                                        &sectorsize, 0);
11579                 if (ret)
11580                         break;
11581                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11582                                             start + offset, buf, sectorsize);
11583                 if (ret)
11584                         break;
11585                 offset += sectorsize;
11586         }
11587         return ret;
11588 }
11589
11590 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11591                                       struct btrfs_root *csum_root,
11592                                       struct btrfs_root *cur_root)
11593 {
11594         struct btrfs_path path;
11595         struct btrfs_key key;
11596         struct extent_buffer *node;
11597         struct btrfs_file_extent_item *fi;
11598         char *buf = NULL;
11599         u64 start = 0;
11600         u64 len = 0;
11601         int slot = 0;
11602         int ret = 0;
11603
11604         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11605         if (!buf)
11606                 return -ENOMEM;
11607
11608         btrfs_init_path(&path);
11609         key.objectid = 0;
11610         key.offset = 0;
11611         key.type = 0;
11612         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11613         if (ret < 0)
11614                 goto out;
11615         /* Iterate all regular file extents and fill its csum */
11616         while (1) {
11617                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11618
11619                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11620                         goto next;
11621                 node = path.nodes[0];
11622                 slot = path.slots[0];
11623                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11624                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
11625                         goto next;
11626                 start = btrfs_file_extent_disk_bytenr(node, fi);
11627                 len = btrfs_file_extent_disk_num_bytes(node, fi);
11628
11629                 ret = populate_csum(trans, csum_root, buf, start, len);
11630                 if (ret == -EEXIST)
11631                         ret = 0;
11632                 if (ret < 0)
11633                         goto out;
11634 next:
11635                 /*
11636                  * TODO: if next leaf is corrupted, jump to nearest next valid
11637                  * leaf.
11638                  */
11639                 ret = btrfs_next_item(cur_root, &path);
11640                 if (ret < 0)
11641                         goto out;
11642                 if (ret > 0) {
11643                         ret = 0;
11644                         goto out;
11645                 }
11646         }
11647
11648 out:
11649         btrfs_release_path(&path);
11650         free(buf);
11651         return ret;
11652 }
11653
11654 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11655                                   struct btrfs_root *csum_root)
11656 {
11657         struct btrfs_fs_info *fs_info = csum_root->fs_info;
11658         struct btrfs_path path;
11659         struct btrfs_root *tree_root = fs_info->tree_root;
11660         struct btrfs_root *cur_root;
11661         struct extent_buffer *node;
11662         struct btrfs_key key;
11663         int slot = 0;
11664         int ret = 0;
11665
11666         btrfs_init_path(&path);
11667         key.objectid = BTRFS_FS_TREE_OBJECTID;
11668         key.offset = 0;
11669         key.type = BTRFS_ROOT_ITEM_KEY;
11670         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
11671         if (ret < 0)
11672                 goto out;
11673         if (ret > 0) {
11674                 ret = -ENOENT;
11675                 goto out;
11676         }
11677
11678         while (1) {
11679                 node = path.nodes[0];
11680                 slot = path.slots[0];
11681                 btrfs_item_key_to_cpu(node, &key, slot);
11682                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
11683                         goto out;
11684                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11685                         goto next;
11686                 if (!is_fstree(key.objectid))
11687                         goto next;
11688                 key.offset = (u64)-1;
11689
11690                 cur_root = btrfs_read_fs_root(fs_info, &key);
11691                 if (IS_ERR(cur_root) || !cur_root) {
11692                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
11693                                 key.objectid);
11694                         goto out;
11695                 }
11696                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
11697                                 cur_root);
11698                 if (ret < 0)
11699                         goto out;
11700 next:
11701                 ret = btrfs_next_item(tree_root, &path);
11702                 if (ret > 0) {
11703                         ret = 0;
11704                         goto out;
11705                 }
11706                 if (ret < 0)
11707                         goto out;
11708         }
11709
11710 out:
11711         btrfs_release_path(&path);
11712         return ret;
11713 }
11714
11715 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11716                                       struct btrfs_root *csum_root)
11717 {
11718         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11719         struct btrfs_path path;
11720         struct btrfs_extent_item *ei;
11721         struct extent_buffer *leaf;
11722         char *buf;
11723         struct btrfs_key key;
11724         int ret;
11725
11726         btrfs_init_path(&path);
11727         key.objectid = 0;
11728         key.type = BTRFS_EXTENT_ITEM_KEY;
11729         key.offset = 0;
11730         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11731         if (ret < 0) {
11732                 btrfs_release_path(&path);
11733                 return ret;
11734         }
11735
11736         buf = malloc(csum_root->sectorsize);
11737         if (!buf) {
11738                 btrfs_release_path(&path);
11739                 return -ENOMEM;
11740         }
11741
11742         while (1) {
11743                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11744                         ret = btrfs_next_leaf(extent_root, &path);
11745                         if (ret < 0)
11746                                 break;
11747                         if (ret) {
11748                                 ret = 0;
11749                                 break;
11750                         }
11751                 }
11752                 leaf = path.nodes[0];
11753
11754                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11755                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11756                         path.slots[0]++;
11757                         continue;
11758                 }
11759
11760                 ei = btrfs_item_ptr(leaf, path.slots[0],
11761                                     struct btrfs_extent_item);
11762                 if (!(btrfs_extent_flags(leaf, ei) &
11763                       BTRFS_EXTENT_FLAG_DATA)) {
11764                         path.slots[0]++;
11765                         continue;
11766                 }
11767
11768                 ret = populate_csum(trans, csum_root, buf, key.objectid,
11769                                     key.offset);
11770                 if (ret)
11771                         break;
11772                 path.slots[0]++;
11773         }
11774
11775         btrfs_release_path(&path);
11776         free(buf);
11777         return ret;
11778 }
11779
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * After an extent tree init the extent info has been wiped, so the extent
 * tree cannot serve as the source of data extents.  In that case the caller
 * sets search_fs_tree and the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
                              : fill_csum_tree_from_extent(trans, csum_root);
}
11796
11797 static void free_roots_info_cache(void)
11798 {
11799         if (!roots_info_cache)
11800                 return;
11801
11802         while (!cache_tree_empty(roots_info_cache)) {
11803                 struct cache_extent *entry;
11804                 struct root_item_info *rii;
11805
11806                 entry = first_cache_extent(roots_info_cache);
11807                 if (!entry)
11808                         break;
11809                 remove_cache_extent(roots_info_cache, entry);
11810                 rii = container_of(entry, struct root_item_info, cache_extent);
11811                 free(rii);
11812         }
11813
11814         free(roots_info_cache);
11815         roots_info_cache = NULL;
11816 }
11817
11818 static int build_roots_info_cache(struct btrfs_fs_info *info)
11819 {
11820         int ret = 0;
11821         struct btrfs_key key;
11822         struct extent_buffer *leaf;
11823         struct btrfs_path path;
11824
11825         if (!roots_info_cache) {
11826                 roots_info_cache = malloc(sizeof(*roots_info_cache));
11827                 if (!roots_info_cache)
11828                         return -ENOMEM;
11829                 cache_tree_init(roots_info_cache);
11830         }
11831
11832         btrfs_init_path(&path);
11833         key.objectid = 0;
11834         key.type = BTRFS_EXTENT_ITEM_KEY;
11835         key.offset = 0;
11836         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11837         if (ret < 0)
11838                 goto out;
11839         leaf = path.nodes[0];
11840
11841         while (1) {
11842                 struct btrfs_key found_key;
11843                 struct btrfs_extent_item *ei;
11844                 struct btrfs_extent_inline_ref *iref;
11845                 int slot = path.slots[0];
11846                 int type;
11847                 u64 flags;
11848                 u64 root_id;
11849                 u8 level;
11850                 struct cache_extent *entry;
11851                 struct root_item_info *rii;
11852
11853                 if (slot >= btrfs_header_nritems(leaf)) {
11854                         ret = btrfs_next_leaf(info->extent_root, &path);
11855                         if (ret < 0) {
11856                                 break;
11857                         } else if (ret) {
11858                                 ret = 0;
11859                                 break;
11860                         }
11861                         leaf = path.nodes[0];
11862                         slot = path.slots[0];
11863                 }
11864
11865                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11866
11867                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11868                     found_key.type != BTRFS_METADATA_ITEM_KEY)
11869                         goto next;
11870
11871                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11872                 flags = btrfs_extent_flags(leaf, ei);
11873
11874                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11875                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11876                         goto next;
11877
11878                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11879                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11880                         level = found_key.offset;
11881                 } else {
11882                         struct btrfs_tree_block_info *binfo;
11883
11884                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
11885                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11886                         level = btrfs_tree_block_level(leaf, binfo);
11887                 }
11888
11889                 /*
11890                  * For a root extent, it must be of the following type and the
11891                  * first (and only one) iref in the item.
11892                  */
11893                 type = btrfs_extent_inline_ref_type(leaf, iref);
11894                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11895                         goto next;
11896
11897                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11898                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11899                 if (!entry) {
11900                         rii = malloc(sizeof(struct root_item_info));
11901                         if (!rii) {
11902                                 ret = -ENOMEM;
11903                                 goto out;
11904                         }
11905                         rii->cache_extent.start = root_id;
11906                         rii->cache_extent.size = 1;
11907                         rii->level = (u8)-1;
11908                         entry = &rii->cache_extent;
11909                         ret = insert_cache_extent(roots_info_cache, entry);
11910                         ASSERT(ret == 0);
11911                 } else {
11912                         rii = container_of(entry, struct root_item_info,
11913                                            cache_extent);
11914                 }
11915
11916                 ASSERT(rii->cache_extent.start == root_id);
11917                 ASSERT(rii->cache_extent.size == 1);
11918
11919                 if (level > rii->level || rii->level == (u8)-1) {
11920                         rii->level = level;
11921                         rii->bytenr = found_key.objectid;
11922                         rii->gen = btrfs_extent_generation(leaf, ei);
11923                         rii->node_count = 1;
11924                 } else if (level == rii->level) {
11925                         rii->node_count++;
11926                 }
11927 next:
11928                 path.slots[0]++;
11929         }
11930
11931 out:
11932         btrfs_release_path(&path);
11933
11934         return ret;
11935 }
11936
11937 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11938                                   struct btrfs_path *path,
11939                                   const struct btrfs_key *root_key,
11940                                   const int read_only_mode)
11941 {
11942         const u64 root_id = root_key->objectid;
11943         struct cache_extent *entry;
11944         struct root_item_info *rii;
11945         struct btrfs_root_item ri;
11946         unsigned long offset;
11947
11948         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11949         if (!entry) {
11950                 fprintf(stderr,
11951                         "Error: could not find extent items for root %llu\n",
11952                         root_key->objectid);
11953                 return -ENOENT;
11954         }
11955
11956         rii = container_of(entry, struct root_item_info, cache_extent);
11957         ASSERT(rii->cache_extent.start == root_id);
11958         ASSERT(rii->cache_extent.size == 1);
11959
11960         if (rii->node_count != 1) {
11961                 fprintf(stderr,
11962                         "Error: could not find btree root extent for root %llu\n",
11963                         root_id);
11964                 return -ENOENT;
11965         }
11966
11967         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11968         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11969
11970         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11971             btrfs_root_level(&ri) != rii->level ||
11972             btrfs_root_generation(&ri) != rii->gen) {
11973
11974                 /*
11975                  * If we're in repair mode but our caller told us to not update
11976                  * the root item, i.e. just check if it needs to be updated, don't
11977                  * print this message, since the caller will call us again shortly
11978                  * for the same root item without read only mode (the caller will
11979                  * open a transaction first).
11980                  */
11981                 if (!(read_only_mode && repair))
11982                         fprintf(stderr,
11983                                 "%sroot item for root %llu,"
11984                                 " current bytenr %llu, current gen %llu, current level %u,"
11985                                 " new bytenr %llu, new gen %llu, new level %u\n",
11986                                 (read_only_mode ? "" : "fixing "),
11987                                 root_id,
11988                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11989                                 btrfs_root_level(&ri),
11990                                 rii->bytenr, rii->gen, rii->level);
11991
11992                 if (btrfs_root_generation(&ri) > rii->gen) {
11993                         fprintf(stderr,
11994                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11995                                 root_id, btrfs_root_generation(&ri), rii->gen);
11996                         return -EINVAL;
11997                 }
11998
11999                 if (!read_only_mode) {
12000                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12001                         btrfs_set_root_level(&ri, rii->level);
12002                         btrfs_set_root_generation(&ri, rii->gen);
12003                         write_extent_buffer(path->nodes[0], &ri,
12004                                             offset, sizeof(ri));
12005                 }
12006
12007                 return 1;
12008         }
12009
12010         return 0;
12011 }
12012
12013 /*
12014  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12015  * caused read-only snapshots to be corrupted if they were created at a moment
12016  * when the source subvolume/snapshot had orphan items. The issue was that the
12017  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12018  * node instead of the post orphan cleanup root node.
12019  * So this function, and its callees, just detects and fixes those cases. Even
12020  * though the regression was for read-only snapshots, this function applies to
12021  * any snapshot/subvolume root.
12022  * This must be run before any other repair code - not doing it so, makes other
12023  * repair code delete or modify backrefs in the extent tree for example, which
12024  * will result in an inconsistent fs after repairing the root items.
12025  */
12026 static int repair_root_items(struct btrfs_fs_info *info)
12027 {
12028         struct btrfs_path path;
12029         struct btrfs_key key;
12030         struct extent_buffer *leaf;
12031         struct btrfs_trans_handle *trans = NULL;
12032         int ret = 0;
12033         int bad_roots = 0;
12034         int need_trans = 0;
12035
12036         btrfs_init_path(&path);
12037
12038         ret = build_roots_info_cache(info);
12039         if (ret)
12040                 goto out;
12041
12042         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12043         key.type = BTRFS_ROOT_ITEM_KEY;
12044         key.offset = 0;
12045
12046 again:
12047         /*
12048          * Avoid opening and committing transactions if a leaf doesn't have
12049          * any root items that need to be fixed, so that we avoid rotating
12050          * backup roots unnecessarily.
12051          */
12052         if (need_trans) {
12053                 trans = btrfs_start_transaction(info->tree_root, 1);
12054                 if (IS_ERR(trans)) {
12055                         ret = PTR_ERR(trans);
12056                         goto out;
12057                 }
12058         }
12059
12060         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12061                                 0, trans ? 1 : 0);
12062         if (ret < 0)
12063                 goto out;
12064         leaf = path.nodes[0];
12065
12066         while (1) {
12067                 struct btrfs_key found_key;
12068
12069                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12070                         int no_more_keys = find_next_key(&path, &key);
12071
12072                         btrfs_release_path(&path);
12073                         if (trans) {
12074                                 ret = btrfs_commit_transaction(trans,
12075                                                                info->tree_root);
12076                                 trans = NULL;
12077                                 if (ret < 0)
12078                                         goto out;
12079                         }
12080                         need_trans = 0;
12081                         if (no_more_keys)
12082                                 break;
12083                         goto again;
12084                 }
12085
12086                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12087
12088                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12089                         goto next;
12090                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12091                         goto next;
12092
12093                 ret = maybe_repair_root_item(info, &path, &found_key,
12094                                              trans ? 0 : 1);
12095                 if (ret < 0)
12096                         goto out;
12097                 if (ret) {
12098                         if (!trans && repair) {
12099                                 need_trans = 1;
12100                                 key = found_key;
12101                                 btrfs_release_path(&path);
12102                                 goto again;
12103                         }
12104                         bad_roots++;
12105                 }
12106 next:
12107                 path.slots[0]++;
12108         }
12109         ret = 0;
12110 out:
12111         free_roots_info_cache();
12112         btrfs_release_path(&path);
12113         if (trans)
12114                 btrfs_commit_transaction(trans, info->tree_root);
12115         if (ret < 0)
12116                 return ret;
12117
12118         return bad_roots;
12119 }
12120
12121 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12122 {
12123         struct btrfs_trans_handle *trans;
12124         struct btrfs_block_group_cache *bg_cache;
12125         u64 current = 0;
12126         int ret = 0;
12127
12128         /* Clear all free space cache inodes and its extent data */
12129         while (1) {
12130                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12131                 if (!bg_cache)
12132                         break;
12133                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12134                 if (ret < 0)
12135                         return ret;
12136                 current = bg_cache->key.objectid + bg_cache->key.offset;
12137         }
12138
12139         /* Don't forget to set cache_generation to -1 */
12140         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12141         if (IS_ERR(trans)) {
12142                 error("failed to update super block cache generation");
12143                 return PTR_ERR(trans);
12144         }
12145         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12146         btrfs_commit_transaction(trans, fs_info->tree_root);
12147
12148         return ret;
12149 }
12150
/*
 * Help text for "btrfs check", stored as a NULL-terminated array of lines.
 * cmd_check() passes this table to usage() when the command line is invalid.
 */
const char * const cmd_check_usage[] = {
        /* Synopsis and short description */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",

        /* Long description */
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* Options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",

        /* End of table */
        NULL
};
12181
12182 int cmd_check(int argc, char **argv)
12183 {
12184         struct cache_tree root_cache;
12185         struct btrfs_root *root;
12186         struct btrfs_fs_info *info;
12187         u64 bytenr = 0;
12188         u64 subvolid = 0;
12189         u64 tree_root_bytenr = 0;
12190         u64 chunk_root_bytenr = 0;
12191         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12192         int ret;
12193         u64 num;
12194         int init_csum_tree = 0;
12195         int readonly = 0;
12196         int clear_space_cache = 0;
12197         int qgroup_report = 0;
12198         int qgroups_repaired = 0;
12199         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12200
12201         while(1) {
12202                 int c;
12203                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12204                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12205                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12206                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12207                 static const struct option long_options[] = {
12208                         { "super", required_argument, NULL, 's' },
12209                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12210                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12211                         { "init-csum-tree", no_argument, NULL,
12212                                 GETOPT_VAL_INIT_CSUM },
12213                         { "init-extent-tree", no_argument, NULL,
12214                                 GETOPT_VAL_INIT_EXTENT },
12215                         { "check-data-csum", no_argument, NULL,
12216                                 GETOPT_VAL_CHECK_CSUM },
12217                         { "backup", no_argument, NULL, 'b' },
12218                         { "subvol-extents", required_argument, NULL, 'E' },
12219                         { "qgroup-report", no_argument, NULL, 'Q' },
12220                         { "tree-root", required_argument, NULL, 'r' },
12221                         { "chunk-root", required_argument, NULL,
12222                                 GETOPT_VAL_CHUNK_TREE },
12223                         { "progress", no_argument, NULL, 'p' },
12224                         { "mode", required_argument, NULL,
12225                                 GETOPT_VAL_MODE },
12226                         { "clear-space-cache", required_argument, NULL,
12227                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12228                         { NULL, 0, NULL, 0}
12229                 };
12230
12231                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12232                 if (c < 0)
12233                         break;
12234                 switch(c) {
12235                         case 'a': /* ignored */ break;
12236                         case 'b':
12237                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12238                                 break;
12239                         case 's':
12240                                 num = arg_strtou64(optarg);
12241                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12242                                         error(
12243                                         "super mirror should be less than %d",
12244                                                 BTRFS_SUPER_MIRROR_MAX);
12245                                         exit(1);
12246                                 }
12247                                 bytenr = btrfs_sb_offset(((int)num));
12248                                 printf("using SB copy %llu, bytenr %llu\n", num,
12249                                        (unsigned long long)bytenr);
12250                                 break;
12251                         case 'Q':
12252                                 qgroup_report = 1;
12253                                 break;
12254                         case 'E':
12255                                 subvolid = arg_strtou64(optarg);
12256                                 break;
12257                         case 'r':
12258                                 tree_root_bytenr = arg_strtou64(optarg);
12259                                 break;
12260                         case GETOPT_VAL_CHUNK_TREE:
12261                                 chunk_root_bytenr = arg_strtou64(optarg);
12262                                 break;
12263                         case 'p':
12264                                 ctx.progress_enabled = true;
12265                                 break;
12266                         case '?':
12267                         case 'h':
12268                                 usage(cmd_check_usage);
12269                         case GETOPT_VAL_REPAIR:
12270                                 printf("enabling repair mode\n");
12271                                 repair = 1;
12272                                 ctree_flags |= OPEN_CTREE_WRITES;
12273                                 break;
12274                         case GETOPT_VAL_READONLY:
12275                                 readonly = 1;
12276                                 break;
12277                         case GETOPT_VAL_INIT_CSUM:
12278                                 printf("Creating a new CRC tree\n");
12279                                 init_csum_tree = 1;
12280                                 repair = 1;
12281                                 ctree_flags |= OPEN_CTREE_WRITES;
12282                                 break;
12283                         case GETOPT_VAL_INIT_EXTENT:
12284                                 init_extent_tree = 1;
12285                                 ctree_flags |= (OPEN_CTREE_WRITES |
12286                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12287                                 repair = 1;
12288                                 break;
12289                         case GETOPT_VAL_CHECK_CSUM:
12290                                 check_data_csum = 1;
12291                                 break;
12292                         case GETOPT_VAL_MODE:
12293                                 check_mode = parse_check_mode(optarg);
12294                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12295                                         error("unknown mode: %s", optarg);
12296                                         exit(1);
12297                                 }
12298                                 break;
12299                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12300                                 if (strcmp(optarg, "v1") == 0) {
12301                                         clear_space_cache = 1;
12302                                 } else if (strcmp(optarg, "v2") == 0) {
12303                                         clear_space_cache = 2;
12304                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12305                                 } else {
12306                                         error(
12307                 "invalid argument to --clear-space-cache, must be v1 or v2");
12308                                         exit(1);
12309                                 }
12310                                 ctree_flags |= OPEN_CTREE_WRITES;
12311                                 break;
12312                 }
12313         }
12314
12315         if (check_argc_exact(argc - optind, 1))
12316                 usage(cmd_check_usage);
12317
12318         if (ctx.progress_enabled) {
12319                 ctx.tp = TASK_NOTHING;
12320                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12321         }
12322
12323         /* This check is the only reason for --readonly to exist */
12324         if (readonly && repair) {
12325                 error("repair options are not compatible with --readonly");
12326                 exit(1);
12327         }
12328
12329         /*
12330          * Not supported yet
12331          */
12332         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12333                 error("low memory mode doesn't support repair yet");
12334                 exit(1);
12335         }
12336
12337         radix_tree_init();
12338         cache_tree_init(&root_cache);
12339
12340         if((ret = check_mounted(argv[optind])) < 0) {
12341                 error("could not check mount status: %s", strerror(-ret));
12342                 goto err_out;
12343         } else if(ret) {
12344                 error("%s is currently mounted, aborting", argv[optind]);
12345                 ret = -EBUSY;
12346                 goto err_out;
12347         }
12348
12349         /* only allow partial opening under repair mode */
12350         if (repair)
12351                 ctree_flags |= OPEN_CTREE_PARTIAL;
12352
12353         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12354                                   chunk_root_bytenr, ctree_flags);
12355         if (!info) {
12356                 error("cannot open file system");
12357                 ret = -EIO;
12358                 goto err_out;
12359         }
12360
12361         global_info = info;
12362         root = info->fs_root;
12363         if (clear_space_cache == 1) {
12364                 if (btrfs_fs_compat_ro(info,
12365                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12366                         error(
12367                 "free space cache v2 detected, use --clear-space-cache v2");
12368                         ret = 1;
12369                         goto close_out;
12370                 }
12371                 printf("Clearing free space cache\n");
12372                 ret = clear_free_space_cache(info);
12373                 if (ret) {
12374                         error("failed to clear free space cache");
12375                         ret = 1;
12376                 } else {
12377                         printf("Free space cache cleared\n");
12378                 }
12379                 goto close_out;
12380         } else if (clear_space_cache == 2) {
12381                 if (!btrfs_fs_compat_ro(info,
12382                                         BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12383                         printf("no free space cache v2 to clear\n");
12384                         ret = 0;
12385                         goto close_out;
12386                 }
12387                 printf("Clear free space cache v2\n");
12388                 ret = btrfs_clear_free_space_tree(info);
12389                 if (ret) {
12390                         error("failed to clear free space cache v2: %d", ret);
12391                         ret = 1;
12392                 } else {
12393                         printf("free space cache v2 cleared\n");
12394                 }
12395                 goto close_out;
12396         }
12397
12398         /*
12399          * repair mode will force us to commit transaction which
12400          * will make us fail to load log tree when mounting.
12401          */
12402         if (repair && btrfs_super_log_root(info->super_copy)) {
12403                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12404                 if (!ret) {
12405                         ret = 1;
12406                         goto close_out;
12407                 }
12408                 ret = zero_log_tree(root);
12409                 if (ret) {
12410                         error("failed to zero log tree: %d", ret);
12411                         goto close_out;
12412                 }
12413         }
12414
12415         uuid_unparse(info->super_copy->fsid, uuidbuf);
12416         if (qgroup_report) {
12417                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12418                        uuidbuf);
12419                 ret = qgroup_verify_all(info);
12420                 if (ret == 0)
12421                         report_qgroups(1);
12422                 goto close_out;
12423         }
12424         if (subvolid) {
12425                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12426                        subvolid, argv[optind], uuidbuf);
12427                 ret = print_extent_state(info, subvolid);
12428                 goto close_out;
12429         }
12430         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12431
12432         if (!extent_buffer_uptodate(info->tree_root->node) ||
12433             !extent_buffer_uptodate(info->dev_root->node) ||
12434             !extent_buffer_uptodate(info->chunk_root->node)) {
12435                 error("critical roots corrupted, unable to check the filesystem");
12436                 ret = -EIO;
12437                 goto close_out;
12438         }
12439
12440         if (init_extent_tree || init_csum_tree) {
12441                 struct btrfs_trans_handle *trans;
12442
12443                 trans = btrfs_start_transaction(info->extent_root, 0);
12444                 if (IS_ERR(trans)) {
12445                         error("error starting transaction");
12446                         ret = PTR_ERR(trans);
12447                         goto close_out;
12448                 }
12449
12450                 if (init_extent_tree) {
12451                         printf("Creating a new extent tree\n");
12452                         ret = reinit_extent_tree(trans, info);
12453                         if (ret)
12454                                 goto close_out;
12455                 }
12456
12457                 if (init_csum_tree) {
12458                         printf("Reinitialize checksum tree\n");
12459                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12460                         if (ret) {
12461                                 error("checksum tree initialization failed: %d",
12462                                                 ret);
12463                                 ret = -EIO;
12464                                 goto close_out;
12465                         }
12466
12467                         ret = fill_csum_tree(trans, info->csum_root,
12468                                              init_extent_tree);
12469                         if (ret) {
12470                                 error("checksum tree refilling failed: %d", ret);
12471                                 return -EIO;
12472                         }
12473                 }
12474                 /*
12475                  * Ok now we commit and run the normal fsck, which will add
12476                  * extent entries for all of the items it finds.
12477                  */
12478                 ret = btrfs_commit_transaction(trans, info->extent_root);
12479                 if (ret)
12480                         goto close_out;
12481         }
12482         if (!extent_buffer_uptodate(info->extent_root->node)) {
12483                 error("critical: extent_root, unable to check the filesystem");
12484                 ret = -EIO;
12485                 goto close_out;
12486         }
12487         if (!extent_buffer_uptodate(info->csum_root->node)) {
12488                 error("critical: csum_root, unable to check the filesystem");
12489                 ret = -EIO;
12490                 goto close_out;
12491         }
12492
12493         if (!ctx.progress_enabled)
12494                 fprintf(stderr, "checking extents\n");
12495         if (check_mode == CHECK_MODE_LOWMEM)
12496                 ret = check_chunks_and_extents_v2(root);
12497         else
12498                 ret = check_chunks_and_extents(root);
12499         if (ret)
12500                 error(
12501                 "errors found in extent allocation tree or chunk allocation");
12502
12503         ret = repair_root_items(info);
12504         if (ret < 0)
12505                 goto close_out;
12506         if (repair) {
12507                 fprintf(stderr, "Fixed %d roots.\n", ret);
12508                 ret = 0;
12509         } else if (ret > 0) {
12510                 fprintf(stderr,
12511                        "Found %d roots with an outdated root item.\n",
12512                        ret);
12513                 fprintf(stderr,
12514                         "Please run a filesystem check with the option --repair to fix them.\n");
12515                 ret = 1;
12516                 goto close_out;
12517         }
12518
12519         if (!ctx.progress_enabled) {
12520                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
12521                         fprintf(stderr, "checking free space tree\n");
12522                 else
12523                         fprintf(stderr, "checking free space cache\n");
12524         }
12525         ret = check_space_cache(root);
12526         if (ret)
12527                 goto out;
12528
12529         /*
12530          * We used to have to have these hole extents in between our real
12531          * extents so if we don't have this flag set we need to make sure there
12532          * are no gaps in the file extents for inodes, otherwise we can just
12533          * ignore it when this happens.
12534          */
12535         no_holes = btrfs_fs_incompat(root->fs_info,
12536                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
12537         if (!ctx.progress_enabled)
12538                 fprintf(stderr, "checking fs roots\n");
12539         ret = check_fs_roots(root, &root_cache);
12540         if (ret)
12541                 goto out;
12542
12543         fprintf(stderr, "checking csums\n");
12544         ret = check_csums(root);
12545         if (ret)
12546                 goto out;
12547
12548         fprintf(stderr, "checking root refs\n");
12549         ret = check_root_refs(root, &root_cache);
12550         if (ret)
12551                 goto out;
12552
12553         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12554                 struct extent_buffer *eb;
12555
12556                 eb = list_first_entry(&root->fs_info->recow_ebs,
12557                                       struct extent_buffer, recow);
12558                 list_del_init(&eb->recow);
12559                 ret = recow_extent_buffer(root, eb);
12560                 if (ret)
12561                         break;
12562         }
12563
12564         while (!list_empty(&delete_items)) {
12565                 struct bad_item *bad;
12566
12567                 bad = list_first_entry(&delete_items, struct bad_item, list);
12568                 list_del_init(&bad->list);
12569                 if (repair)
12570                         ret = delete_bad_item(root, bad);
12571                 free(bad);
12572         }
12573
12574         if (info->quota_enabled) {
12575                 int err;
12576                 fprintf(stderr, "checking quota groups\n");
12577                 err = qgroup_verify_all(info);
12578                 if (err)
12579                         goto out;
12580                 report_qgroups(0);
12581                 err = repair_qgroups(info, &qgroups_repaired);
12582                 if (err)
12583                         goto out;
12584         }
12585
12586         if (!list_empty(&root->fs_info->recow_ebs)) {
12587                 error("transid errors in file system");
12588                 ret = 1;
12589         }
12590 out:
12591         /* Don't override original ret */
12592         if (!ret && qgroups_repaired)
12593                 ret = qgroups_repaired;
12594
12595         if (found_old_backref) { /*
12596                  * there was a disk format change when mixed
12597                  * backref was in testing tree. The old format
12598                  * existed about one week.
12599                  */
12600                 printf("\n * Found old mixed backref format. "
12601                        "The old format is not supported! *"
12602                        "\n * Please mount the FS in readonly mode, "
12603                        "backup data and re-format the FS. *\n\n");
12604                 ret = 1;
12605         }
12606         printf("found %llu bytes used err is %d\n",
12607                (unsigned long long)bytes_used, ret);
12608         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12609         printf("total tree bytes: %llu\n",
12610                (unsigned long long)total_btree_bytes);
12611         printf("total fs tree bytes: %llu\n",
12612                (unsigned long long)total_fs_tree_bytes);
12613         printf("total extent tree bytes: %llu\n",
12614                (unsigned long long)total_extent_tree_bytes);
12615         printf("btree space waste bytes: %llu\n",
12616                (unsigned long long)btree_space_waste);
12617         printf("file data blocks allocated: %llu\n referenced %llu\n",
12618                 (unsigned long long)data_bytes_allocated,
12619                 (unsigned long long)data_bytes_referenced);
12620
12621         free_qgroup_counts();
12622         free_root_recs_tree(&root_cache);
12623 close_out:
12624         close_ctree(root);
12625 err_out:
12626         if (ctx.progress_enabled)
12627                 task_deinit(ctx.info);
12628
12629         return ret;
12630 }