24c729bed832dfc86a36ddde4b12ed492db9b976
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phases that the progress indicator can report.
 *
 * print_status_check() uses the value as an index into its table of
 * phase names, which has one entry for each of the first three values.
 * TASK_NOTHING has no entry there; print_status_check() returns without
 * printing when it sees it.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
51
/* Context that print_status_check() receives through its void * argument. */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output was
         * requested; not read in this part of the file -- confirm. */
        int progress_enabled;
        /* phase whose name is printed; TASK_NOTHING prints nothing */
        enum task_position tp;

        /* handed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
58
/*
 * File-scope state of the check command.
 *
 * NOTE(review): the code that updates these lies outside this part of the
 * file.  Judging by the names they are accounting totals, work lists and
 * option flags -- confirm against their users before relying on that.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* progress indicator context, zero-initialized */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Check implementations that can be selected by name, see
 * parse_check_mode().
 *
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for a string it
 * does not recognize.  CHECK_MODE_DEFAULT is an alias of
 * CHECK_MODE_ORIGINAL, not a separate value.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};
83
/* Selected check mode; starts out as CHECK_MODE_DEFAULT (the original mode). */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
/*
 * Common part of a back reference.  It is embedded as the 'node' member
 * of both struct data_backref and struct tree_backref, and
 * to_extent_backref() maps the 'list' link back to this structure.
 *
 * NOTE(review): the flag meanings are inferred from their names; the code
 * that sets them is outside this part of the file.
 */
struct extent_backref {
        struct list_head list;
        /* presumably selects data_backref vs. tree_backref -- confirm */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
/*
 * Back reference for a data extent.  'node' is the common
 * struct extent_backref; to_data_backref() converts a pointer to it back
 * to this structure.
 *
 * NOTE(review): field meanings are inferred from their names -- confirm
 * against the code that fills them.
 */
struct data_backref {
        struct extent_backref node;
        /* the two members share storage; only one is meaningful at a time */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* a counter here, unlike the 1-bit extent_backref::found_ref */
        u32 found_ref;
};
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a plain list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to the owning inode record
 * (struct inode_record keeps such a list in its 'orphan_extents' member).
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* printed as "inode" by print_orphan_data_extents() */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
134
/*
 * Back reference for a tree block.  'node' is the common
 * struct extent_backref; to_tree_backref() converts a pointer to it back
 * to this structure.
 */
struct tree_backref {
        struct extent_backref node;
        /* the two members share storage; only one is meaningful at a time */
        union {
                u64 parent;
                u64 root;
        };
};
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
/*
 * Explicit initialization value for extent_record::flag_block_full_backref.
 * That bitfield is two bits wide, so it can hold this value (2) in
 * addition to 0 and 1.
 */
enum { FLAG_UNSET = 2 };
150
/*
 * Bookkeeping record for one extent.
 *
 * NOTE(review): the code that fills and consumes this record is outside
 * this part of the file; apart from the members commented below, meanings
 * have to be taken from the member names and confirmed there.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        /* to_extent_record() maps this link back to the record */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can also hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
/*
 * One name that refers to an inode; kept on inode_record::backrefs.
 *
 * The structure is variable sized: clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes, i.e. the name bytes
 * are stored directly behind the fixed part.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        /* compared with imode_to_type(inode mode) in maybe_free_inode_rec() */
        u8 filetype;
        u8 ref_type;
        /* REF_ERR_* bits */
        int errors;
        u64 dir;
        u64 index;
        u16 namelen;
        /* trailing storage for the name, see above */
        char name[0];
};
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
/*
 * Summary of one root item, linkable into a list.
 *
 * NOTE(review): nothing in this part of the file uses the structure; the
 * member names mirror fields of a btrfs root item (bytenr, level,
 * last_snapshot, drop progress) -- confirm against the code that fills it.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
212
/*
 * Error bits for a back reference.  They are kept in
 * inode_backref::errors (and, judging by the ROOT_* names, presumably in
 * root_backref::errors as well -- confirm) and are turned into text by
 * print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
226
/*
 * One hole in the file extents of an inode, stored in the rb tree
 * inode_record::holes.  compare_hole_range() treats it as the half-open
 * byte range [start, start + len).
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
232
/*
 * Everything collected about one inode while checking.
 *
 * Records are reference counted and can be shared: get_inode_rec() hands
 * out a private clone before modification when 'refs' is above one, and
 * free_inode_rec() only releases the memory when 'refs' drops to zero.
 *
 * NOTE(review): members without a comment are filled outside this part of
 * the file; take their meaning from the name and confirm there.
 */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        /* inode number; also the key of the record in the inode cache */
        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* compared with nlink in can_free_inode_rec() */
        u32 found_link;
        u64 found_size;
        /* get_inode_rec() starts a new record with (u64)-1 here */
        u64 extent_start;
        u64 extent_end;
        /* rb tree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* reference count, see above */
        u32 refs;
};
260
/*
 * Error bits kept in inode_record::errors; print_inode_error() turns
 * them into text.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
276
/*
 * One reference to a root; kept on root_record::backrefs and mapped back
 * from its list link by to_root_backref().
 *
 * NOTE(review): the members are filled outside this part of the file;
 * meanings are inferred from the names.  Like struct inode_backref it ends
 * in a zero-length 'name' array, so it is presumably allocated with room
 * for namelen bytes behind it -- confirm at the allocation site.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        /* presumably REF_ERR_* bits -- confirm */
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
/*
 * Record for one root.
 *
 * NOTE(review): filled outside this part of the file; 'cache' is the
 * linkage for a cache_tree and 'backrefs' presumably holds
 * struct root_backref entries -- confirm.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
304
/*
 * Cache tree entry that carries an opaque pointer.  get_inode_rec() uses
 * it for the inode cache: cache.start is the inode number, cache.size is
 * 1 and 'data' points to the struct inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
309
/*
 * Per-node state referenced from walk_control::nodes[].
 *
 * NOTE(review): used outside this part of the file.  It bundles a root
 * cache and an inode cache with a reference count, which suggests state
 * kept for tree blocks shared between roots -- confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
317
/*
 * Start and size of one block.
 * NOTE(review): not used in this part of the file; units are presumably
 * bytes -- confirm.
 */
struct block_info {
        u64 start;
        u32 size;
};
322
/*
 * State carried along while walking a tree: one struct shared_node slot
 * per possible tree level (BTRFS_MAX_LEVEL entries).
 *
 * NOTE(review): used outside this part of the file; 'active_node' and
 * 'root_level' are presumably level indexes into nodes[] -- confirm.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
329
/*
 * An item identified by its key and the id of the root that contains it,
 * linkable into a list.
 * NOTE(review): presumably queued on the file-scope 'delete_items' list --
 * confirm; the users are outside this part of the file.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
335
/*
 * A (bytenr, bytes) extent candidate with counters, linkable into a list.
 * NOTE(review): used outside this part of the file; 'count' and 'broken'
 * are presumably tallies of references agreeing with this extent --
 * confirm.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
343
/*
 * What is known about one tree root, stored in a cache tree through
 * 'cache_extent'.
 * NOTE(review): presumably the entries of the file-scope
 * 'roots_info_cache' -- confirm; the users are outside this part of the
 * file.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
353
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every class has its own bit so that several of them can be OR-ed into
 * one value and still be told apart.  CROSSING_STRIPE_BOUNDARY used to
 * share (1 << 4) with REFERENCER_MISMATCH; it now uses the first free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8) /* Chunk type does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Finish the progress line: write a newline to stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);
        return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
/*
 * Forward declaration.  The function is static, so its definition has to
 * follow later in this same file.
 */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         struct rb_node *rb;
663         size_t size;
664         int ret;
665
666         rec = malloc(sizeof(*rec));
667         if (!rec)
668                 return ERR_PTR(-ENOMEM);
669         memcpy(rec, orig_rec, sizeof(*rec));
670         rec->refs = 1;
671         INIT_LIST_HEAD(&rec->backrefs);
672         INIT_LIST_HEAD(&rec->orphan_extents);
673         rec->holes = RB_ROOT;
674
675         list_for_each_entry(orig, &orig_rec->backrefs, list) {
676                 size = sizeof(*orig) + orig->namelen + 1;
677                 backref = malloc(size);
678                 if (!backref) {
679                         ret = -ENOMEM;
680                         goto cleanup;
681                 }
682                 memcpy(backref, orig, size);
683                 list_add_tail(&backref->list, &rec->backrefs);
684         }
685         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686                 dst_orphan = malloc(sizeof(*dst_orphan));
687                 if (!dst_orphan) {
688                         ret = -ENOMEM;
689                         goto cleanup;
690                 }
691                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
693         }
694         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
695         if (ret < 0)
696                 goto cleanup_rb;
697
698         return rec;
699
700 cleanup_rb:
701         rb = rb_first(&rec->holes);
702         while (rb) {
703                 struct file_extent_hole *hole;
704
705                 hole = rb_entry(rb, struct file_extent_hole, node);
706                 rb = rb_next(rb);
707                 free(hole);
708         }
709
710 cleanup:
711         if (!list_empty(&rec->backrefs))
712                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713                         list_del(&orig->list);
714                         free(orig);
715                 }
716
717         if (!list_empty(&rec->orphan_extents))
718                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719                         list_del(&orig->list);
720                         free(orig);
721                 }
722
723         free(rec);
724
725         return ERR_PTR(ret);
726 }
727
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
729                                       u64 objectid)
730 {
731         struct orphan_data_extent *orphan;
732
733         if (list_empty(orphan_extents))
734                 return;
735         printf("The following data extent is lost in tree %llu:\n",
736                objectid);
737         list_for_each_entry(orphan, orphan_extents, list) {
738                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
740                        orphan->disk_len);
741         }
742 }
743
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
745 {
746         u64 root_objectid = root->root_key.objectid;
747         int errors = rec->errors;
748
749         if (!errors)
750                 return;
751         /* reloc root errors, we print its corresponding fs root objectid*/
752         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753                 root_objectid = root->root_key.offset;
754                 fprintf(stderr, "reloc");
755         }
756         fprintf(stderr, "root %llu inode %llu errors %x",
757                 (unsigned long long) root_objectid,
758                 (unsigned long long) rec->ino, rec->errors);
759
760         if (errors & I_ERR_NO_INODE_ITEM)
761                 fprintf(stderr, ", no inode item");
762         if (errors & I_ERR_NO_ORPHAN_ITEM)
763                 fprintf(stderr, ", no orphan item");
764         if (errors & I_ERR_DUP_INODE_ITEM)
765                 fprintf(stderr, ", dup inode item");
766         if (errors & I_ERR_DUP_DIR_INDEX)
767                 fprintf(stderr, ", dup dir index");
768         if (errors & I_ERR_ODD_DIR_ITEM)
769                 fprintf(stderr, ", odd dir item");
770         if (errors & I_ERR_ODD_FILE_EXTENT)
771                 fprintf(stderr, ", odd file extent");
772         if (errors & I_ERR_BAD_FILE_EXTENT)
773                 fprintf(stderr, ", bad file extent");
774         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775                 fprintf(stderr, ", file extent overlap");
776         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777                 fprintf(stderr, ", file extent discount");
778         if (errors & I_ERR_DIR_ISIZE_WRONG)
779                 fprintf(stderr, ", dir isize wrong");
780         if (errors & I_ERR_FILE_NBYTES_WRONG)
781                 fprintf(stderr, ", nbytes wrong");
782         if (errors & I_ERR_ODD_CSUM_ITEM)
783                 fprintf(stderr, ", odd csum item");
784         if (errors & I_ERR_SOME_CSUM_MISSING)
785                 fprintf(stderr, ", some csum missing");
786         if (errors & I_ERR_LINK_COUNT_WRONG)
787                 fprintf(stderr, ", link count wrong");
788         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789                 fprintf(stderr, ", orphan file extent");
790         fprintf(stderr, "\n");
791         /* Print the orphan extents if needed */
792         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
794
795         /* Print the holes if needed */
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797                 struct file_extent_hole *hole;
798                 struct rb_node *node;
799                 int found = 0;
800
801                 node = rb_first(&rec->holes);
802                 fprintf(stderr, "Found file extent holes:\n");
803                 while (node) {
804                         found = 1;
805                         hole = rb_entry(node, struct file_extent_hole, node);
806                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
807                                 hole->start, hole->len);
808                         node = rb_next(node);
809                 }
810                 if (!found)
811                         fprintf(stderr, "\tstart: 0, len: %llu\n",
812                                 round_up(rec->isize, root->sectorsize));
813         }
814 }
815
816 static void print_ref_error(int errors)
817 {
818         if (errors & REF_ERR_NO_DIR_ITEM)
819                 fprintf(stderr, ", no dir item");
820         if (errors & REF_ERR_NO_DIR_INDEX)
821                 fprintf(stderr, ", no dir index");
822         if (errors & REF_ERR_NO_INODE_REF)
823                 fprintf(stderr, ", no inode ref");
824         if (errors & REF_ERR_DUP_DIR_ITEM)
825                 fprintf(stderr, ", dup dir item");
826         if (errors & REF_ERR_DUP_DIR_INDEX)
827                 fprintf(stderr, ", dup dir index");
828         if (errors & REF_ERR_DUP_INODE_REF)
829                 fprintf(stderr, ", dup inode ref");
830         if (errors & REF_ERR_INDEX_UNMATCH)
831                 fprintf(stderr, ", index mismatch");
832         if (errors & REF_ERR_FILETYPE_UNMATCH)
833                 fprintf(stderr, ", filetype mismatch");
834         if (errors & REF_ERR_NAME_TOO_LONG)
835                 fprintf(stderr, ", name too long");
836         if (errors & REF_ERR_NO_ROOT_REF)
837                 fprintf(stderr, ", no root ref");
838         if (errors & REF_ERR_NO_ROOT_BACKREF)
839                 fprintf(stderr, ", no root backref");
840         if (errors & REF_ERR_DUP_ROOT_REF)
841                 fprintf(stderr, ", dup root ref");
842         if (errors & REF_ERR_DUP_ROOT_BACKREF)
843                 fprintf(stderr, ", dup root backref");
844         fprintf(stderr, "\n");
845 }
846
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
848                                           u64 ino, int mod)
849 {
850         struct ptr_node *node;
851         struct cache_extent *cache;
852         struct inode_record *rec = NULL;
853         int ret;
854
855         cache = lookup_cache_extent(inode_cache, ino, 1);
856         if (cache) {
857                 node = container_of(cache, struct ptr_node, cache);
858                 rec = node->data;
859                 if (mod && rec->refs > 1) {
860                         node->data = clone_inode_rec(rec);
861                         if (IS_ERR(node->data))
862                                 return node->data;
863                         rec->refs--;
864                         rec = node->data;
865                 }
866         } else if (mod) {
867                 rec = calloc(1, sizeof(*rec));
868                 if (!rec)
869                         return ERR_PTR(-ENOMEM);
870                 rec->ino = ino;
871                 rec->extent_start = (u64)-1;
872                 rec->refs = 1;
873                 INIT_LIST_HEAD(&rec->backrefs);
874                 INIT_LIST_HEAD(&rec->orphan_extents);
875                 rec->holes = RB_ROOT;
876
877                 node = malloc(sizeof(*node));
878                 if (!node) {
879                         free(rec);
880                         return ERR_PTR(-ENOMEM);
881                 }
882                 node->cache.start = ino;
883                 node->cache.size = 1;
884                 node->data = rec;
885
886                 if (ino == BTRFS_FREE_INO_OBJECTID)
887                         rec->found_link = 1;
888
889                 ret = insert_cache_extent(inode_cache, &node->cache);
890                 if (ret)
891                         return ERR_PTR(-EEXIST);
892         }
893         return rec;
894 }
895
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
897 {
898         struct orphan_data_extent *orphan;
899
900         while (!list_empty(orphan_extents)) {
901                 orphan = list_entry(orphan_extents->next,
902                                     struct orphan_data_extent, list);
903                 list_del(&orphan->list);
904                 free(orphan);
905         }
906 }
907
908 static void free_inode_rec(struct inode_record *rec)
909 {
910         struct inode_backref *backref;
911
912         if (--rec->refs > 0)
913                 return;
914
915         while (!list_empty(&rec->backrefs)) {
916                 backref = to_inode_backref(rec->backrefs.next);
917                 list_del(&backref->list);
918                 free(backref);
919         }
920         free_orphan_data_extents(&rec->orphan_extents);
921         free_file_extent_holes(&rec->holes);
922         free(rec);
923 }
924
925 static int can_free_inode_rec(struct inode_record *rec)
926 {
927         if (!rec->errors && rec->checked && rec->found_inode_item &&
928             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
929                 return 1;
930         return 0;
931 }
932
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934                                  struct inode_record *rec)
935 {
936         struct cache_extent *cache;
937         struct inode_backref *tmp, *backref;
938         struct ptr_node *node;
939         u8 filetype;
940
941         if (!rec->found_inode_item)
942                 return;
943
944         filetype = imode_to_type(rec->imode);
945         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946                 if (backref->found_dir_item && backref->found_dir_index) {
947                         if (backref->filetype != filetype)
948                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949                         if (!backref->errors && backref->found_inode_ref &&
950                             rec->nlink == rec->found_link) {
951                                 list_del(&backref->list);
952                                 free(backref);
953                         }
954                 }
955         }
956
957         if (!rec->checked || rec->merging)
958                 return;
959
960         if (S_ISDIR(rec->imode)) {
961                 if (rec->found_size != rec->isize)
962                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963                 if (rec->found_file_extent)
964                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
965         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966                 if (rec->found_dir_item)
967                         rec->errors |= I_ERR_ODD_DIR_ITEM;
968                 if (rec->found_size != rec->nbytes)
969                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
970                 if (rec->nlink > 0 && !no_holes &&
971                     (rec->extent_end < rec->isize ||
972                      first_extent_gap(&rec->holes) < rec->isize))
973                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
974         }
975
976         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977                 if (rec->found_csum_item && rec->nodatasum)
978                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
979                 if (rec->some_csum_missing && !rec->nodatasum)
980                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
981         }
982
983         BUG_ON(rec->refs != 1);
984         if (can_free_inode_rec(rec)) {
985                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986                 node = container_of(cache, struct ptr_node, cache);
987                 BUG_ON(node->data != rec);
988                 remove_cache_extent(inode_cache, &node->cache);
989                 free(node);
990                 free_inode_rec(rec);
991         }
992 }
993
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
995 {
996         struct btrfs_path path;
997         struct btrfs_key key;
998         int ret;
999
1000         key.objectid = BTRFS_ORPHAN_OBJECTID;
1001         key.type = BTRFS_ORPHAN_ITEM_KEY;
1002         key.offset = ino;
1003
1004         btrfs_init_path(&path);
1005         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006         btrfs_release_path(&path);
1007         if (ret > 0)
1008                 ret = -ENOENT;
1009         return ret;
1010 }
1011
1012 static int process_inode_item(struct extent_buffer *eb,
1013                               int slot, struct btrfs_key *key,
1014                               struct shared_node *active_node)
1015 {
1016         struct inode_record *rec;
1017         struct btrfs_inode_item *item;
1018
1019         rec = active_node->current;
1020         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021         if (rec->found_inode_item) {
1022                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1023                 return 1;
1024         }
1025         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026         rec->nlink = btrfs_inode_nlink(eb, item);
1027         rec->isize = btrfs_inode_size(eb, item);
1028         rec->nbytes = btrfs_inode_nbytes(eb, item);
1029         rec->imode = btrfs_inode_mode(eb, item);
1030         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1031                 rec->nodatasum = 1;
1032         rec->found_inode_item = 1;
1033         if (rec->nlink == 0)
1034                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035         maybe_free_inode_rec(&active_node->inode_cache, rec);
1036         return 0;
1037 }
1038
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1040                                                 const char *name,
1041                                                 int namelen, u64 dir)
1042 {
1043         struct inode_backref *backref;
1044
1045         list_for_each_entry(backref, &rec->backrefs, list) {
1046                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1047                         break;
1048                 if (backref->dir != dir || backref->namelen != namelen)
1049                         continue;
1050                 if (memcmp(name, backref->name, namelen))
1051                         continue;
1052                 return backref;
1053         }
1054
1055         backref = malloc(sizeof(*backref) + namelen + 1);
1056         if (!backref)
1057                 return NULL;
1058         memset(backref, 0, sizeof(*backref));
1059         backref->dir = dir;
1060         backref->namelen = namelen;
1061         memcpy(backref->name, name, namelen);
1062         backref->name[namelen] = '\0';
1063         list_add_tail(&backref->list, &rec->backrefs);
1064         return backref;
1065 }
1066
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068                              u64 ino, u64 dir, u64 index,
1069                              const char *name, int namelen,
1070                              u8 filetype, u8 itemtype, int errors)
1071 {
1072         struct inode_record *rec;
1073         struct inode_backref *backref;
1074
1075         rec = get_inode_rec(inode_cache, ino, 1);
1076         BUG_ON(IS_ERR(rec));
1077         backref = get_inode_backref(rec, name, namelen, dir);
1078         BUG_ON(!backref);
1079         if (errors)
1080                 backref->errors |= errors;
1081         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082                 if (backref->found_dir_index)
1083                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084                 if (backref->found_inode_ref && backref->index != index)
1085                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1086                 if (backref->found_dir_item && backref->filetype != filetype)
1087                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1088
1089                 backref->index = index;
1090                 backref->filetype = filetype;
1091                 backref->found_dir_index = 1;
1092         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1093                 rec->found_link++;
1094                 if (backref->found_dir_item)
1095                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096                 if (backref->found_dir_index && backref->filetype != filetype)
1097                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1098
1099                 backref->filetype = filetype;
1100                 backref->found_dir_item = 1;
1101         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103                 if (backref->found_inode_ref)
1104                         backref->errors |= REF_ERR_DUP_INODE_REF;
1105                 if (backref->found_dir_index && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 else
1108                         backref->index = index;
1109
1110                 backref->ref_type = itemtype;
1111                 backref->found_inode_ref = 1;
1112         } else {
1113                 BUG_ON(1);
1114         }
1115
1116         maybe_free_inode_rec(inode_cache, rec);
1117         return 0;
1118 }
1119
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121                             struct cache_tree *dst_cache)
1122 {
1123         struct inode_backref *backref;
1124         u32 dir_count = 0;
1125         int ret = 0;
1126
1127         dst->merging = 1;
1128         list_for_each_entry(backref, &src->backrefs, list) {
1129                 if (backref->found_dir_index) {
1130                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1131                                         backref->index, backref->name,
1132                                         backref->namelen, backref->filetype,
1133                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1134                 }
1135                 if (backref->found_dir_item) {
1136                         dir_count++;
1137                         add_inode_backref(dst_cache, dst->ino,
1138                                         backref->dir, 0, backref->name,
1139                                         backref->namelen, backref->filetype,
1140                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1141                 }
1142                 if (backref->found_inode_ref) {
1143                         add_inode_backref(dst_cache, dst->ino,
1144                                         backref->dir, backref->index,
1145                                         backref->name, backref->namelen, 0,
1146                                         backref->ref_type, backref->errors);
1147                 }
1148         }
1149
1150         if (src->found_dir_item)
1151                 dst->found_dir_item = 1;
1152         if (src->found_file_extent)
1153                 dst->found_file_extent = 1;
1154         if (src->found_csum_item)
1155                 dst->found_csum_item = 1;
1156         if (src->some_csum_missing)
1157                 dst->some_csum_missing = 1;
1158         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1160                 if (ret < 0)
1161                         return ret;
1162         }
1163
1164         BUG_ON(src->found_link < dir_count);
1165         dst->found_link += src->found_link - dir_count;
1166         dst->found_size += src->found_size;
1167         if (src->extent_start != (u64)-1) {
1168                 if (dst->extent_start == (u64)-1) {
1169                         dst->extent_start = src->extent_start;
1170                         dst->extent_end = src->extent_end;
1171                 } else {
1172                         if (dst->extent_end > src->extent_start)
1173                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174                         else if (dst->extent_end < src->extent_start) {
1175                                 ret = add_file_extent_hole(&dst->holes,
1176                                         dst->extent_end,
1177                                         src->extent_start - dst->extent_end);
1178                         }
1179                         if (dst->extent_end < src->extent_end)
1180                                 dst->extent_end = src->extent_end;
1181                 }
1182         }
1183
1184         dst->errors |= src->errors;
1185         if (src->found_inode_item) {
1186                 if (!dst->found_inode_item) {
1187                         dst->nlink = src->nlink;
1188                         dst->isize = src->isize;
1189                         dst->nbytes = src->nbytes;
1190                         dst->imode = src->imode;
1191                         dst->nodatasum = src->nodatasum;
1192                         dst->found_inode_item = 1;
1193                 } else {
1194                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1195                 }
1196         }
1197         dst->merging = 0;
1198
1199         return 0;
1200 }
1201
1202 static int splice_shared_node(struct shared_node *src_node,
1203                               struct shared_node *dst_node)
1204 {
1205         struct cache_extent *cache;
1206         struct ptr_node *node, *ins;
1207         struct cache_tree *src, *dst;
1208         struct inode_record *rec, *conflict;
1209         u64 current_ino = 0;
1210         int splice = 0;
1211         int ret;
1212
1213         if (--src_node->refs == 0)
1214                 splice = 1;
1215         if (src_node->current)
1216                 current_ino = src_node->current->ino;
1217
1218         src = &src_node->root_cache;
1219         dst = &dst_node->root_cache;
1220 again:
1221         cache = search_cache_extent(src, 0);
1222         while (cache) {
1223                 node = container_of(cache, struct ptr_node, cache);
1224                 rec = node->data;
1225                 cache = next_cache_extent(cache);
1226
1227                 if (splice) {
1228                         remove_cache_extent(src, &node->cache);
1229                         ins = node;
1230                 } else {
1231                         ins = malloc(sizeof(*ins));
1232                         BUG_ON(!ins);
1233                         ins->cache.start = node->cache.start;
1234                         ins->cache.size = node->cache.size;
1235                         ins->data = rec;
1236                         rec->refs++;
1237                 }
1238                 ret = insert_cache_extent(dst, &ins->cache);
1239                 if (ret == -EEXIST) {
1240                         conflict = get_inode_rec(dst, rec->ino, 1);
1241                         BUG_ON(IS_ERR(conflict));
1242                         merge_inode_recs(rec, conflict, dst);
1243                         if (rec->checked) {
1244                                 conflict->checked = 1;
1245                                 if (dst_node->current == conflict)
1246                                         dst_node->current = NULL;
1247                         }
1248                         maybe_free_inode_rec(dst, conflict);
1249                         free_inode_rec(rec);
1250                         free(ins);
1251                 } else {
1252                         BUG_ON(ret);
1253                 }
1254         }
1255
1256         if (src == &src_node->root_cache) {
1257                 src = &src_node->inode_cache;
1258                 dst = &dst_node->inode_cache;
1259                 goto again;
1260         }
1261
1262         if (current_ino > 0 && (!dst_node->current ||
1263             current_ino > dst_node->current->ino)) {
1264                 if (dst_node->current) {
1265                         dst_node->current->checked = 1;
1266                         maybe_free_inode_rec(dst, dst_node->current);
1267                 }
1268                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269                 BUG_ON(IS_ERR(dst_node->current));
1270         }
1271         return 0;
1272 }
1273
1274 static void free_inode_ptr(struct cache_extent *cache)
1275 {
1276         struct ptr_node *node;
1277         struct inode_record *rec;
1278
1279         node = container_of(cache, struct ptr_node, cache);
1280         rec = node->data;
1281         free_inode_rec(rec);
1282         free(node);
1283 }
1284
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1286
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1288                                             u64 bytenr)
1289 {
1290         struct cache_extent *cache;
1291         struct shared_node *node;
1292
1293         cache = lookup_cache_extent(shared, bytenr, 1);
1294         if (cache) {
1295                 node = container_of(cache, struct shared_node, cache);
1296                 return node;
1297         }
1298         return NULL;
1299 }
1300
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1302 {
1303         int ret;
1304         struct shared_node *node;
1305
1306         node = calloc(1, sizeof(*node));
1307         if (!node)
1308                 return -ENOMEM;
1309         node->cache.start = bytenr;
1310         node->cache.size = 1;
1311         cache_tree_init(&node->root_cache);
1312         cache_tree_init(&node->inode_cache);
1313         node->refs = refs;
1314
1315         ret = insert_cache_extent(shared, &node->cache);
1316
1317         return ret;
1318 }
1319
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321                              struct walk_control *wc, int level)
1322 {
1323         struct shared_node *node;
1324         struct shared_node *dest;
1325         int ret;
1326
1327         if (level == wc->active_node)
1328                 return 0;
1329
1330         BUG_ON(wc->active_node <= level);
1331         node = find_shared_node(&wc->shared, bytenr);
1332         if (!node) {
1333                 ret = add_shared_node(&wc->shared, bytenr, refs);
1334                 BUG_ON(ret);
1335                 node = find_shared_node(&wc->shared, bytenr);
1336                 wc->nodes[level] = node;
1337                 wc->active_node = level;
1338                 return 0;
1339         }
1340
1341         if (wc->root_level == wc->active_node &&
1342             btrfs_root_refs(&root->root_item) == 0) {
1343                 if (--node->refs == 0) {
1344                         free_inode_recs_tree(&node->root_cache);
1345                         free_inode_recs_tree(&node->inode_cache);
1346                         remove_cache_extent(&wc->shared, &node->cache);
1347                         free(node);
1348                 }
1349                 return 1;
1350         }
1351
1352         dest = wc->nodes[wc->active_node];
1353         splice_shared_node(node, dest);
1354         if (node->refs == 0) {
1355                 remove_cache_extent(&wc->shared, &node->cache);
1356                 free(node);
1357         }
1358         return 1;
1359 }
1360
1361 static int leave_shared_node(struct btrfs_root *root,
1362                              struct walk_control *wc, int level)
1363 {
1364         struct shared_node *node;
1365         struct shared_node *dest;
1366         int i;
1367
1368         if (level == wc->root_level)
1369                 return 0;
1370
1371         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1372                 if (wc->nodes[i])
1373                         break;
1374         }
1375         BUG_ON(i >= BTRFS_MAX_LEVEL);
1376
1377         node = wc->nodes[wc->active_node];
1378         wc->nodes[wc->active_node] = NULL;
1379         wc->active_node = i;
1380
1381         dest = wc->nodes[wc->active_node];
1382         if (wc->active_node < wc->root_level ||
1383             btrfs_root_refs(&root->root_item) > 0) {
1384                 BUG_ON(node->refs <= 1);
1385                 splice_shared_node(node, dest);
1386         } else {
1387                 BUG_ON(node->refs < 2);
1388                 node->refs--;
1389         }
1390         return 0;
1391 }
1392
1393 /*
1394  * Returns:
1395  * < 0 - on error
1396  * 1   - if the root with id child_root_id is a child of root parent_root_id
1397  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1398  *       has other root(s) as parent(s)
1399  * 2   - if the root child_root_id doesn't have any parent roots
1400  */
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1402                          u64 child_root_id)
1403 {
1404         struct btrfs_path path;
1405         struct btrfs_key key;
1406         struct extent_buffer *leaf;
1407         int has_parent = 0;
1408         int ret;
1409
1410         btrfs_init_path(&path);
1411
1412         key.objectid = parent_root_id;
1413         key.type = BTRFS_ROOT_REF_KEY;
1414         key.offset = child_root_id;
1415         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1416                                 0, 0);
1417         if (ret < 0)
1418                 return ret;
1419         btrfs_release_path(&path);
1420         if (!ret)
1421                 return 1;
1422
1423         key.objectid = child_root_id;
1424         key.type = BTRFS_ROOT_BACKREF_KEY;
1425         key.offset = 0;
1426         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1427                                 0, 0);
1428         if (ret < 0)
1429                 goto out;
1430
1431         while (1) {
1432                 leaf = path.nodes[0];
1433                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1435                         if (ret)
1436                                 break;
1437                         leaf = path.nodes[0];
1438                 }
1439
1440                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1441                 if (key.objectid != child_root_id ||
1442                     key.type != BTRFS_ROOT_BACKREF_KEY)
1443                         break;
1444
1445                 has_parent = 1;
1446
1447                 if (key.offset == parent_root_id) {
1448                         btrfs_release_path(&path);
1449                         return 1;
1450                 }
1451
1452                 path.slots[0]++;
1453         }
1454 out:
1455         btrfs_release_path(&path);
1456         if (ret < 0)
1457                 return ret;
1458         return has_parent ? 0 : 2;
1459 }
1460
1461 static int process_dir_item(struct btrfs_root *root,
1462                             struct extent_buffer *eb,
1463                             int slot, struct btrfs_key *key,
1464                             struct shared_node *active_node)
1465 {
1466         u32 total;
1467         u32 cur = 0;
1468         u32 len;
1469         u32 name_len;
1470         u32 data_len;
1471         int error;
1472         int nritems = 0;
1473         u8 filetype;
1474         struct btrfs_dir_item *di;
1475         struct inode_record *rec;
1476         struct cache_tree *root_cache;
1477         struct cache_tree *inode_cache;
1478         struct btrfs_key location;
1479         char namebuf[BTRFS_NAME_LEN];
1480
1481         root_cache = &active_node->root_cache;
1482         inode_cache = &active_node->inode_cache;
1483         rec = active_node->current;
1484         rec->found_dir_item = 1;
1485
1486         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487         total = btrfs_item_size_nr(eb, slot);
1488         while (cur < total) {
1489                 nritems++;
1490                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491                 name_len = btrfs_dir_name_len(eb, di);
1492                 data_len = btrfs_dir_data_len(eb, di);
1493                 filetype = btrfs_dir_type(eb, di);
1494
1495                 rec->found_size += name_len;
1496                 if (name_len <= BTRFS_NAME_LEN) {
1497                         len = name_len;
1498                         error = 0;
1499                 } else {
1500                         len = BTRFS_NAME_LEN;
1501                         error = REF_ERR_NAME_TOO_LONG;
1502                 }
1503                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1504
1505                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506                         add_inode_backref(inode_cache, location.objectid,
1507                                           key->objectid, key->offset, namebuf,
1508                                           len, filetype, key->type, error);
1509                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510                         add_inode_backref(root_cache, location.objectid,
1511                                           key->objectid, key->offset,
1512                                           namebuf, len, filetype,
1513                                           key->type, error);
1514                 } else {
1515                         fprintf(stderr, "invalid location in dir item %u\n",
1516                                 location.type);
1517                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518                                           key->objectid, key->offset, namebuf,
1519                                           len, filetype, key->type, error);
1520                 }
1521
1522                 len = sizeof(*di) + name_len + data_len;
1523                 di = (struct btrfs_dir_item *)((char *)di + len);
1524                 cur += len;
1525         }
1526         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1528
1529         return 0;
1530 }
1531
1532 static int process_inode_ref(struct extent_buffer *eb,
1533                              int slot, struct btrfs_key *key,
1534                              struct shared_node *active_node)
1535 {
1536         u32 total;
1537         u32 cur = 0;
1538         u32 len;
1539         u32 name_len;
1540         u64 index;
1541         int error;
1542         struct cache_tree *inode_cache;
1543         struct btrfs_inode_ref *ref;
1544         char namebuf[BTRFS_NAME_LEN];
1545
1546         inode_cache = &active_node->inode_cache;
1547
1548         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549         total = btrfs_item_size_nr(eb, slot);
1550         while (cur < total) {
1551                 name_len = btrfs_inode_ref_name_len(eb, ref);
1552                 index = btrfs_inode_ref_index(eb, ref);
1553                 if (name_len <= BTRFS_NAME_LEN) {
1554                         len = name_len;
1555                         error = 0;
1556                 } else {
1557                         len = BTRFS_NAME_LEN;
1558                         error = REF_ERR_NAME_TOO_LONG;
1559                 }
1560                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561                 add_inode_backref(inode_cache, key->objectid, key->offset,
1562                                   index, namebuf, len, 0, key->type, error);
1563
1564                 len = sizeof(*ref) + name_len;
1565                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1566                 cur += len;
1567         }
1568         return 0;
1569 }
1570
1571 static int process_inode_extref(struct extent_buffer *eb,
1572                                 int slot, struct btrfs_key *key,
1573                                 struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u64 index;
1580         u64 parent;
1581         int error;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_inode_extref *extref;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         inode_cache = &active_node->inode_cache;
1587
1588         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589         total = btrfs_item_size_nr(eb, slot);
1590         while (cur < total) {
1591                 name_len = btrfs_inode_extref_name_len(eb, extref);
1592                 index = btrfs_inode_extref_index(eb, extref);
1593                 parent = btrfs_inode_extref_parent(eb, extref);
1594                 if (name_len <= BTRFS_NAME_LEN) {
1595                         len = name_len;
1596                         error = 0;
1597                 } else {
1598                         len = BTRFS_NAME_LEN;
1599                         error = REF_ERR_NAME_TOO_LONG;
1600                 }
1601                 read_extent_buffer(eb, namebuf,
1602                                    (unsigned long)(extref + 1), len);
1603                 add_inode_backref(inode_cache, key->objectid, parent,
1604                                   index, namebuf, len, 0, key->type, error);
1605
1606                 len = sizeof(*extref) + name_len;
1607                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1608                 cur += len;
1609         }
1610         return 0;
1611
1612 }
1613
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615                             u64 len, u64 *found)
1616 {
1617         struct btrfs_key key;
1618         struct btrfs_path path;
1619         struct extent_buffer *leaf;
1620         int ret;
1621         size_t size;
1622         *found = 0;
1623         u64 csum_end;
1624         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1625
1626         btrfs_init_path(&path);
1627
1628         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1629         key.offset = start;
1630         key.type = BTRFS_EXTENT_CSUM_KEY;
1631
1632         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1633                                 &key, &path, 0, 0);
1634         if (ret < 0)
1635                 goto out;
1636         if (ret > 0 && path.slots[0] > 0) {
1637                 leaf = path.nodes[0];
1638                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640                     key.type == BTRFS_EXTENT_CSUM_KEY)
1641                         path.slots[0]--;
1642         }
1643
1644         while (len > 0) {
1645                 leaf = path.nodes[0];
1646                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1648                         if (ret > 0)
1649                                 break;
1650                         else if (ret < 0)
1651                                 goto out;
1652                         leaf = path.nodes[0];
1653                 }
1654
1655                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657                     key.type != BTRFS_EXTENT_CSUM_KEY)
1658                         break;
1659
1660                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661                 if (key.offset >= start + len)
1662                         break;
1663
1664                 if (key.offset > start)
1665                         start = key.offset;
1666
1667                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669                 if (csum_end > start) {
1670                         size = min(csum_end - start, len);
1671                         len -= size;
1672                         start += size;
1673                         *found += size;
1674                 }
1675
1676                 path.slots[0]++;
1677         }
1678 out:
1679         btrfs_release_path(&path);
1680         if (ret < 0)
1681                 return ret;
1682         return 0;
1683 }
1684
1685 static int process_file_extent(struct btrfs_root *root,
1686                                 struct extent_buffer *eb,
1687                                 int slot, struct btrfs_key *key,
1688                                 struct shared_node *active_node)
1689 {
1690         struct inode_record *rec;
1691         struct btrfs_file_extent_item *fi;
1692         u64 num_bytes = 0;
1693         u64 disk_bytenr = 0;
1694         u64 extent_offset = 0;
1695         u64 mask = root->sectorsize - 1;
1696         int extent_type;
1697         int ret;
1698
1699         rec = active_node->current;
1700         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701         rec->found_file_extent = 1;
1702
1703         if (rec->extent_start == (u64)-1) {
1704                 rec->extent_start = key->offset;
1705                 rec->extent_end = key->offset;
1706         }
1707
1708         if (rec->extent_end > key->offset)
1709                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710         else if (rec->extent_end < key->offset) {
1711                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712                                            key->offset - rec->extent_end);
1713                 if (ret < 0)
1714                         return ret;
1715         }
1716
1717         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718         extent_type = btrfs_file_extent_type(eb, fi);
1719
1720         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1722                 if (num_bytes == 0)
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 rec->found_size += num_bytes;
1725                 num_bytes = (num_bytes + mask) & ~mask;
1726         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730                 extent_offset = btrfs_file_extent_offset(eb, fi);
1731                 if (num_bytes == 0 || (num_bytes & mask))
1732                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733                 if (num_bytes + extent_offset >
1734                     btrfs_file_extent_ram_bytes(eb, fi))
1735                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1736                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737                     (btrfs_file_extent_compression(eb, fi) ||
1738                      btrfs_file_extent_encryption(eb, fi) ||
1739                      btrfs_file_extent_other_encoding(eb, fi)))
1740                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1741                 if (disk_bytenr > 0)
1742                         rec->found_size += num_bytes;
1743         } else {
1744                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1745         }
1746         rec->extent_end = key->offset + num_bytes;
1747
1748         /*
1749          * The data reloc tree will copy full extents into its inode and then
1750          * copy the corresponding csums.  Because the extent it copied could be
1751          * a preallocated extent that hasn't been written to yet there may be no
1752          * csums to copy, ergo we won't have csums for our file extent.  This is
1753          * ok so just don't bother checking csums if the inode belongs to the
1754          * data reloc tree.
1755          */
1756         if (disk_bytenr > 0 &&
1757             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1758                 u64 found;
1759                 if (btrfs_file_extent_compression(eb, fi))
1760                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1761                 else
1762                         disk_bytenr += extent_offset;
1763
1764                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1765                 if (ret < 0)
1766                         return ret;
1767                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1768                         if (found > 0)
1769                                 rec->found_csum_item = 1;
1770                         if (found < num_bytes)
1771                                 rec->some_csum_missing = 1;
1772                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1773                         if (found > 0)
1774                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1775                 }
1776         }
1777         return 0;
1778 }
1779
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781                             struct walk_control *wc)
1782 {
1783         struct btrfs_key key;
1784         u32 nritems;
1785         int i;
1786         int ret = 0;
1787         struct cache_tree *inode_cache;
1788         struct shared_node *active_node;
1789
1790         if (wc->root_level == wc->active_node &&
1791             btrfs_root_refs(&root->root_item) == 0)
1792                 return 0;
1793
1794         active_node = wc->nodes[wc->active_node];
1795         inode_cache = &active_node->inode_cache;
1796         nritems = btrfs_header_nritems(eb);
1797         for (i = 0; i < nritems; i++) {
1798                 btrfs_item_key_to_cpu(eb, &key, i);
1799
1800                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1801                         continue;
1802                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1803                         continue;
1804
1805                 if (active_node->current == NULL ||
1806                     active_node->current->ino < key.objectid) {
1807                         if (active_node->current) {
1808                                 active_node->current->checked = 1;
1809                                 maybe_free_inode_rec(inode_cache,
1810                                                      active_node->current);
1811                         }
1812                         active_node->current = get_inode_rec(inode_cache,
1813                                                              key.objectid, 1);
1814                         BUG_ON(IS_ERR(active_node->current));
1815                 }
1816                 switch (key.type) {
1817                 case BTRFS_DIR_ITEM_KEY:
1818                 case BTRFS_DIR_INDEX_KEY:
1819                         ret = process_dir_item(root, eb, i, &key, active_node);
1820                         break;
1821                 case BTRFS_INODE_REF_KEY:
1822                         ret = process_inode_ref(eb, i, &key, active_node);
1823                         break;
1824                 case BTRFS_INODE_EXTREF_KEY:
1825                         ret = process_inode_extref(eb, i, &key, active_node);
1826                         break;
1827                 case BTRFS_INODE_ITEM_KEY:
1828                         ret = process_inode_item(eb, i, &key, active_node);
1829                         break;
1830                 case BTRFS_EXTENT_DATA_KEY:
1831                         ret = process_file_extent(root, eb, i, &key,
1832                                                   active_node);
1833                         break;
1834                 default:
1835                         break;
1836                 };
1837         }
1838         return ret;
1839 }
1840
1841 static void reada_walk_down(struct btrfs_root *root,
1842                             struct extent_buffer *node, int slot)
1843 {
1844         u64 bytenr;
1845         u64 ptr_gen;
1846         u32 nritems;
1847         u32 blocksize;
1848         int i;
1849         int level;
1850
1851         level = btrfs_header_level(node);
1852         if (level != 1)
1853                 return;
1854
1855         nritems = btrfs_header_nritems(node);
1856         blocksize = root->nodesize;
1857         for (i = slot; i < nritems; i++) {
1858                 bytenr = btrfs_node_blockptr(node, i);
1859                 ptr_gen = btrfs_node_ptr_generation(node, i);
1860                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1861         }
1862 }
1863
1864 /*
1865  * Check the child node/leaf by the following condition:
1866  * 1. the first item key of the node/leaf should be the same with the one
1867  *    in parent.
1868  * 2. block in parent node should match the child node/leaf.
1869  * 3. generation of parent node and child's header should be consistent.
1870  *
1871  * Or the child node/leaf pointed by the key in parent is not valid.
1872  *
1873  * We hope to check leaf owner too, but since subvol may share leaves,
1874  * which makes leaf owner check not so strong, key check should be
1875  * sufficient enough for that case.
1876  */
1877 static int check_child_node(struct btrfs_root *root,
1878                             struct extent_buffer *parent, int slot,
1879                             struct extent_buffer *child)
1880 {
1881         struct btrfs_key parent_key;
1882         struct btrfs_key child_key;
1883         int ret = 0;
1884
1885         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1886         if (btrfs_header_level(child) == 0)
1887                 btrfs_item_key_to_cpu(child, &child_key, 0);
1888         else
1889                 btrfs_node_key_to_cpu(child, &child_key, 0);
1890
1891         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1892                 ret = -EINVAL;
1893                 fprintf(stderr,
1894                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895                         parent_key.objectid, parent_key.type, parent_key.offset,
1896                         child_key.objectid, child_key.type, child_key.offset);
1897         }
1898         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1899                 ret = -EINVAL;
1900                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901                         btrfs_node_blockptr(parent, slot),
1902                         btrfs_header_bytenr(child));
1903         }
1904         if (btrfs_node_ptr_generation(parent, slot) !=
1905             btrfs_header_generation(child)) {
1906                 ret = -EINVAL;
1907                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908                         btrfs_header_generation(child),
1909                         btrfs_node_ptr_generation(parent, slot));
1910         }
1911         return ret;
1912 }
1913
/*
 * Per-level cache of the last extent reference count looked up by
 * walk_down_tree(), so a block whose bytenr matches the cached one does not
 * need another btrfs_lookup_extent_info() call.
 */
struct node_refs {
	/* bytenr of the tree block last looked up at each level */
	u64 bytenr[BTRFS_MAX_LEVEL];
	/* reference count found for the block in bytenr[] at the same level */
	u64 refs[BTRFS_MAX_LEVEL];
};
1918
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920                           struct walk_control *wc, int *level,
1921                           struct node_refs *nrefs)
1922 {
1923         enum btrfs_tree_block_status status;
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         struct extent_buffer *next;
1927         struct extent_buffer *cur;
1928         u32 blocksize;
1929         int ret, err = 0;
1930         u64 refs;
1931
1932         WARN_ON(*level < 0);
1933         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1934
1935         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936                 refs = nrefs->refs[*level];
1937                 ret = 0;
1938         } else {
1939                 ret = btrfs_lookup_extent_info(NULL, root,
1940                                        path->nodes[*level]->start,
1941                                        *level, 1, &refs, NULL);
1942                 if (ret < 0) {
1943                         err = ret;
1944                         goto out;
1945                 }
1946                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947                 nrefs->refs[*level] = refs;
1948         }
1949
1950         if (refs > 1) {
1951                 ret = enter_shared_node(root, path->nodes[*level]->start,
1952                                         refs, wc, *level);
1953                 if (ret > 0) {
1954                         err = ret;
1955                         goto out;
1956                 }
1957         }
1958
1959         while (*level >= 0) {
1960                 WARN_ON(*level < 0);
1961                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962                 cur = path->nodes[*level];
1963
1964                 if (btrfs_header_level(cur) != *level)
1965                         WARN_ON(1);
1966
1967                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1968                         break;
1969                 if (*level == 0) {
1970                         ret = process_one_leaf(root, cur, wc);
1971                         if (ret < 0)
1972                                 err = ret;
1973                         break;
1974                 }
1975                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977                 blocksize = root->nodesize;
1978
1979                 if (bytenr == nrefs->bytenr[*level - 1]) {
1980                         refs = nrefs->refs[*level - 1];
1981                 } else {
1982                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983                                         *level - 1, 1, &refs, NULL);
1984                         if (ret < 0) {
1985                                 refs = 0;
1986                         } else {
1987                                 nrefs->bytenr[*level - 1] = bytenr;
1988                                 nrefs->refs[*level - 1] = refs;
1989                         }
1990                 }
1991
1992                 if (refs > 1) {
1993                         ret = enter_shared_node(root, bytenr, refs,
1994                                                 wc, *level - 1);
1995                         if (ret > 0) {
1996                                 path->slots[*level]++;
1997                                 continue;
1998                         }
1999                 }
2000
2001                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003                         free_extent_buffer(next);
2004                         reada_walk_down(root, cur, path->slots[*level]);
2005                         next = read_tree_block(root, bytenr, blocksize,
2006                                                ptr_gen);
2007                         if (!extent_buffer_uptodate(next)) {
2008                                 struct btrfs_key node_key;
2009
2010                                 btrfs_node_key_to_cpu(path->nodes[*level],
2011                                                       &node_key,
2012                                                       path->slots[*level]);
2013                                 btrfs_add_corrupt_extent_record(root->fs_info,
2014                                                 &node_key,
2015                                                 path->nodes[*level]->start,
2016                                                 root->nodesize, *level);
2017                                 err = -EIO;
2018                                 goto out;
2019                         }
2020                 }
2021
2022                 ret = check_child_node(root, cur, path->slots[*level], next);
2023                 if (ret) {
2024                         err = ret;
2025                         goto out;
2026                 }
2027
2028                 if (btrfs_is_leaf(next))
2029                         status = btrfs_check_leaf(root, NULL, next);
2030                 else
2031                         status = btrfs_check_node(root, NULL, next);
2032                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033                         free_extent_buffer(next);
2034                         err = -EIO;
2035                         goto out;
2036                 }
2037
2038                 *level = *level - 1;
2039                 free_extent_buffer(path->nodes[*level]);
2040                 path->nodes[*level] = next;
2041                 path->slots[*level] = 0;
2042         }
2043 out:
2044         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2045         return err;
2046 }
2047
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049                         struct walk_control *wc, int *level)
2050 {
2051         int i;
2052         struct extent_buffer *leaf;
2053
2054         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055                 leaf = path->nodes[i];
2056                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2057                         path->slots[i]++;
2058                         *level = i;
2059                         return 0;
2060                 } else {
2061                         free_extent_buffer(path->nodes[*level]);
2062                         path->nodes[*level] = NULL;
2063                         BUG_ON(*level > wc->active_node);
2064                         if (*level == wc->active_node)
2065                                 leave_shared_node(root, wc, *level);
2066                         *level = i + 1;
2067                 }
2068         }
2069         return 1;
2070 }
2071
2072 static int check_root_dir(struct inode_record *rec)
2073 {
2074         struct inode_backref *backref;
2075         int ret = -1;
2076
2077         if (!rec->found_inode_item || rec->errors)
2078                 goto out;
2079         if (rec->nlink != 1 || rec->found_link != 0)
2080                 goto out;
2081         if (list_empty(&rec->backrefs))
2082                 goto out;
2083         backref = to_inode_backref(rec->backrefs.next);
2084         if (!backref->found_inode_ref)
2085                 goto out;
2086         if (backref->index != 0 || backref->namelen != 2 ||
2087             memcmp(backref->name, "..", 2))
2088                 goto out;
2089         if (backref->found_dir_index || backref->found_dir_item)
2090                 goto out;
2091         ret = 0;
2092 out:
2093         return ret;
2094 }
2095
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097                               struct btrfs_root *root, struct btrfs_path *path,
2098                               struct inode_record *rec)
2099 {
2100         struct btrfs_inode_item *ei;
2101         struct btrfs_key key;
2102         int ret;
2103
2104         key.objectid = rec->ino;
2105         key.type = BTRFS_INODE_ITEM_KEY;
2106         key.offset = (u64)-1;
2107
2108         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2109         if (ret < 0)
2110                 goto out;
2111         if (ret) {
2112                 if (!path->slots[0]) {
2113                         ret = -ENOENT;
2114                         goto out;
2115                 }
2116                 path->slots[0]--;
2117                 ret = 0;
2118         }
2119         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120         if (key.objectid != rec->ino) {
2121                 ret = -ENOENT;
2122                 goto out;
2123         }
2124
2125         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126                             struct btrfs_inode_item);
2127         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128         btrfs_mark_buffer_dirty(path->nodes[0]);
2129         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131                root->root_key.objectid);
2132 out:
2133         btrfs_release_path(path);
2134         return ret;
2135 }
2136
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138                                     struct btrfs_root *root,
2139                                     struct btrfs_path *path,
2140                                     struct inode_record *rec)
2141 {
2142         int ret;
2143
2144         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145         btrfs_release_path(path);
2146         if (!ret)
2147                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2148         return ret;
2149 }
2150
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152                                struct btrfs_root *root,
2153                                struct btrfs_path *path,
2154                                struct inode_record *rec)
2155 {
2156         struct btrfs_inode_item *ei;
2157         struct btrfs_key key;
2158         int ret = 0;
2159
2160         key.objectid = rec->ino;
2161         key.type = BTRFS_INODE_ITEM_KEY;
2162         key.offset = 0;
2163
2164         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2165         if (ret) {
2166                 if (ret > 0)
2167                         ret = -ENOENT;
2168                 goto out;
2169         }
2170
2171         /* Since ret == 0, no need to check anything */
2172         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173                             struct btrfs_inode_item);
2174         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175         btrfs_mark_buffer_dirty(path->nodes[0]);
2176         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177         printf("reset nbytes for ino %llu root %llu\n",
2178                rec->ino, root->root_key.objectid);
2179 out:
2180         btrfs_release_path(path);
2181         return ret;
2182 }
2183
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185                                  struct cache_tree *inode_cache,
2186                                  struct inode_record *rec,
2187                                  struct inode_backref *backref)
2188 {
2189         struct btrfs_path path;
2190         struct btrfs_trans_handle *trans;
2191         struct btrfs_dir_item *dir_item;
2192         struct extent_buffer *leaf;
2193         struct btrfs_key key;
2194         struct btrfs_disk_key disk_key;
2195         struct inode_record *dir_rec;
2196         unsigned long name_ptr;
2197         u32 data_size = sizeof(*dir_item) + backref->namelen;
2198         int ret;
2199
2200         trans = btrfs_start_transaction(root, 1);
2201         if (IS_ERR(trans))
2202                 return PTR_ERR(trans);
2203
2204         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2205                 (unsigned long long)rec->ino);
2206
2207         btrfs_init_path(&path);
2208         key.objectid = backref->dir;
2209         key.type = BTRFS_DIR_INDEX_KEY;
2210         key.offset = backref->index;
2211         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2212         BUG_ON(ret);
2213
2214         leaf = path.nodes[0];
2215         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2216
2217         disk_key.objectid = cpu_to_le64(rec->ino);
2218         disk_key.type = BTRFS_INODE_ITEM_KEY;
2219         disk_key.offset = 0;
2220
2221         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2222         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2223         btrfs_set_dir_data_len(leaf, dir_item, 0);
2224         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2225         name_ptr = (unsigned long)(dir_item + 1);
2226         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2227         btrfs_mark_buffer_dirty(leaf);
2228         btrfs_release_path(&path);
2229         btrfs_commit_transaction(trans, root);
2230
2231         backref->found_dir_index = 1;
2232         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2233         BUG_ON(IS_ERR(dir_rec));
2234         if (!dir_rec)
2235                 return 0;
2236         dir_rec->found_size += backref->namelen;
2237         if (dir_rec->found_size == dir_rec->isize &&
2238             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2239                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2240         if (dir_rec->found_size != dir_rec->isize)
2241                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2242
2243         return 0;
2244 }
2245
2246 static int delete_dir_index(struct btrfs_root *root,
2247                             struct cache_tree *inode_cache,
2248                             struct inode_record *rec,
2249                             struct inode_backref *backref)
2250 {
2251         struct btrfs_trans_handle *trans;
2252         struct btrfs_dir_item *di;
2253         struct btrfs_path *path;
2254         int ret = 0;
2255
2256         path = btrfs_alloc_path();
2257         if (!path)
2258                 return -ENOMEM;
2259
2260         trans = btrfs_start_transaction(root, 1);
2261         if (IS_ERR(trans)) {
2262                 btrfs_free_path(path);
2263                 return PTR_ERR(trans);
2264         }
2265
2266
2267         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2268                 (unsigned long long)backref->dir,
2269                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2270                 (unsigned long long)root->objectid);
2271
2272         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2273                                     backref->name, backref->namelen,
2274                                     backref->index, -1);
2275         if (IS_ERR(di)) {
2276                 ret = PTR_ERR(di);
2277                 btrfs_free_path(path);
2278                 btrfs_commit_transaction(trans, root);
2279                 if (ret == -ENOENT)
2280                         return 0;
2281                 return ret;
2282         }
2283
2284         if (!di)
2285                 ret = btrfs_del_item(trans, root, path);
2286         else
2287                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2288         BUG_ON(ret);
2289         btrfs_free_path(path);
2290         btrfs_commit_transaction(trans, root);
2291         return ret;
2292 }
2293
2294 static int create_inode_item(struct btrfs_root *root,
2295                              struct inode_record *rec,
2296                              struct inode_backref *backref, int root_dir)
2297 {
2298         struct btrfs_trans_handle *trans;
2299         struct btrfs_inode_item inode_item;
2300         time_t now = time(NULL);
2301         int ret;
2302
2303         trans = btrfs_start_transaction(root, 1);
2304         if (IS_ERR(trans)) {
2305                 ret = PTR_ERR(trans);
2306                 return ret;
2307         }
2308
2309         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2310                 "be incomplete, please check permissions and content after "
2311                 "the fsck completes.\n", (unsigned long long)root->objectid,
2312                 (unsigned long long)rec->ino);
2313
2314         memset(&inode_item, 0, sizeof(inode_item));
2315         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2316         if (root_dir)
2317                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2318         else
2319                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2320         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2321         if (rec->found_dir_item) {
2322                 if (rec->found_file_extent)
2323                         fprintf(stderr, "root %llu inode %llu has both a dir "
2324                                 "item and extents, unsure if it is a dir or a "
2325                                 "regular file so setting it as a directory\n",
2326                                 (unsigned long long)root->objectid,
2327                                 (unsigned long long)rec->ino);
2328                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2329                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2330         } else if (!rec->found_dir_item) {
2331                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2332                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2333         }
2334         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2335         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2336         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2337         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2338         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2339         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2340         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2341         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2342
2343         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2344         BUG_ON(ret);
2345         btrfs_commit_transaction(trans, root);
2346         return 0;
2347 }
2348
2349 static int repair_inode_backrefs(struct btrfs_root *root,
2350                                  struct inode_record *rec,
2351                                  struct cache_tree *inode_cache,
2352                                  int delete)
2353 {
2354         struct inode_backref *tmp, *backref;
2355         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2356         int ret = 0;
2357         int repaired = 0;
2358
2359         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2360                 if (!delete && rec->ino == root_dirid) {
2361                         if (!rec->found_inode_item) {
2362                                 ret = create_inode_item(root, rec, backref, 1);
2363                                 if (ret)
2364                                         break;
2365                                 repaired++;
2366                         }
2367                 }
2368
2369                 /* Index 0 for root dir's are special, don't mess with it */
2370                 if (rec->ino == root_dirid && backref->index == 0)
2371                         continue;
2372
2373                 if (delete &&
2374                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2375                      (backref->found_dir_index && backref->found_inode_ref &&
2376                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2377                         ret = delete_dir_index(root, inode_cache, rec, backref);
2378                         if (ret)
2379                                 break;
2380                         repaired++;
2381                         list_del(&backref->list);
2382                         free(backref);
2383                 }
2384
2385                 if (!delete && !backref->found_dir_index &&
2386                     backref->found_dir_item && backref->found_inode_ref) {
2387                         ret = add_missing_dir_index(root, inode_cache, rec,
2388                                                     backref);
2389                         if (ret)
2390                                 break;
2391                         repaired++;
2392                         if (backref->found_dir_item &&
2393                             backref->found_dir_index &&
2394                             backref->found_dir_index) {
2395                                 if (!backref->errors &&
2396                                     backref->found_inode_ref) {
2397                                         list_del(&backref->list);
2398                                         free(backref);
2399                                 }
2400                         }
2401                 }
2402
2403                 if (!delete && (!backref->found_dir_index &&
2404                                 !backref->found_dir_item &&
2405                                 backref->found_inode_ref)) {
2406                         struct btrfs_trans_handle *trans;
2407                         struct btrfs_key location;
2408
2409                         ret = check_dir_conflict(root, backref->name,
2410                                                  backref->namelen,
2411                                                  backref->dir,
2412                                                  backref->index);
2413                         if (ret) {
2414                                 /*
2415                                  * let nlink fixing routine to handle it,
2416                                  * which can do it better.
2417                                  */
2418                                 ret = 0;
2419                                 break;
2420                         }
2421                         location.objectid = rec->ino;
2422                         location.type = BTRFS_INODE_ITEM_KEY;
2423                         location.offset = 0;
2424
2425                         trans = btrfs_start_transaction(root, 1);
2426                         if (IS_ERR(trans)) {
2427                                 ret = PTR_ERR(trans);
2428                                 break;
2429                         }
2430                         fprintf(stderr, "adding missing dir index/item pair "
2431                                 "for inode %llu\n",
2432                                 (unsigned long long)rec->ino);
2433                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2434                                                     backref->namelen,
2435                                                     backref->dir, &location,
2436                                                     imode_to_type(rec->imode),
2437                                                     backref->index);
2438                         BUG_ON(ret);
2439                         btrfs_commit_transaction(trans, root);
2440                         repaired++;
2441                 }
2442
2443                 if (!delete && (backref->found_inode_ref &&
2444                                 backref->found_dir_index &&
2445                                 backref->found_dir_item &&
2446                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2447                                 !rec->found_inode_item)) {
2448                         ret = create_inode_item(root, rec, backref, 0);
2449                         if (ret)
2450                                 break;
2451                         repaired++;
2452                 }
2453
2454         }
2455         return ret ? ret : repaired;
2456 }
2457
2458 /*
2459  * To determine the file type for nlink/inode_item repair
2460  *
2461  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2462  * Return -ENOENT if file type is not found.
2463  */
2464 static int find_file_type(struct inode_record *rec, u8 *type)
2465 {
2466         struct inode_backref *backref;
2467
2468         /* For inode item recovered case */
2469         if (rec->found_inode_item) {
2470                 *type = imode_to_type(rec->imode);
2471                 return 0;
2472         }
2473
2474         list_for_each_entry(backref, &rec->backrefs, list) {
2475                 if (backref->found_dir_index || backref->found_dir_item) {
2476                         *type = backref->filetype;
2477                         return 0;
2478                 }
2479         }
2480         return -ENOENT;
2481 }
2482
2483 /*
2484  * To determine the file name for nlink repair
2485  *
2486  * Return 0 if file name is found, set name and namelen.
2487  * Return -ENOENT if file name is not found.
2488  */
2489 static int find_file_name(struct inode_record *rec,
2490                           char *name, int *namelen)
2491 {
2492         struct inode_backref *backref;
2493
2494         list_for_each_entry(backref, &rec->backrefs, list) {
2495                 if (backref->found_dir_index || backref->found_dir_item ||
2496                     backref->found_inode_ref) {
2497                         memcpy(name, backref->name, backref->namelen);
2498                         *namelen = backref->namelen;
2499                         return 0;
2500                 }
2501         }
2502         return -ENOENT;
2503 }
2504
2505 /* Reset the nlink of the inode to the correct one */
2506 static int reset_nlink(struct btrfs_trans_handle *trans,
2507                        struct btrfs_root *root,
2508                        struct btrfs_path *path,
2509                        struct inode_record *rec)
2510 {
2511         struct inode_backref *backref;
2512         struct inode_backref *tmp;
2513         struct btrfs_key key;
2514         struct btrfs_inode_item *inode_item;
2515         int ret = 0;
2516
2517         /* We don't believe this either, reset it and iterate backref */
2518         rec->found_link = 0;
2519
2520         /* Remove all backref including the valid ones */
2521         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2522                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2523                                    backref->index, backref->name,
2524                                    backref->namelen, 0);
2525                 if (ret < 0)
2526                         goto out;
2527
2528                 /* remove invalid backref, so it won't be added back */
2529                 if (!(backref->found_dir_index &&
2530                       backref->found_dir_item &&
2531                       backref->found_inode_ref)) {
2532                         list_del(&backref->list);
2533                         free(backref);
2534                 } else {
2535                         rec->found_link++;
2536                 }
2537         }
2538
2539         /* Set nlink to 0 */
2540         key.objectid = rec->ino;
2541         key.type = BTRFS_INODE_ITEM_KEY;
2542         key.offset = 0;
2543         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2544         if (ret < 0)
2545                 goto out;
2546         if (ret > 0) {
2547                 ret = -ENOENT;
2548                 goto out;
2549         }
2550         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2551                                     struct btrfs_inode_item);
2552         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2553         btrfs_mark_buffer_dirty(path->nodes[0]);
2554         btrfs_release_path(path);
2555
2556         /*
2557          * Add back valid inode_ref/dir_item/dir_index,
2558          * add_link() will handle the nlink inc, so new nlink must be correct
2559          */
2560         list_for_each_entry(backref, &rec->backrefs, list) {
2561                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2562                                      backref->name, backref->namelen,
2563                                      backref->filetype, &backref->index, 1);
2564                 if (ret < 0)
2565                         goto out;
2566         }
2567 out:
2568         btrfs_release_path(path);
2569         return ret;
2570 }
2571
2572 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2573                                struct btrfs_root *root,
2574                                struct btrfs_path *path,
2575                                struct inode_record *rec)
2576 {
2577         char *dir_name = "lost+found";
2578         char namebuf[BTRFS_NAME_LEN] = {0};
2579         u64 lost_found_ino;
2580         u32 mode = 0700;
2581         u8 type = 0;
2582         int namelen = 0;
2583         int name_recovered = 0;
2584         int type_recovered = 0;
2585         int ret = 0;
2586
2587         /*
2588          * Get file name and type first before these invalid inode ref
2589          * are deleted by remove_all_invalid_backref()
2590          */
2591         name_recovered = !find_file_name(rec, namebuf, &namelen);
2592         type_recovered = !find_file_type(rec, &type);
2593
2594         if (!name_recovered) {
2595                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2596                        rec->ino, rec->ino);
2597                 namelen = count_digits(rec->ino);
2598                 sprintf(namebuf, "%llu", rec->ino);
2599                 name_recovered = 1;
2600         }
2601         if (!type_recovered) {
2602                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2603                        rec->ino);
2604                 type = BTRFS_FT_REG_FILE;
2605                 type_recovered = 1;
2606         }
2607
2608         ret = reset_nlink(trans, root, path, rec);
2609         if (ret < 0) {
2610                 fprintf(stderr,
2611                         "Failed to reset nlink for inode %llu: %s\n",
2612                         rec->ino, strerror(-ret));
2613                 goto out;
2614         }
2615
2616         if (rec->found_link == 0) {
2617                 lost_found_ino = root->highest_inode;
2618                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2619                         ret = -EOVERFLOW;
2620                         goto out;
2621                 }
2622                 lost_found_ino++;
2623                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2624                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2625                                   mode);
2626                 if (ret < 0) {
2627                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2628                                 dir_name, strerror(-ret));
2629                         goto out;
2630                 }
2631                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2632                                      namebuf, namelen, type, NULL, 1);
2633                 /*
2634                  * Add ".INO" suffix several times to handle case where
2635                  * "FILENAME.INO" is already taken by another file.
2636                  */
2637                 while (ret == -EEXIST) {
2638                         /*
2639                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2640                          */
2641                         if (namelen + count_digits(rec->ino) + 1 >
2642                             BTRFS_NAME_LEN) {
2643                                 ret = -EFBIG;
2644                                 goto out;
2645                         }
2646                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2647                                  ".%llu", rec->ino);
2648                         namelen += count_digits(rec->ino) + 1;
2649                         ret = btrfs_add_link(trans, root, rec->ino,
2650                                              lost_found_ino, namebuf,
2651                                              namelen, type, NULL, 1);
2652                 }
2653                 if (ret < 0) {
2654                         fprintf(stderr,
2655                                 "Failed to link the inode %llu to %s dir: %s\n",
2656                                 rec->ino, dir_name, strerror(-ret));
2657                         goto out;
2658                 }
2659                 /*
2660                  * Just increase the found_link, don't actually add the
2661                  * backref. This will make things easier and this inode
2662                  * record will be freed after the repair is done.
2663                  * So fsck will not report problem about this inode.
2664                  */
2665                 rec->found_link++;
2666                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2667                        namelen, namebuf, dir_name);
2668         }
2669         printf("Fixed the nlink of inode %llu\n", rec->ino);
2670 out:
2671         /*
2672          * Clear the flag anyway, or we will loop forever for the same inode
2673          * as it will not be removed from the bad inode list and the dead loop
2674          * happens.
2675          */
2676         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2677         btrfs_release_path(path);
2678         return ret;
2679 }
2680
2681 /*
2682  * Check if there is any normal(reg or prealloc) file extent for given
2683  * ino.
2684  * This is used to determine the file type when neither its dir_index/item or
2685  * inode_item exists.
2686  *
2687  * This will *NOT* report error, if any error happens, just consider it does
2688  * not have any normal file extent.
2689  */
2690 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2691 {
2692         struct btrfs_path *path;
2693         struct btrfs_key key;
2694         struct btrfs_key found_key;
2695         struct btrfs_file_extent_item *fi;
2696         u8 type;
2697         int ret = 0;
2698
2699         path = btrfs_alloc_path();
2700         if (!path)
2701                 goto out;
2702         key.objectid = ino;
2703         key.type = BTRFS_EXTENT_DATA_KEY;
2704         key.offset = 0;
2705
2706         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2707         if (ret < 0) {
2708                 ret = 0;
2709                 goto out;
2710         }
2711         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2712                 ret = btrfs_next_leaf(root, path);
2713                 if (ret) {
2714                         ret = 0;
2715                         goto out;
2716                 }
2717         }
2718         while (1) {
2719                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2720                                       path->slots[0]);
2721                 if (found_key.objectid != ino ||
2722                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2723                         break;
2724                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2725                                     struct btrfs_file_extent_item);
2726                 type = btrfs_file_extent_type(path->nodes[0], fi);
2727                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2728                         ret = 1;
2729                         goto out;
2730                 }
2731         }
2732 out:
2733         btrfs_free_path(path);
2734         return ret;
2735 }
2736
2737 static u32 btrfs_type_to_imode(u8 type)
2738 {
2739         static u32 imode_by_btrfs_type[] = {
2740                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2741                 [BTRFS_FT_DIR]          = S_IFDIR,
2742                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2743                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2744                 [BTRFS_FT_FIFO]         = S_IFIFO,
2745                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2746                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2747         };
2748
2749         return imode_by_btrfs_type[(type)];
2750 }
2751
2752 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2753                                 struct btrfs_root *root,
2754                                 struct btrfs_path *path,
2755                                 struct inode_record *rec)
2756 {
2757         u8 filetype;
2758         u32 mode = 0700;
2759         int type_recovered = 0;
2760         int ret = 0;
2761
2762         printf("Trying to rebuild inode:%llu\n", rec->ino);
2763
2764         type_recovered = !find_file_type(rec, &filetype);
2765
2766         /*
2767          * Try to determine inode type if type not found.
2768          *
2769          * For found regular file extent, it must be FILE.
2770          * For found dir_item/index, it must be DIR.
2771          *
2772          * For undetermined one, use FILE as fallback.
2773          *
2774          * TODO:
2775          * 1. If found backref(inode_index/item is already handled) to it,
2776          *    it must be DIR.
2777          *    Need new inode-inode ref structure to allow search for that.
2778          */
2779         if (!type_recovered) {
2780                 if (rec->found_file_extent &&
2781                     find_normal_file_extent(root, rec->ino)) {
2782                         type_recovered = 1;
2783                         filetype = BTRFS_FT_REG_FILE;
2784                 } else if (rec->found_dir_item) {
2785                         type_recovered = 1;
2786                         filetype = BTRFS_FT_DIR;
2787                 } else if (!list_empty(&rec->orphan_extents)) {
2788                         type_recovered = 1;
2789                         filetype = BTRFS_FT_REG_FILE;
2790                 } else{
2791                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2792                                rec->ino);
2793                         type_recovered = 1;
2794                         filetype = BTRFS_FT_REG_FILE;
2795                 }
2796         }
2797
2798         ret = btrfs_new_inode(trans, root, rec->ino,
2799                               mode | btrfs_type_to_imode(filetype));
2800         if (ret < 0)
2801                 goto out;
2802
2803         /*
2804          * Here inode rebuild is done, we only rebuild the inode item,
2805          * don't repair the nlink(like move to lost+found).
2806          * That is the job of nlink repair.
2807          *
2808          * We just fill the record and return
2809          */
2810         rec->found_dir_item = 1;
2811         rec->imode = mode | btrfs_type_to_imode(filetype);
2812         rec->nlink = 0;
2813         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2814         /* Ensure the inode_nlinks repair function will be called */
2815         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2816 out:
2817         return ret;
2818 }
2819
2820 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2821                                       struct btrfs_root *root,
2822                                       struct btrfs_path *path,
2823                                       struct inode_record *rec)
2824 {
2825         struct orphan_data_extent *orphan;
2826         struct orphan_data_extent *tmp;
2827         int ret = 0;
2828
2829         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2830                 /*
2831                  * Check for conflicting file extents
2832                  *
2833                  * Here we don't know whether the extents is compressed or not,
2834                  * so we can only assume it not compressed nor data offset,
2835                  * and use its disk_len as extent length.
2836                  */
2837                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2838                                        orphan->offset, orphan->disk_len, 0);
2839                 btrfs_release_path(path);
2840                 if (ret < 0)
2841                         goto out;
2842                 if (!ret) {
2843                         fprintf(stderr,
2844                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2845                                 orphan->disk_bytenr, orphan->disk_len);
2846                         ret = btrfs_free_extent(trans,
2847                                         root->fs_info->extent_root,
2848                                         orphan->disk_bytenr, orphan->disk_len,
2849                                         0, root->objectid, orphan->objectid,
2850                                         orphan->offset);
2851                         if (ret < 0)
2852                                 goto out;
2853                 }
2854                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2855                                 orphan->offset, orphan->disk_bytenr,
2856                                 orphan->disk_len, orphan->disk_len);
2857                 if (ret < 0)
2858                         goto out;
2859
2860                 /* Update file size info */
2861                 rec->found_size += orphan->disk_len;
2862                 if (rec->found_size == rec->nbytes)
2863                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2864
2865                 /* Update the file extent hole info too */
2866                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2867                                            orphan->disk_len);
2868                 if (ret < 0)
2869                         goto out;
2870                 if (RB_EMPTY_ROOT(&rec->holes))
2871                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2872
2873                 list_del(&orphan->list);
2874                 free(orphan);
2875         }
2876         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2877 out:
2878         return ret;
2879 }
2880
2881 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2882                                         struct btrfs_root *root,
2883                                         struct btrfs_path *path,
2884                                         struct inode_record *rec)
2885 {
2886         struct rb_node *node;
2887         struct file_extent_hole *hole;
2888         int found = 0;
2889         int ret = 0;
2890
2891         node = rb_first(&rec->holes);
2892
2893         while (node) {
2894                 found = 1;
2895                 hole = rb_entry(node, struct file_extent_hole, node);
2896                 ret = btrfs_punch_hole(trans, root, rec->ino,
2897                                        hole->start, hole->len);
2898                 if (ret < 0)
2899                         goto out;
2900                 ret = del_file_extent_hole(&rec->holes, hole->start,
2901                                            hole->len);
2902                 if (ret < 0)
2903                         goto out;
2904                 if (RB_EMPTY_ROOT(&rec->holes))
2905                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2906                 node = rb_first(&rec->holes);
2907         }
2908         /* special case for a file losing all its file extent */
2909         if (!found) {
2910                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2911                                        round_up(rec->isize, root->sectorsize));
2912                 if (ret < 0)
2913                         goto out;
2914         }
2915         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2916                rec->ino, root->objectid);
2917 out:
2918         return ret;
2919 }
2920
2921 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2922 {
2923         struct btrfs_trans_handle *trans;
2924         struct btrfs_path *path;
2925         int ret = 0;
2926
2927         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2928                              I_ERR_NO_ORPHAN_ITEM |
2929                              I_ERR_LINK_COUNT_WRONG |
2930                              I_ERR_NO_INODE_ITEM |
2931                              I_ERR_FILE_EXTENT_ORPHAN |
2932                              I_ERR_FILE_EXTENT_DISCOUNT|
2933                              I_ERR_FILE_NBYTES_WRONG)))
2934                 return rec->errors;
2935
2936         path = btrfs_alloc_path();
2937         if (!path)
2938                 return -ENOMEM;
2939
2940         /*
2941          * For nlink repair, it may create a dir and add link, so
2942          * 2 for parent(256)'s dir_index and dir_item
2943          * 2 for lost+found dir's inode_item and inode_ref
2944          * 1 for the new inode_ref of the file
2945          * 2 for lost+found dir's dir_index and dir_item for the file
2946          */
2947         trans = btrfs_start_transaction(root, 7);
2948         if (IS_ERR(trans)) {
2949                 btrfs_free_path(path);
2950                 return PTR_ERR(trans);
2951         }
2952
2953         if (rec->errors & I_ERR_NO_INODE_ITEM)
2954                 ret = repair_inode_no_item(trans, root, path, rec);
2955         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2956                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2957         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2958                 ret = repair_inode_discount_extent(trans, root, path, rec);
2959         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2960                 ret = repair_inode_isize(trans, root, path, rec);
2961         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2962                 ret = repair_inode_orphan_item(trans, root, path, rec);
2963         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2964                 ret = repair_inode_nlinks(trans, root, path, rec);
2965         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2966                 ret = repair_inode_nbytes(trans, root, path, rec);
2967         btrfs_commit_transaction(trans, root);
2968         btrfs_free_path(path);
2969         return ret;
2970 }
2971
2972 static int check_inode_recs(struct btrfs_root *root,
2973                             struct cache_tree *inode_cache)
2974 {
             /*
              * Check (and, when the global 'repair' flag is set, try to fix)
              * every inode record collected in 'inode_cache' for 'root'.
              *
              * Steps, in order:
              *  - a root with zero refs is skipped (return 0);
              *  - the highest inode number seen is saved in
              *    root->highest_inode;
              *  - in repair mode, backrefs are repaired in stages; if anything
              *    was changed or failed, all records are freed and the error
              *    (-EAGAIN when something was repaired) is returned;
              *  - the root directory is checked, or recreated in repair mode
              *    (returning -EAGAIN);
              *  - every remaining record is removed from the cache, checked,
              *    optionally repaired, reported and freed.
              *
              * Return 0 when no error was counted, -1 when at least one was,
              * or one of the negative values described above.
              */
2975         struct cache_extent *cache;
2976         struct ptr_node *node;
2977         struct inode_record *rec;
2978         struct inode_backref *backref;
2979         int stage = 0;
2980         int ret = 0;
2981         int err = 0;
2982         u64 error = 0;
2983         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2984
             /* Root with no references: nothing to check, only warn on leftovers */
2985         if (btrfs_root_refs(&root->root_item) == 0) {
2986                 if (!cache_tree_empty(inode_cache))
2987                         fprintf(stderr, "warning line %d\n", __LINE__);
2988                 return 0;
2989         }
2990
2991         /*
2992          * We need to record the highest inode number for later 'lost+found'
2993          * dir creation.
2994          * We must select an ino not used/referred by any existing inode, or
2995          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2996          * this may cause 'lost+found' dir has wrong nlinks.
2997          */
2998         cache = last_cache_extent(inode_cache);
2999         if (cache) {
3000                 node = container_of(cache, struct ptr_node, cache);
3001                 rec = node->data;
3002                 if (rec->ino > root->highest_inode)
3003                         root->highest_inode = rec->ino;
3004         }
3005
3006         /*
3007          * We need to repair backrefs first because we could change some of the
3008          * errors in the inode recs.
3009          *
3010          * We also need to go through and delete invalid backrefs first and then
3011          * add the correct ones second.  We do this because we may get EEXIST
3012          * when adding back the correct index because we hadn't yet deleted the
3013          * invalid index.
3014          *
3015          * For example, if we were missing a dir index then the directories
3016          * isize would be wrong, so if we fixed the isize to what we thought it
3017          * would be and then fixed the backref we'd still have an invalid fs, so
3018          * we need to add back the dir index and then check to see if the isize
3019          * is still wrong.
3020          *
              * Stage 1 calls repair_inode_backrefs() with delete set, stage 2
              * with delete clear.  Stage 3 runs only when 'err' is set and
              * frees every record so the caller can rescan.
3021          */
3022         while (stage < 3) {
3023                 stage++;
3024                 if (stage == 3 && !err)
3025                         break;
3026
3027                 cache = search_cache_extent(inode_cache, 0);
                     /* The walk is skipped entirely when not in repair mode */
3028                 while (repair && cache) {
3029                         node = container_of(cache, struct ptr_node, cache);
3030                         rec = node->data;
                             /* Advance first: the current node may be removed below */
3031                         cache = next_cache_extent(cache);
3032
3033                         /* Need to free everything up and rescan */
3034                         if (stage == 3) {
3035                                 remove_cache_extent(inode_cache, &node->cache);
3036                                 free(node);
3037                                 free_inode_rec(rec);
3038                                 continue;
3039                         }
3040
3041                         if (list_empty(&rec->backrefs))
3042                                 continue;
3043
3044                         ret = repair_inode_backrefs(root, rec, inode_cache,
3045                                                     stage == 1);
3046                         if (ret < 0) {
                                     /* Hard failure: jump straight to the cleanup stage */
3047                                 err = ret;
3048                                 stage = 2;
3049                                 break;
3050                         } if (ret > 0) {
                                     /* Something was repaired: request a rescan */
3051                                 err = -EAGAIN;
3052                         }
3053                 }
3054         }
3055         if (err)
3056                 return err;
3057
             /* Validate the root directory of this root */
3058         rec = get_inode_rec(inode_cache, root_dirid, 0);
3059         BUG_ON(IS_ERR(rec));
3060         if (rec) {
3061                 ret = check_root_dir(rec);
3062                 if (ret) {
3063                         fprintf(stderr, "root %llu root dir %llu error\n",
3064                                 (unsigned long long)root->root_key.objectid,
3065                                 (unsigned long long)root_dirid);
3066                         print_inode_error(root, rec);
3067                         error++;
3068                 }
3069         } else {
3070                 if (repair) {
3071                         struct btrfs_trans_handle *trans;
3072
3073                         trans = btrfs_start_transaction(root, 1);
3074                         if (IS_ERR(trans)) {
3075                                 err = PTR_ERR(trans);
3076                                 return err;
3077                         }
3078
3079                         fprintf(stderr,
3080                                 "root %llu missing its root dir, recreating\n",
3081                                 (unsigned long long)root->objectid);
3082
3083                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3084                         BUG_ON(ret);
3085
3086                         btrfs_commit_transaction(trans, root);
                             /* The tree changed: ask the caller to rescan */
3087                         return -EAGAIN;
3088                 }
3089
                     /* Not repairing: only report; 'error' is not incremented here */
3090                 fprintf(stderr, "root %llu root dir %llu not found\n",
3091                         (unsigned long long)root->root_key.objectid,
3092                         (unsigned long long)root_dirid);
3093         }
3094
             /* Drain the cache: every record is removed, examined and freed */
3095         while (1) {
3096                 cache = search_cache_extent(inode_cache, 0);
3097                 if (!cache)
3098                         break;
3099                 node = container_of(cache, struct ptr_node, cache);
3100                 rec = node->data;
3101                 remove_cache_extent(inode_cache, &node->cache);
3102                 free(node);
                     /* Root dir was handled above; the orphan objectid is not checked */
3103                 if (rec->ino == root_dirid ||
3104                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3105                         free_inode_rec(rec);
3106                         continue;
3107                 }
3108
                     /* Drop the "no orphan item" error when check_orphan_item() returns 0 */
3109                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3110                         ret = check_orphan_item(root, rec->ino);
3111                         if (ret == 0)
3112                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3113                         if (can_free_inode_rec(rec)) {
3114                                 free_inode_rec(rec);
3115                                 continue;
3116                         }
3117                 }
3118
3119                 if (!rec->found_inode_item)
3120                         rec->errors |= I_ERR_NO_INODE_ITEM;
3121                 if (rec->found_link != rec->nlink)
3122                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3123                 if (repair) {
3124                         ret = try_repair_inode(root, rec);
3125                         if (ret == 0 && can_free_inode_rec(rec)) {
3126                                 free_inode_rec(rec);
3127                                 continue;
3128                         }
                             /*
                              * NOTE(review): ret is forced to 0 here, so in
                              * repair mode the counter below is never bumped,
                              * even when the repair failed -- confirm this is
                              * intended.
                              */
3129                         ret = 0;
3130                 }
3131
3132                 if (!(repair && ret == 0))
3133                         error++;
3134                 print_inode_error(root, rec);
                     /* Flag what each remaining backref is missing, then report it */
3135                 list_for_each_entry(backref, &rec->backrefs, list) {
3136                         if (!backref->found_dir_item)
3137                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3138                         if (!backref->found_dir_index)
3139                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3140                         if (!backref->found_inode_ref)
3141                                 backref->errors |= REF_ERR_NO_INODE_REF;
3142                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3143                                 " namelen %u name %s filetype %d errors %x",
3144                                 (unsigned long long)backref->dir,
3145                                 (unsigned long long)backref->index,
3146                                 backref->namelen, backref->name,
3147                                 backref->filetype, backref->errors);
3148                         print_ref_error(backref->errors);
3149                 }
3150                 free_inode_rec(rec);
3151         }
3152         return (error > 0) ? -1 : 0;
3153 }
3154
3153
3154 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3155                                         u64 objectid)
3156 {
3157         struct cache_extent *cache;
3158         struct root_record *rec = NULL;
3159         int ret;
3160
3161         cache = lookup_cache_extent(root_cache, objectid, 1);
3162         if (cache) {
3163                 rec = container_of(cache, struct root_record, cache);
3164         } else {
3165                 rec = calloc(1, sizeof(*rec));
3166                 if (!rec)
3167                         return ERR_PTR(-ENOMEM);
3168                 rec->objectid = objectid;
3169                 INIT_LIST_HEAD(&rec->backrefs);
3170                 rec->cache.start = objectid;
3171                 rec->cache.size = 1;
3172
3173                 ret = insert_cache_extent(root_cache, &rec->cache);
3174                 if (ret)
3175                         return ERR_PTR(-EEXIST);
3176         }
3177         return rec;
3178 }
3179
3180 static struct root_backref *get_root_backref(struct root_record *rec,
3181                                              u64 ref_root, u64 dir, u64 index,
3182                                              const char *name, int namelen)
3183 {
3184         struct root_backref *backref;
3185
3186         list_for_each_entry(backref, &rec->backrefs, list) {
3187                 if (backref->ref_root != ref_root || backref->dir != dir ||
3188                     backref->namelen != namelen)
3189                         continue;
3190                 if (memcmp(name, backref->name, namelen))
3191                         continue;
3192                 return backref;
3193         }
3194
3195         backref = calloc(1, sizeof(*backref) + namelen + 1);
3196         if (!backref)
3197                 return NULL;
3198         backref->ref_root = ref_root;
3199         backref->dir = dir;
3200         backref->index = index;
3201         backref->namelen = namelen;
3202         memcpy(backref->name, name, namelen);
3203         backref->name[namelen] = '\0';
3204         list_add_tail(&backref->list, &rec->backrefs);
3205         return backref;
3206 }
3207
3208 static void free_root_record(struct cache_extent *cache)
3209 {
3210         struct root_record *rec;
3211         struct root_backref *backref;
3212
3213         rec = container_of(cache, struct root_record, cache);
3214         while (!list_empty(&rec->backrefs)) {
3215                 backref = to_root_backref(rec->backrefs.next);
3216                 list_del(&backref->list);
3217                 free(backref);
3218         }
3219
3220         free(rec);
3221 }
3222
3223 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3224
3225 static int add_root_backref(struct cache_tree *root_cache,
3226                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3227                             const char *name, int namelen,
3228                             int item_type, int errors)
3229 {
3230         struct root_record *rec;
3231         struct root_backref *backref;
3232
3233         rec = get_root_rec(root_cache, root_id);
3234         BUG_ON(IS_ERR(rec));
3235         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3236         BUG_ON(!backref);
3237
3238         backref->errors |= errors;
3239
3240         if (item_type != BTRFS_DIR_ITEM_KEY) {
3241                 if (backref->found_dir_index || backref->found_back_ref ||
3242                     backref->found_forward_ref) {
3243                         if (backref->index != index)
3244                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3245                 } else {
3246                         backref->index = index;
3247                 }
3248         }
3249
3250         if (item_type == BTRFS_DIR_ITEM_KEY) {
3251                 if (backref->found_forward_ref)
3252                         rec->found_ref++;
3253                 backref->found_dir_item = 1;
3254         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3255                 backref->found_dir_index = 1;
3256         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3257                 if (backref->found_forward_ref)
3258                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3259                 else if (backref->found_dir_item)
3260                         rec->found_ref++;
3261                 backref->found_forward_ref = 1;
3262         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3263                 if (backref->found_back_ref)
3264                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3265                 backref->found_back_ref = 1;
3266         } else {
3267                 BUG_ON(1);
3268         }
3269
3270         if (backref->found_forward_ref && backref->found_dir_item)
3271                 backref->reachable = 1;
3272         return 0;
3273 }
3274
3275 static int merge_root_recs(struct btrfs_root *root,
3276                            struct cache_tree *src_cache,
3277                            struct cache_tree *dst_cache)
3278 {
3279         struct cache_extent *cache;
3280         struct ptr_node *node;
3281         struct inode_record *rec;
3282         struct inode_backref *backref;
3283         int ret = 0;
3284
3285         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3286                 free_inode_recs_tree(src_cache);
3287                 return 0;
3288         }
3289
3290         while (1) {
3291                 cache = search_cache_extent(src_cache, 0);
3292                 if (!cache)
3293                         break;
3294                 node = container_of(cache, struct ptr_node, cache);
3295                 rec = node->data;
3296                 remove_cache_extent(src_cache, &node->cache);
3297                 free(node);
3298
3299                 ret = is_child_root(root, root->objectid, rec->ino);
3300                 if (ret < 0)
3301                         break;
3302                 else if (ret == 0)
3303                         goto skip;
3304
3305                 list_for_each_entry(backref, &rec->backrefs, list) {
3306                         BUG_ON(backref->found_inode_ref);
3307                         if (backref->found_dir_item)
3308                                 add_root_backref(dst_cache, rec->ino,
3309                                         root->root_key.objectid, backref->dir,
3310                                         backref->index, backref->name,
3311                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3312                                         backref->errors);
3313                         if (backref->found_dir_index)
3314                                 add_root_backref(dst_cache, rec->ino,
3315                                         root->root_key.objectid, backref->dir,
3316                                         backref->index, backref->name,
3317                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3318                                         backref->errors);
3319                 }
3320 skip:
3321                 free_inode_rec(rec);
3322         }
3323         if (ret < 0)
3324                 return ret;
3325         return 0;
3326 }
3327
3328 static int check_root_refs(struct btrfs_root *root,
3329                            struct cache_tree *root_cache)
3330 {
3331         struct root_record *rec;
3332         struct root_record *ref_root;
3333         struct root_backref *backref;
3334         struct cache_extent *cache;
3335         int loop = 1;
3336         int ret;
3337         int error;
3338         int errors = 0;
3339
3340         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3341         BUG_ON(IS_ERR(rec));
3342         rec->found_ref = 1;
3343
3344         /* fixme: this can not detect circular references */
3345         while (loop) {
3346                 loop = 0;
3347                 cache = search_cache_extent(root_cache, 0);
3348                 while (1) {
3349                         if (!cache)
3350                                 break;
3351                         rec = container_of(cache, struct root_record, cache);
3352                         cache = next_cache_extent(cache);
3353
3354                         if (rec->found_ref == 0)
3355                                 continue;
3356
3357                         list_for_each_entry(backref, &rec->backrefs, list) {
3358                                 if (!backref->reachable)
3359                                         continue;
3360
3361                                 ref_root = get_root_rec(root_cache,
3362                                                         backref->ref_root);
3363                                 BUG_ON(IS_ERR(ref_root));
3364                                 if (ref_root->found_ref > 0)
3365                                         continue;
3366
3367                                 backref->reachable = 0;
3368                                 rec->found_ref--;
3369                                 if (rec->found_ref == 0)
3370                                         loop = 1;
3371                         }
3372                 }
3373         }
3374
3375         cache = search_cache_extent(root_cache, 0);
3376         while (1) {
3377                 if (!cache)
3378                         break;
3379                 rec = container_of(cache, struct root_record, cache);
3380                 cache = next_cache_extent(cache);
3381
3382                 if (rec->found_ref == 0 &&
3383                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3384                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3385                         ret = check_orphan_item(root->fs_info->tree_root,
3386                                                 rec->objectid);
3387                         if (ret == 0)
3388                                 continue;
3389
3390                         /*
3391                          * If we don't have a root item then we likely just have
3392                          * a dir item in a snapshot for this root but no actual
3393                          * ref key or anything so it's meaningless.
3394                          */
3395                         if (!rec->found_root_item)
3396                                 continue;
3397                         errors++;
3398                         fprintf(stderr, "fs tree %llu not referenced\n",
3399                                 (unsigned long long)rec->objectid);
3400                 }
3401
3402                 error = 0;
3403                 if (rec->found_ref > 0 && !rec->found_root_item)
3404                         error = 1;
3405                 list_for_each_entry(backref, &rec->backrefs, list) {
3406                         if (!backref->found_dir_item)
3407                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3408                         if (!backref->found_dir_index)
3409                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3410                         if (!backref->found_back_ref)
3411                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3412                         if (!backref->found_forward_ref)
3413                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3414                         if (backref->reachable && backref->errors)
3415                                 error = 1;
3416                 }
3417                 if (!error)
3418                         continue;
3419
3420                 errors++;
3421                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3422                         (unsigned long long)rec->objectid, rec->found_ref,
3423                          rec->found_root_item ? "" : "not found");
3424
3425                 list_for_each_entry(backref, &rec->backrefs, list) {
3426                         if (!backref->reachable)
3427                                 continue;
3428                         if (!backref->errors && rec->found_root_item)
3429                                 continue;
3430                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3431                                 " index %llu namelen %u name %s errors %x\n",
3432                                 (unsigned long long)backref->ref_root,
3433                                 (unsigned long long)backref->dir,
3434                                 (unsigned long long)backref->index,
3435                                 backref->namelen, backref->name,
3436                                 backref->errors);
3437                         print_ref_error(backref->errors);
3438                 }
3439         }
3440         return errors > 0 ? 1 : 0;
3441 }
3442
3443 static int process_root_ref(struct extent_buffer *eb, int slot,
3444                             struct btrfs_key *key,
3445                             struct cache_tree *root_cache)
3446 {
3447         u64 dirid;
3448         u64 index;
3449         u32 len;
3450         u32 name_len;
3451         struct btrfs_root_ref *ref;
3452         char namebuf[BTRFS_NAME_LEN];
3453         int error;
3454
3455         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3456
3457         dirid = btrfs_root_ref_dirid(eb, ref);
3458         index = btrfs_root_ref_sequence(eb, ref);
3459         name_len = btrfs_root_ref_name_len(eb, ref);
3460
3461         if (name_len <= BTRFS_NAME_LEN) {
3462                 len = name_len;
3463                 error = 0;
3464         } else {
3465                 len = BTRFS_NAME_LEN;
3466                 error = REF_ERR_NAME_TOO_LONG;
3467         }
3468         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3469
3470         if (key->type == BTRFS_ROOT_REF_KEY) {
3471                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3472                                  index, namebuf, len, key->type, error);
3473         } else {
3474                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3475                                  index, namebuf, len, key->type, error);
3476         }
3477         return 0;
3478 }
3479
3480 static void free_corrupt_block(struct cache_extent *cache)
3481 {
3482         struct btrfs_corrupt_block *corrupt;
3483
3484         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3485         free(corrupt);
3486 }
3487
3488 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3489
3490 /*
3491  * Repair the btree of the given root.
3492  *
3493  * The fix is to remove the node key in corrupt_blocks cache_tree.
3494  * and rebalance the tree.
3495  * After the fix, the btree should be writeable.
3496  */
3497 static int repair_btree(struct btrfs_root *root,
3498                         struct cache_tree *corrupt_blocks)
3499 {
3500         struct btrfs_trans_handle *trans;
3501         struct btrfs_path *path;
3502         struct btrfs_corrupt_block *corrupt;
3503         struct cache_extent *cache;
3504         struct btrfs_key key;
3505         u64 offset;
3506         int level;
3507         int ret = 0;
3508
3509         if (cache_tree_empty(corrupt_blocks))
3510                 return 0;
3511
3512         path = btrfs_alloc_path();
3513         if (!path)
3514                 return -ENOMEM;
3515
3516         trans = btrfs_start_transaction(root, 1);
3517         if (IS_ERR(trans)) {
3518                 ret = PTR_ERR(trans);
3519                 fprintf(stderr, "Error starting transaction: %s\n",
3520                         strerror(-ret));
3521                 goto out_free_path;
3522         }
3523         cache = first_cache_extent(corrupt_blocks);
3524         while (cache) {
3525                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3526                                        cache);
3527                 level = corrupt->level;
3528                 path->lowest_level = level;
3529                 key.objectid = corrupt->key.objectid;
3530                 key.type = corrupt->key.type;
3531                 key.offset = corrupt->key.offset;
3532
3533                 /*
3534                  * Here we don't want to do any tree balance, since it may
3535                  * cause a balance with corrupted brother leaf/node,
3536                  * so ins_len set to 0 here.
3537                  * Balance will be done after all corrupt node/leaf is deleted.
3538                  */
3539                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3540                 if (ret < 0)
3541                         goto out;
3542                 offset = btrfs_node_blockptr(path->nodes[level],
3543                                              path->slots[level]);
3544
3545                 /* Remove the ptr */
3546                 ret = btrfs_del_ptr(trans, root, path, level,
3547                                     path->slots[level]);
3548                 if (ret < 0)
3549                         goto out;
3550                 /*
3551                  * Remove the corresponding extent
3552                  * return value is not concerned.
3553                  */
3554                 btrfs_release_path(path);
3555                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3556                                         0, root->root_key.objectid,
3557                                         level - 1, 0);
3558                 cache = next_cache_extent(cache);
3559         }
3560
3561         /* Balance the btree using btrfs_search_slot() */
3562         cache = first_cache_extent(corrupt_blocks);
3563         while (cache) {
3564                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3565                                        cache);
3566                 memcpy(&key, &corrupt->key, sizeof(key));
3567                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3568                 if (ret < 0)
3569                         goto out;
3570                 /* return will always >0 since it won't find the item */
3571                 ret = 0;
3572                 btrfs_release_path(path);
3573                 cache = next_cache_extent(cache);
3574         }
3575 out:
3576         btrfs_commit_transaction(trans, root);
3577 out_free_path:
3578         btrfs_free_path(path);
3579         return ret;
3580 }
3581
3582 static int check_fs_root(struct btrfs_root *root,
3583                          struct cache_tree *root_cache,
3584                          struct walk_control *wc)
3585 {
3586         int ret = 0;
3587         int err = 0;
3588         int wret;
3589         int level;
3590         struct btrfs_path path;
3591         struct shared_node root_node;
3592         struct root_record *rec;
3593         struct btrfs_root_item *root_item = &root->root_item;
3594         struct cache_tree corrupt_blocks;
3595         struct orphan_data_extent *orphan;
3596         struct orphan_data_extent *tmp;
3597         enum btrfs_tree_block_status status;
3598         struct node_refs nrefs;
3599
3600         /*
3601          * Reuse the corrupt_block cache tree to record corrupted tree block
3602          *
3603          * Unlike the usage in extent tree check, here we do it in a per
3604          * fs/subvol tree base.
3605          */
3606         cache_tree_init(&corrupt_blocks);
3607         root->fs_info->corrupt_blocks = &corrupt_blocks;
3608
3609         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3610                 rec = get_root_rec(root_cache, root->root_key.objectid);
3611                 BUG_ON(IS_ERR(rec));
3612                 if (btrfs_root_refs(root_item) > 0)
3613                         rec->found_root_item = 1;
3614         }
3615
3616         btrfs_init_path(&path);
3617         memset(&root_node, 0, sizeof(root_node));
3618         cache_tree_init(&root_node.root_cache);
3619         cache_tree_init(&root_node.inode_cache);
3620         memset(&nrefs, 0, sizeof(nrefs));
3621
3622         /* Move the orphan extent record to corresponding inode_record */
3623         list_for_each_entry_safe(orphan, tmp,
3624                                  &root->orphan_data_extents, list) {
3625                 struct inode_record *inode;
3626
3627                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3628                                       1);
3629                 BUG_ON(IS_ERR(inode));
3630                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3631                 list_move(&orphan->list, &inode->orphan_extents);
3632         }
3633
3634         level = btrfs_header_level(root->node);
3635         memset(wc->nodes, 0, sizeof(wc->nodes));
3636         wc->nodes[level] = &root_node;
3637         wc->active_node = level;
3638         wc->root_level = level;
3639
3640         /* We may not have checked the root block, lets do that now */
3641         if (btrfs_is_leaf(root->node))
3642                 status = btrfs_check_leaf(root, NULL, root->node);
3643         else
3644                 status = btrfs_check_node(root, NULL, root->node);
3645         if (status != BTRFS_TREE_BLOCK_CLEAN)
3646                 return -EIO;
3647
3648         if (btrfs_root_refs(root_item) > 0 ||
3649             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3650                 path.nodes[level] = root->node;
3651                 extent_buffer_get(root->node);
3652                 path.slots[level] = 0;
3653         } else {
3654                 struct btrfs_key key;
3655                 struct btrfs_disk_key found_key;
3656
3657                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3658                 level = root_item->drop_level;
3659                 path.lowest_level = level;
3660                 if (level > btrfs_header_level(root->node) ||
3661                     level >= BTRFS_MAX_LEVEL) {
3662                         error("ignoring invalid drop level: %u", level);
3663                         goto skip_walking;
3664                 }
3665                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3666                 if (wret < 0)
3667                         goto skip_walking;
3668                 btrfs_node_key(path.nodes[level], &found_key,
3669                                 path.slots[level]);
3670                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3671                                         sizeof(found_key)));
3672         }
3673
3674         while (1) {
3675                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3676                 if (wret < 0)
3677                         ret = wret;
3678                 if (wret != 0)
3679                         break;
3680
3681                 wret = walk_up_tree(root, &path, wc, &level);
3682                 if (wret < 0)
3683                         ret = wret;
3684                 if (wret != 0)
3685                         break;
3686         }
3687 skip_walking:
3688         btrfs_release_path(&path);
3689
3690         if (!cache_tree_empty(&corrupt_blocks)) {
3691                 struct cache_extent *cache;
3692                 struct btrfs_corrupt_block *corrupt;
3693
3694                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3695                        root->root_key.objectid);
3696                 cache = first_cache_extent(&corrupt_blocks);
3697                 while (cache) {
3698                         corrupt = container_of(cache,
3699                                                struct btrfs_corrupt_block,
3700                                                cache);
3701                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3702                                cache->start, corrupt->level,
3703                                corrupt->key.objectid, corrupt->key.type,
3704                                corrupt->key.offset);
3705                         cache = next_cache_extent(cache);
3706                 }
3707                 if (repair) {
3708                         printf("Try to repair the btree for root %llu\n",
3709                                root->root_key.objectid);
3710                         ret = repair_btree(root, &corrupt_blocks);
3711                         if (ret < 0)
3712                                 fprintf(stderr, "Failed to repair btree: %s\n",
3713                                         strerror(-ret));
3714                         if (!ret)
3715                                 printf("Btree for root %llu is fixed\n",
3716                                        root->root_key.objectid);
3717                 }
3718         }
3719
3720         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3721         if (err < 0)
3722                 ret = err;
3723
3724         if (root_node.current) {
3725                 root_node.current->checked = 1;
3726                 maybe_free_inode_rec(&root_node.inode_cache,
3727                                 root_node.current);
3728         }
3729
3730         err = check_inode_recs(root, &root_node.inode_cache);
3731         if (!ret)
3732                 ret = err;
3733
3734         free_corrupt_blocks_tree(&corrupt_blocks);
3735         root->fs_info->corrupt_blocks = NULL;
3736         free_orphan_data_extents(&root->orphan_data_extents);
3737         return ret;
3738 }
3739
3740 static int fs_root_objectid(u64 objectid)
3741 {
3742         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3743             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3744                 return 1;
3745         return is_fstree(objectid);
3746 }
3747
3748 static int check_fs_roots(struct btrfs_root *root,
3749                           struct cache_tree *root_cache)
3750 {
3751         struct btrfs_path path;
3752         struct btrfs_key key;
3753         struct walk_control wc;
3754         struct extent_buffer *leaf, *tree_node;
3755         struct btrfs_root *tmp_root;
3756         struct btrfs_root *tree_root = root->fs_info->tree_root;
3757         int ret;
3758         int err = 0;
3759
3760         if (ctx.progress_enabled) {
3761                 ctx.tp = TASK_FS_ROOTS;
3762                 task_start(ctx.info);
3763         }
3764
3765         /*
3766          * Just in case we made any changes to the extent tree that weren't
3767          * reflected into the free space cache yet.
3768          */
3769         if (repair)
3770                 reset_cached_block_groups(root->fs_info);
3771         memset(&wc, 0, sizeof(wc));
3772         cache_tree_init(&wc.shared);
3773         btrfs_init_path(&path);
3774
3775 again:
3776         key.offset = 0;
3777         key.objectid = 0;
3778         key.type = BTRFS_ROOT_ITEM_KEY;
3779         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3780         if (ret < 0) {
3781                 err = 1;
3782                 goto out;
3783         }
3784         tree_node = tree_root->node;
3785         while (1) {
3786                 if (tree_node != tree_root->node) {
3787                         free_root_recs_tree(root_cache);
3788                         btrfs_release_path(&path);
3789                         goto again;
3790                 }
3791                 leaf = path.nodes[0];
3792                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3793                         ret = btrfs_next_leaf(tree_root, &path);
3794                         if (ret) {
3795                                 if (ret < 0)
3796                                         err = 1;
3797                                 break;
3798                         }
3799                         leaf = path.nodes[0];
3800                 }
3801                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3802                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3803                     fs_root_objectid(key.objectid)) {
3804                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3805                                 tmp_root = btrfs_read_fs_root_no_cache(
3806                                                 root->fs_info, &key);
3807                         } else {
3808                                 key.offset = (u64)-1;
3809                                 tmp_root = btrfs_read_fs_root(
3810                                                 root->fs_info, &key);
3811                         }
3812                         if (IS_ERR(tmp_root)) {
3813                                 err = 1;
3814                                 goto next;
3815                         }
3816                         ret = check_fs_root(tmp_root, root_cache, &wc);
3817                         if (ret == -EAGAIN) {
3818                                 free_root_recs_tree(root_cache);
3819                                 btrfs_release_path(&path);
3820                                 goto again;
3821                         }
3822                         if (ret)
3823                                 err = 1;
3824                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3825                                 btrfs_free_fs_root(tmp_root);
3826                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3827                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3828                         process_root_ref(leaf, path.slots[0], &key,
3829                                          root_cache);
3830                 }
3831 next:
3832                 path.slots[0]++;
3833         }
3834 out:
3835         btrfs_release_path(&path);
3836         if (err)
3837                 free_extent_cache_tree(&wc.shared);
3838         if (!cache_tree_empty(&wc.shared))
3839                 fprintf(stderr, "warning line %d\n", __LINE__);
3840
3841         task_stop(ctx.info);
3842
3843         return err;
3844 }
3845
3846 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3847 {
3848         struct list_head *cur = rec->backrefs.next;
3849         struct extent_backref *back;
3850         struct tree_backref *tback;
3851         struct data_backref *dback;
3852         u64 found = 0;
3853         int err = 0;
3854
3855         while(cur != &rec->backrefs) {
3856                 back = to_extent_backref(cur);
3857                 cur = cur->next;
3858                 if (!back->found_extent_tree) {
3859                         err = 1;
3860                         if (!print_errs)
3861                                 goto out;
3862                         if (back->is_data) {
3863                                 dback = to_data_backref(back);
3864                                 fprintf(stderr, "Backref %llu %s %llu"
3865                                         " owner %llu offset %llu num_refs %lu"
3866                                         " not found in extent tree\n",
3867                                         (unsigned long long)rec->start,
3868                                         back->full_backref ?
3869                                         "parent" : "root",
3870                                         back->full_backref ?
3871                                         (unsigned long long)dback->parent:
3872                                         (unsigned long long)dback->root,
3873                                         (unsigned long long)dback->owner,
3874                                         (unsigned long long)dback->offset,
3875                                         (unsigned long)dback->num_refs);
3876                         } else {
3877                                 tback = to_tree_backref(back);
3878                                 fprintf(stderr, "Backref %llu parent %llu"
3879                                         " root %llu not found in extent tree\n",
3880                                         (unsigned long long)rec->start,
3881                                         (unsigned long long)tback->parent,
3882                                         (unsigned long long)tback->root);
3883                         }
3884                 }
3885                 if (!back->is_data && !back->found_ref) {
3886                         err = 1;
3887                         if (!print_errs)
3888                                 goto out;
3889                         tback = to_tree_backref(back);
3890                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3891                                 (unsigned long long)rec->start,
3892                                 back->full_backref ? "parent" : "root",
3893                                 back->full_backref ?
3894                                 (unsigned long long)tback->parent :
3895                                 (unsigned long long)tback->root, back);
3896                 }
3897                 if (back->is_data) {
3898                         dback = to_data_backref(back);
3899                         if (dback->found_ref != dback->num_refs) {
3900                                 err = 1;
3901                                 if (!print_errs)
3902                                         goto out;
3903                                 fprintf(stderr, "Incorrect local backref count"
3904                                         " on %llu %s %llu owner %llu"
3905                                         " offset %llu found %u wanted %u back %p\n",
3906                                         (unsigned long long)rec->start,
3907                                         back->full_backref ?
3908                                         "parent" : "root",
3909                                         back->full_backref ?
3910                                         (unsigned long long)dback->parent:
3911                                         (unsigned long long)dback->root,
3912                                         (unsigned long long)dback->owner,
3913                                         (unsigned long long)dback->offset,
3914                                         dback->found_ref, dback->num_refs, back);
3915                         }
3916                         if (dback->disk_bytenr != rec->start) {
3917                                 err = 1;
3918                                 if (!print_errs)
3919                                         goto out;
3920                                 fprintf(stderr, "Backref disk bytenr does not"
3921                                         " match extent record, bytenr=%llu, "
3922                                         "ref bytenr=%llu\n",
3923                                         (unsigned long long)rec->start,
3924                                         (unsigned long long)dback->disk_bytenr);
3925                         }
3926
3927                         if (dback->bytes != rec->nr) {
3928                                 err = 1;
3929                                 if (!print_errs)
3930                                         goto out;
3931                                 fprintf(stderr, "Backref bytes do not match "
3932                                         "extent backref, bytenr=%llu, ref "
3933                                         "bytes=%llu, backref bytes=%llu\n",
3934                                         (unsigned long long)rec->start,
3935                                         (unsigned long long)rec->nr,
3936                                         (unsigned long long)dback->bytes);
3937                         }
3938                 }
3939                 if (!back->is_data) {
3940                         found += 1;
3941                 } else {
3942                         dback = to_data_backref(back);
3943                         found += dback->found_ref;
3944                 }
3945         }
3946         if (found != rec->refs) {
3947                 err = 1;
3948                 if (!print_errs)
3949                         goto out;
3950                 fprintf(stderr, "Incorrect global backref count "
3951                         "on %llu found %llu wanted %llu\n",
3952                         (unsigned long long)rec->start,
3953                         (unsigned long long)found,
3954                         (unsigned long long)rec->refs);
3955         }
3956 out:
3957         return err;
3958 }
3959
3960 static int free_all_extent_backrefs(struct extent_record *rec)
3961 {
3962         struct extent_backref *back;
3963         struct list_head *cur;
3964         while (!list_empty(&rec->backrefs)) {
3965                 cur = rec->backrefs.next;
3966                 back = to_extent_backref(cur);
3967                 list_del(cur);
3968                 free(back);
3969         }
3970         return 0;
3971 }
3972
3973 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3974                                      struct cache_tree *extent_cache)
3975 {
3976         struct cache_extent *cache;
3977         struct extent_record *rec;
3978
3979         while (1) {
3980                 cache = first_cache_extent(extent_cache);
3981                 if (!cache)
3982                         break;
3983                 rec = container_of(cache, struct extent_record, cache);
3984                 remove_cache_extent(extent_cache, cache);
3985                 free_all_extent_backrefs(rec);
3986                 free(rec);
3987         }
3988 }
3989
3990 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3991                                  struct extent_record *rec)
3992 {
3993         if (rec->content_checked && rec->owner_ref_checked &&
3994             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3995             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3996             !rec->bad_full_backref && !rec->crossing_stripes &&
3997             !rec->wrong_chunk_type) {
3998                 remove_cache_extent(extent_cache, &rec->cache);
3999                 free_all_extent_backrefs(rec);
4000                 list_del_init(&rec->list);
4001                 free(rec);
4002         }
4003         return 0;
4004 }
4005
4006 static int check_owner_ref(struct btrfs_root *root,
4007                             struct extent_record *rec,
4008                             struct extent_buffer *buf)
4009 {
4010         struct extent_backref *node;
4011         struct tree_backref *back;
4012         struct btrfs_root *ref_root;
4013         struct btrfs_key key;
4014         struct btrfs_path path;
4015         struct extent_buffer *parent;
4016         int level;
4017         int found = 0;
4018         int ret;
4019
4020         list_for_each_entry(node, &rec->backrefs, list) {
4021                 if (node->is_data)
4022                         continue;
4023                 if (!node->found_ref)
4024                         continue;
4025                 if (node->full_backref)
4026                         continue;
4027                 back = to_tree_backref(node);
4028                 if (btrfs_header_owner(buf) == back->root)
4029                         return 0;
4030         }
4031         BUG_ON(rec->is_root);
4032
4033         /* try to find the block by search corresponding fs tree */
4034         key.objectid = btrfs_header_owner(buf);
4035         key.type = BTRFS_ROOT_ITEM_KEY;
4036         key.offset = (u64)-1;
4037
4038         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4039         if (IS_ERR(ref_root))
4040                 return 1;
4041
4042         level = btrfs_header_level(buf);
4043         if (level == 0)
4044                 btrfs_item_key_to_cpu(buf, &key, 0);
4045         else
4046                 btrfs_node_key_to_cpu(buf, &key, 0);
4047
4048         btrfs_init_path(&path);
4049         path.lowest_level = level + 1;
4050         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4051         if (ret < 0)
4052                 return 0;
4053
4054         parent = path.nodes[level + 1];
4055         if (parent && buf->start == btrfs_node_blockptr(parent,
4056                                                         path.slots[level + 1]))
4057                 found = 1;
4058
4059         btrfs_release_path(&path);
4060         return found ? 0 : 1;
4061 }
4062
4063 static int is_extent_tree_record(struct extent_record *rec)
4064 {
4065         struct list_head *cur = rec->backrefs.next;
4066         struct extent_backref *node;
4067         struct tree_backref *back;
4068         int is_extent = 0;
4069
4070         while(cur != &rec->backrefs) {
4071                 node = to_extent_backref(cur);
4072                 cur = cur->next;
4073                 if (node->is_data)
4074                         return 0;
4075                 back = to_tree_backref(node);
4076                 if (node->full_backref)
4077                         return 0;
4078                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4079                         is_extent = 1;
4080         }
4081         return is_extent;
4082 }
4083
4084
4085 static int record_bad_block_io(struct btrfs_fs_info *info,
4086                                struct cache_tree *extent_cache,
4087                                u64 start, u64 len)
4088 {
4089         struct extent_record *rec;
4090         struct cache_extent *cache;
4091         struct btrfs_key key;
4092
4093         cache = lookup_cache_extent(extent_cache, start, len);
4094         if (!cache)
4095                 return 0;
4096
4097         rec = container_of(cache, struct extent_record, cache);
4098         if (!is_extent_tree_record(rec))
4099                 return 0;
4100
4101         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4102         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4103 }
4104
4105 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4106                        struct extent_buffer *buf, int slot)
4107 {
4108         if (btrfs_header_level(buf)) {
4109                 struct btrfs_key_ptr ptr1, ptr2;
4110
4111                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4112                                    sizeof(struct btrfs_key_ptr));
4113                 read_extent_buffer(buf, &ptr2,
4114                                    btrfs_node_key_ptr_offset(slot + 1),
4115                                    sizeof(struct btrfs_key_ptr));
4116                 write_extent_buffer(buf, &ptr1,
4117                                     btrfs_node_key_ptr_offset(slot + 1),
4118                                     sizeof(struct btrfs_key_ptr));
4119                 write_extent_buffer(buf, &ptr2,
4120                                     btrfs_node_key_ptr_offset(slot),
4121                                     sizeof(struct btrfs_key_ptr));
4122                 if (slot == 0) {
4123                         struct btrfs_disk_key key;
4124                         btrfs_node_key(buf, &key, 0);
4125                         btrfs_fixup_low_keys(root, path, &key,
4126                                              btrfs_header_level(buf) + 1);
4127                 }
4128         } else {
4129                 struct btrfs_item *item1, *item2;
4130                 struct btrfs_key k1, k2;
4131                 char *item1_data, *item2_data;
4132                 u32 item1_offset, item2_offset, item1_size, item2_size;
4133
4134                 item1 = btrfs_item_nr(slot);
4135                 item2 = btrfs_item_nr(slot + 1);
4136                 btrfs_item_key_to_cpu(buf, &k1, slot);
4137                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4138                 item1_offset = btrfs_item_offset(buf, item1);
4139                 item2_offset = btrfs_item_offset(buf, item2);
4140                 item1_size = btrfs_item_size(buf, item1);
4141                 item2_size = btrfs_item_size(buf, item2);
4142
4143                 item1_data = malloc(item1_size);
4144                 if (!item1_data)
4145                         return -ENOMEM;
4146                 item2_data = malloc(item2_size);
4147                 if (!item2_data) {
4148                         free(item1_data);
4149                         return -ENOMEM;
4150                 }
4151
4152                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4153                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4154
4155                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4156                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4157                 free(item1_data);
4158                 free(item2_data);
4159
4160                 btrfs_set_item_offset(buf, item1, item2_offset);
4161                 btrfs_set_item_offset(buf, item2, item1_offset);
4162                 btrfs_set_item_size(buf, item1, item2_size);
4163                 btrfs_set_item_size(buf, item2, item1_size);
4164
4165                 path->slots[0] = slot;
4166                 btrfs_set_item_key_unsafe(root, path, &k2);
4167                 path->slots[0] = slot + 1;
4168                 btrfs_set_item_key_unsafe(root, path, &k1);
4169         }
4170         return 0;
4171 }
4172
4173 static int fix_key_order(struct btrfs_trans_handle *trans,
4174                          struct btrfs_root *root,
4175                          struct btrfs_path *path)
4176 {
4177         struct extent_buffer *buf;
4178         struct btrfs_key k1, k2;
4179         int i;
4180         int level = path->lowest_level;
4181         int ret = -EIO;
4182
4183         buf = path->nodes[level];
4184         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4185                 if (level) {
4186                         btrfs_node_key_to_cpu(buf, &k1, i);
4187                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4188                 } else {
4189                         btrfs_item_key_to_cpu(buf, &k1, i);
4190                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4191                 }
4192                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4193                         continue;
4194                 ret = swap_values(root, path, buf, i);
4195                 if (ret)
4196                         break;
4197                 btrfs_mark_buffer_dirty(buf);
4198                 i = 0;
4199         }
4200         return ret;
4201 }
4202
4203 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4204                              struct btrfs_root *root,
4205                              struct btrfs_path *path,
4206                              struct extent_buffer *buf, int slot)
4207 {
4208         struct btrfs_key key;
4209         int nritems = btrfs_header_nritems(buf);
4210
4211         btrfs_item_key_to_cpu(buf, &key, slot);
4212
4213         /* These are all the keys we can deal with missing. */
4214         if (key.type != BTRFS_DIR_INDEX_KEY &&
4215             key.type != BTRFS_EXTENT_ITEM_KEY &&
4216             key.type != BTRFS_METADATA_ITEM_KEY &&
4217             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4218             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4219                 return -1;
4220
4221         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4222                (unsigned long long)key.objectid, key.type,
4223                (unsigned long long)key.offset, slot, buf->start);
4224         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4225                               btrfs_item_nr_offset(slot + 1),
4226                               sizeof(struct btrfs_item) *
4227                               (nritems - slot - 1));
4228         btrfs_set_header_nritems(buf, nritems - 1);
4229         if (slot == 0) {
4230                 struct btrfs_disk_key disk_key;
4231
4232                 btrfs_item_key(buf, &disk_key, 0);
4233                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4234         }
4235         btrfs_mark_buffer_dirty(buf);
4236         return 0;
4237 }
4238
4239 static int fix_item_offset(struct btrfs_trans_handle *trans,
4240                            struct btrfs_root *root,
4241                            struct btrfs_path *path)
4242 {
4243         struct extent_buffer *buf;
4244         int i;
4245         int ret = 0;
4246
4247         /* We should only get this for leaves */
4248         BUG_ON(path->lowest_level);
4249         buf = path->nodes[0];
4250 again:
4251         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4252                 unsigned int shift = 0, offset;
4253
4254                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4255                     BTRFS_LEAF_DATA_SIZE(root)) {
4256                         if (btrfs_item_end_nr(buf, i) >
4257                             BTRFS_LEAF_DATA_SIZE(root)) {
4258                                 ret = delete_bogus_item(trans, root, path,
4259                                                         buf, i);
4260                                 if (!ret)
4261                                         goto again;
4262                                 fprintf(stderr, "item is off the end of the "
4263                                         "leaf, can't fix\n");
4264                                 ret = -EIO;
4265                                 break;
4266                         }
4267                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4268                                 btrfs_item_end_nr(buf, i);
4269                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4270                            btrfs_item_offset_nr(buf, i - 1)) {
4271                         if (btrfs_item_end_nr(buf, i) >
4272                             btrfs_item_offset_nr(buf, i - 1)) {
4273                                 ret = delete_bogus_item(trans, root, path,
4274                                                         buf, i);
4275                                 if (!ret)
4276                                         goto again;
4277                                 fprintf(stderr, "items overlap, can't fix\n");
4278                                 ret = -EIO;
4279                                 break;
4280                         }
4281                         shift = btrfs_item_offset_nr(buf, i - 1) -
4282                                 btrfs_item_end_nr(buf, i);
4283                 }
4284                 if (!shift)
4285                         continue;
4286
4287                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4288                        i, shift, (unsigned long long)buf->start);
4289                 offset = btrfs_item_offset_nr(buf, i);
4290                 memmove_extent_buffer(buf,
4291                                       btrfs_leaf_data(buf) + offset + shift,
4292                                       btrfs_leaf_data(buf) + offset,
4293                                       btrfs_item_size_nr(buf, i));
4294                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4295                                       offset + shift);
4296                 btrfs_mark_buffer_dirty(buf);
4297         }
4298
4299         /*
4300          * We may have moved things, in which case we want to exit so we don't
4301          * write those changes out.  Once we have proper abort functionality in
4302          * progs this can be changed to something nicer.
4303          */
4304         BUG_ON(ret);
4305         return ret;
4306 }
4307
4308 /*
4309  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4310  * then just return -EIO.
4311  */
4312 static int try_to_fix_bad_block(struct btrfs_root *root,
4313                                 struct extent_buffer *buf,
4314                                 enum btrfs_tree_block_status status)
4315 {
4316         struct btrfs_trans_handle *trans;
4317         struct ulist *roots;
4318         struct ulist_node *node;
4319         struct btrfs_root *search_root;
4320         struct btrfs_path *path;
4321         struct ulist_iterator iter;
4322         struct btrfs_key root_key, key;
4323         int ret;
4324
4325         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4326             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4327                 return -EIO;
4328
4329         path = btrfs_alloc_path();
4330         if (!path)
4331                 return -EIO;
4332
4333         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4334                                    0, &roots);
4335         if (ret) {
4336                 btrfs_free_path(path);
4337                 return -EIO;
4338         }
4339
4340         ULIST_ITER_INIT(&iter);
4341         while ((node = ulist_next(roots, &iter))) {
4342                 root_key.objectid = node->val;
4343                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4344                 root_key.offset = (u64)-1;
4345
4346                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4347                 if (IS_ERR(root)) {
4348                         ret = -EIO;
4349                         break;
4350                 }
4351
4352
4353                 trans = btrfs_start_transaction(search_root, 0);
4354                 if (IS_ERR(trans)) {
4355                         ret = PTR_ERR(trans);
4356                         break;
4357                 }
4358
4359                 path->lowest_level = btrfs_header_level(buf);
4360                 path->skip_check_block = 1;
4361                 if (path->lowest_level)
4362                         btrfs_node_key_to_cpu(buf, &key, 0);
4363                 else
4364                         btrfs_item_key_to_cpu(buf, &key, 0);
4365                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4366                 if (ret) {
4367                         ret = -EIO;
4368                         btrfs_commit_transaction(trans, search_root);
4369                         break;
4370                 }
4371                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4372                         ret = fix_key_order(trans, search_root, path);
4373                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4374                         ret = fix_item_offset(trans, search_root, path);
4375                 if (ret) {
4376                         btrfs_commit_transaction(trans, search_root);
4377                         break;
4378                 }
4379                 btrfs_release_path(path);
4380                 btrfs_commit_transaction(trans, search_root);
4381         }
4382         ulist_free(roots);
4383         btrfs_free_path(path);
4384         return ret;
4385 }
4386
4387 static int check_block(struct btrfs_root *root,
4388                        struct cache_tree *extent_cache,
4389                        struct extent_buffer *buf, u64 flags)
4390 {
4391         struct extent_record *rec;
4392         struct cache_extent *cache;
4393         struct btrfs_key key;
4394         enum btrfs_tree_block_status status;
4395         int ret = 0;
4396         int level;
4397
4398         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4399         if (!cache)
4400                 return 1;
4401         rec = container_of(cache, struct extent_record, cache);
4402         rec->generation = btrfs_header_generation(buf);
4403
4404         level = btrfs_header_level(buf);
4405         if (btrfs_header_nritems(buf) > 0) {
4406
4407                 if (level == 0)
4408                         btrfs_item_key_to_cpu(buf, &key, 0);
4409                 else
4410                         btrfs_node_key_to_cpu(buf, &key, 0);
4411
4412                 rec->info_objectid = key.objectid;
4413         }
4414         rec->info_level = level;
4415
4416         if (btrfs_is_leaf(buf))
4417                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4418         else
4419                 status = btrfs_check_node(root, &rec->parent_key, buf);
4420
4421         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4422                 if (repair)
4423                         status = try_to_fix_bad_block(root, buf, status);
4424                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4425                         ret = -EIO;
4426                         fprintf(stderr, "bad block %llu\n",
4427                                 (unsigned long long)buf->start);
4428                 } else {
4429                         /*
4430                          * Signal to callers we need to start the scan over
4431                          * again since we'll have cowed blocks.
4432                          */
4433                         ret = -EAGAIN;
4434                 }
4435         } else {
4436                 rec->content_checked = 1;
4437                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4438                         rec->owner_ref_checked = 1;
4439                 else {
4440                         ret = check_owner_ref(root, rec, buf);
4441                         if (!ret)
4442                                 rec->owner_ref_checked = 1;
4443                 }
4444         }
4445         if (!ret)
4446                 maybe_free_extent_rec(extent_cache, rec);
4447         return ret;
4448 }
4449
4450 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4451                                                 u64 parent, u64 root)
4452 {
4453         struct list_head *cur = rec->backrefs.next;
4454         struct extent_backref *node;
4455         struct tree_backref *back;
4456
4457         while(cur != &rec->backrefs) {
4458                 node = to_extent_backref(cur);
4459                 cur = cur->next;
4460                 if (node->is_data)
4461                         continue;
4462                 back = to_tree_backref(node);
4463                 if (parent > 0) {
4464                         if (!node->full_backref)
4465                                 continue;
4466                         if (parent == back->parent)
4467                                 return back;
4468                 } else {
4469                         if (node->full_backref)
4470                                 continue;
4471                         if (back->root == root)
4472                                 return back;
4473                 }
4474         }
4475         return NULL;
4476 }
4477
4478 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4479                                                 u64 parent, u64 root)
4480 {
4481         struct tree_backref *ref = malloc(sizeof(*ref));
4482
4483         if (!ref)
4484                 return NULL;
4485         memset(&ref->node, 0, sizeof(ref->node));
4486         if (parent > 0) {
4487                 ref->parent = parent;
4488                 ref->node.full_backref = 1;
4489         } else {
4490                 ref->root = root;
4491                 ref->node.full_backref = 0;
4492         }
4493         list_add_tail(&ref->node.list, &rec->backrefs);
4494
4495         return ref;
4496 }
4497
4498 static struct data_backref *find_data_backref(struct extent_record *rec,
4499                                                 u64 parent, u64 root,
4500                                                 u64 owner, u64 offset,
4501                                                 int found_ref,
4502                                                 u64 disk_bytenr, u64 bytes)
4503 {
4504         struct list_head *cur = rec->backrefs.next;
4505         struct extent_backref *node;
4506         struct data_backref *back;
4507
4508         while(cur != &rec->backrefs) {
4509                 node = to_extent_backref(cur);
4510                 cur = cur->next;
4511                 if (!node->is_data)
4512                         continue;
4513                 back = to_data_backref(node);
4514                 if (parent > 0) {
4515                         if (!node->full_backref)
4516                                 continue;
4517                         if (parent == back->parent)
4518                                 return back;
4519                 } else {
4520                         if (node->full_backref)
4521                                 continue;
4522                         if (back->root == root && back->owner == owner &&
4523                             back->offset == offset) {
4524                                 if (found_ref && node->found_ref &&
4525                                     (back->bytes != bytes ||
4526                                     back->disk_bytenr != disk_bytenr))
4527                                         continue;
4528                                 return back;
4529                         }
4530                 }
4531         }
4532         return NULL;
4533 }
4534
4535 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4536                                                 u64 parent, u64 root,
4537                                                 u64 owner, u64 offset,
4538                                                 u64 max_size)
4539 {
4540         struct data_backref *ref = malloc(sizeof(*ref));
4541
4542         if (!ref)
4543                 return NULL;
4544         memset(&ref->node, 0, sizeof(ref->node));
4545         ref->node.is_data = 1;
4546
4547         if (parent > 0) {
4548                 ref->parent = parent;
4549                 ref->owner = 0;
4550                 ref->offset = 0;
4551                 ref->node.full_backref = 1;
4552         } else {
4553                 ref->root = root;
4554                 ref->owner = owner;
4555                 ref->offset = offset;
4556                 ref->node.full_backref = 0;
4557         }
4558         ref->bytes = max_size;
4559         ref->found_ref = 0;
4560         ref->num_refs = 0;
4561         list_add_tail(&ref->node.list, &rec->backrefs);
4562         if (max_size > rec->max_size)
4563                 rec->max_size = max_size;
4564         return ref;
4565 }
4566
4567 /* Check if the type of extent matches with its chunk */
4568 static void check_extent_type(struct extent_record *rec)
4569 {
4570         struct btrfs_block_group_cache *bg_cache;
4571
4572         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4573         if (!bg_cache)
4574                 return;
4575
4576         /* data extent, check chunk directly*/
4577         if (!rec->metadata) {
4578                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4579                         rec->wrong_chunk_type = 1;
4580                 return;
4581         }
4582
4583         /* metadata extent, check the obvious case first */
4584         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4585                                  BTRFS_BLOCK_GROUP_METADATA))) {
4586                 rec->wrong_chunk_type = 1;
4587                 return;
4588         }
4589
4590         /*
4591          * Check SYSTEM extent, as it's also marked as metadata, we can only
4592          * make sure it's a SYSTEM extent by its backref
4593          */
4594         if (!list_empty(&rec->backrefs)) {
4595                 struct extent_backref *node;
4596                 struct tree_backref *tback;
4597                 u64 bg_type;
4598
4599                 node = to_extent_backref(rec->backrefs.next);
4600                 if (node->is_data) {
4601                         /* tree block shouldn't have data backref */
4602                         rec->wrong_chunk_type = 1;
4603                         return;
4604                 }
4605                 tback = container_of(node, struct tree_backref, node);
4606
4607                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4608                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4609                 else
4610                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4611                 if (!(bg_cache->flags & bg_type))
4612                         rec->wrong_chunk_type = 1;
4613         }
4614 }
4615
4616 /*
4617  * Allocate a new extent record, fill default values from @tmpl and insert int
4618  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4619  * the cache, otherwise it fails.
4620  */
4621 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4622                 struct extent_record *tmpl)
4623 {
4624         struct extent_record *rec;
4625         int ret = 0;
4626
4627         rec = malloc(sizeof(*rec));
4628         if (!rec)
4629                 return -ENOMEM;
4630         rec->start = tmpl->start;
4631         rec->max_size = tmpl->max_size;
4632         rec->nr = max(tmpl->nr, tmpl->max_size);
4633         rec->found_rec = tmpl->found_rec;
4634         rec->content_checked = tmpl->content_checked;
4635         rec->owner_ref_checked = tmpl->owner_ref_checked;
4636         rec->num_duplicates = 0;
4637         rec->metadata = tmpl->metadata;
4638         rec->flag_block_full_backref = FLAG_UNSET;
4639         rec->bad_full_backref = 0;
4640         rec->crossing_stripes = 0;
4641         rec->wrong_chunk_type = 0;
4642         rec->is_root = tmpl->is_root;
4643         rec->refs = tmpl->refs;
4644         rec->extent_item_refs = tmpl->extent_item_refs;
4645         rec->parent_generation = tmpl->parent_generation;
4646         INIT_LIST_HEAD(&rec->backrefs);
4647         INIT_LIST_HEAD(&rec->dups);
4648         INIT_LIST_HEAD(&rec->list);
4649         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4650         rec->cache.start = tmpl->start;
4651         rec->cache.size = tmpl->nr;
4652         ret = insert_cache_extent(extent_cache, &rec->cache);
4653         if (ret) {
4654                 free(rec);
4655                 return ret;
4656         }
4657         bytes_used += rec->nr;
4658
4659         if (tmpl->metadata)
4660                 rec->crossing_stripes = check_crossing_stripes(global_info,
4661                                 rec->start, global_info->tree_root->nodesize);
4662         check_extent_type(rec);
4663         return ret;
4664 }
4665
4666 /*
4667  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4668  * some are hints:
4669  * - refs              - if found, increase refs
4670  * - is_root           - if found, set
4671  * - content_checked   - if found, set
4672  * - owner_ref_checked - if found, set
4673  *
4674  * If not found, create a new one, initialize and insert.
4675  */
4676 static int add_extent_rec(struct cache_tree *extent_cache,
4677                 struct extent_record *tmpl)
4678 {
4679         struct extent_record *rec;
4680         struct cache_extent *cache;
4681         int ret = 0;
4682         int dup = 0;
4683
4684         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4685         if (cache) {
4686                 rec = container_of(cache, struct extent_record, cache);
4687                 if (tmpl->refs)
4688                         rec->refs++;
4689                 if (rec->nr == 1)
4690                         rec->nr = max(tmpl->nr, tmpl->max_size);
4691
4692                 /*
4693                  * We need to make sure to reset nr to whatever the extent
4694                  * record says was the real size, this way we can compare it to
4695                  * the backrefs.
4696                  */
4697                 if (tmpl->found_rec) {
4698                         if (tmpl->start != rec->start || rec->found_rec) {
4699                                 struct extent_record *tmp;
4700
4701                                 dup = 1;
4702                                 if (list_empty(&rec->list))
4703                                         list_add_tail(&rec->list,
4704                                                       &duplicate_extents);
4705
4706                                 /*
4707                                  * We have to do this song and dance in case we
4708                                  * find an extent record that falls inside of
4709                                  * our current extent record but does not have
4710                                  * the same objectid.
4711                                  */
4712                                 tmp = malloc(sizeof(*tmp));
4713                                 if (!tmp)
4714                                         return -ENOMEM;
4715                                 tmp->start = tmpl->start;
4716                                 tmp->max_size = tmpl->max_size;
4717                                 tmp->nr = tmpl->nr;
4718                                 tmp->found_rec = 1;
4719                                 tmp->metadata = tmpl->metadata;
4720                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4721                                 INIT_LIST_HEAD(&tmp->list);
4722                                 list_add_tail(&tmp->list, &rec->dups);
4723                                 rec->num_duplicates++;
4724                         } else {
4725                                 rec->nr = tmpl->nr;
4726                                 rec->found_rec = 1;
4727                         }
4728                 }
4729
4730                 if (tmpl->extent_item_refs && !dup) {
4731                         if (rec->extent_item_refs) {
4732                                 fprintf(stderr, "block %llu rec "
4733                                         "extent_item_refs %llu, passed %llu\n",
4734                                         (unsigned long long)tmpl->start,
4735                                         (unsigned long long)
4736                                                         rec->extent_item_refs,
4737                                         (unsigned long long)tmpl->extent_item_refs);
4738                         }
4739                         rec->extent_item_refs = tmpl->extent_item_refs;
4740                 }
4741                 if (tmpl->is_root)
4742                         rec->is_root = 1;
4743                 if (tmpl->content_checked)
4744                         rec->content_checked = 1;
4745                 if (tmpl->owner_ref_checked)
4746                         rec->owner_ref_checked = 1;
4747                 memcpy(&rec->parent_key, &tmpl->parent_key,
4748                                 sizeof(tmpl->parent_key));
4749                 if (tmpl->parent_generation)
4750                         rec->parent_generation = tmpl->parent_generation;
4751                 if (rec->max_size < tmpl->max_size)
4752                         rec->max_size = tmpl->max_size;
4753
4754                 /*
4755                  * A metadata extent can't cross stripe_len boundary, otherwise
4756                  * kernel scrub won't be able to handle it.
4757                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4758                  * it.
4759                  */
4760                 if (tmpl->metadata)
4761                         rec->crossing_stripes = check_crossing_stripes(
4762                                         global_info, rec->start,
4763                                         global_info->tree_root->nodesize);
4764                 check_extent_type(rec);
4765                 maybe_free_extent_rec(extent_cache, rec);
4766                 return ret;
4767         }
4768
4769         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4770
4771         return ret;
4772 }
4773
4774 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4775                             u64 parent, u64 root, int found_ref)
4776 {
4777         struct extent_record *rec;
4778         struct tree_backref *back;
4779         struct cache_extent *cache;
4780         int ret;
4781
4782         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4783         if (!cache) {
4784                 struct extent_record tmpl;
4785
4786                 memset(&tmpl, 0, sizeof(tmpl));
4787                 tmpl.start = bytenr;
4788                 tmpl.nr = 1;
4789                 tmpl.metadata = 1;
4790
4791                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4792                 if (ret)
4793                         return ret;
4794
4795                 /* really a bug in cache_extent implement now */
4796                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4797                 if (!cache)
4798                         return -ENOENT;
4799         }
4800
4801         rec = container_of(cache, struct extent_record, cache);
4802         if (rec->start != bytenr) {
4803                 /*
4804                  * Several cause, from unaligned bytenr to over lapping extents
4805                  */
4806                 return -EEXIST;
4807         }
4808
4809         back = find_tree_backref(rec, parent, root);
4810         if (!back) {
4811                 back = alloc_tree_backref(rec, parent, root);
4812                 if (!back)
4813                         return -ENOMEM;
4814         }
4815
4816         if (found_ref) {
4817                 if (back->node.found_ref) {
4818                         fprintf(stderr, "Extent back ref already exists "
4819                                 "for %llu parent %llu root %llu \n",
4820                                 (unsigned long long)bytenr,
4821                                 (unsigned long long)parent,
4822                                 (unsigned long long)root);
4823                 }
4824                 back->node.found_ref = 1;
4825         } else {
4826                 if (back->node.found_extent_tree) {
4827                         fprintf(stderr, "Extent back ref already exists "
4828                                 "for %llu parent %llu root %llu \n",
4829                                 (unsigned long long)bytenr,
4830                                 (unsigned long long)parent,
4831                                 (unsigned long long)root);
4832                 }
4833                 back->node.found_extent_tree = 1;
4834         }
4835         check_extent_type(rec);
4836         maybe_free_extent_rec(extent_cache, rec);
4837         return 0;
4838 }
4839
4840 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4841                             u64 parent, u64 root, u64 owner, u64 offset,
4842                             u32 num_refs, int found_ref, u64 max_size)
4843 {
4844         struct extent_record *rec;
4845         struct data_backref *back;
4846         struct cache_extent *cache;
4847         int ret;
4848
4849         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4850         if (!cache) {
4851                 struct extent_record tmpl;
4852
4853                 memset(&tmpl, 0, sizeof(tmpl));
4854                 tmpl.start = bytenr;
4855                 tmpl.nr = 1;
4856                 tmpl.max_size = max_size;
4857
4858                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4859                 if (ret)
4860                         return ret;
4861
4862                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4863                 if (!cache)
4864                         abort();
4865         }
4866
4867         rec = container_of(cache, struct extent_record, cache);
4868         if (rec->max_size < max_size)
4869                 rec->max_size = max_size;
4870
4871         /*
4872          * If found_ref is set then max_size is the real size and must match the
4873          * existing refs.  So if we have already found a ref then we need to
4874          * make sure that this ref matches the existing one, otherwise we need
4875          * to add a new backref so we can notice that the backrefs don't match
4876          * and we need to figure out who is telling the truth.  This is to
4877          * account for that awful fsync bug I introduced where we'd end up with
4878          * a btrfs_file_extent_item that would have its length include multiple
4879          * prealloc extents or point inside of a prealloc extent.
4880          */
4881         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4882                                  bytenr, max_size);
4883         if (!back) {
4884                 back = alloc_data_backref(rec, parent, root, owner, offset,
4885                                           max_size);
4886                 BUG_ON(!back);
4887         }
4888
4889         if (found_ref) {
4890                 BUG_ON(num_refs != 1);
4891                 if (back->node.found_ref)
4892                         BUG_ON(back->bytes != max_size);
4893                 back->node.found_ref = 1;
4894                 back->found_ref += 1;
4895                 back->bytes = max_size;
4896                 back->disk_bytenr = bytenr;
4897                 rec->refs += 1;
4898                 rec->content_checked = 1;
4899                 rec->owner_ref_checked = 1;
4900         } else {
4901                 if (back->node.found_extent_tree) {
4902                         fprintf(stderr, "Extent back ref already exists "
4903                                 "for %llu parent %llu root %llu "
4904                                 "owner %llu offset %llu num_refs %lu\n",
4905                                 (unsigned long long)bytenr,
4906                                 (unsigned long long)parent,
4907                                 (unsigned long long)root,
4908                                 (unsigned long long)owner,
4909                                 (unsigned long long)offset,
4910                                 (unsigned long)num_refs);
4911                 }
4912                 back->num_refs = num_refs;
4913                 back->node.found_extent_tree = 1;
4914         }
4915         maybe_free_extent_rec(extent_cache, rec);
4916         return 0;
4917 }
4918
4919 static int add_pending(struct cache_tree *pending,
4920                        struct cache_tree *seen, u64 bytenr, u32 size)
4921 {
4922         int ret;
4923         ret = add_cache_extent(seen, bytenr, size);
4924         if (ret)
4925                 return ret;
4926         add_cache_extent(pending, bytenr, size);
4927         return 0;
4928 }
4929
4930 static int pick_next_pending(struct cache_tree *pending,
4931                         struct cache_tree *reada,
4932                         struct cache_tree *nodes,
4933                         u64 last, struct block_info *bits, int bits_nr,
4934                         int *reada_bits)
4935 {
4936         unsigned long node_start = last;
4937         struct cache_extent *cache;
4938         int ret;
4939
4940         cache = search_cache_extent(reada, 0);
4941         if (cache) {
4942                 bits[0].start = cache->start;
4943                 bits[0].size = cache->size;
4944                 *reada_bits = 1;
4945                 return 1;
4946         }
4947         *reada_bits = 0;
4948         if (node_start > 32768)
4949                 node_start -= 32768;
4950
4951         cache = search_cache_extent(nodes, node_start);
4952         if (!cache)
4953                 cache = search_cache_extent(nodes, 0);
4954
4955         if (!cache) {
4956                  cache = search_cache_extent(pending, 0);
4957                  if (!cache)
4958                          return 0;
4959                  ret = 0;
4960                  do {
4961                          bits[ret].start = cache->start;
4962                          bits[ret].size = cache->size;
4963                          cache = next_cache_extent(cache);
4964                          ret++;
4965                  } while (cache && ret < bits_nr);
4966                  return ret;
4967         }
4968
4969         ret = 0;
4970         do {
4971                 bits[ret].start = cache->start;
4972                 bits[ret].size = cache->size;
4973                 cache = next_cache_extent(cache);
4974                 ret++;
4975         } while (cache && ret < bits_nr);
4976
4977         if (bits_nr - ret > 8) {
4978                 u64 lookup = bits[0].start + bits[0].size;
4979                 struct cache_extent *next;
4980                 next = search_cache_extent(pending, lookup);
4981                 while(next) {
4982                         if (next->start - lookup > 32768)
4983                                 break;
4984                         bits[ret].start = next->start;
4985                         bits[ret].size = next->size;
4986                         lookup = next->start + next->size;
4987                         ret++;
4988                         if (ret == bits_nr)
4989                                 break;
4990                         next = next_cache_extent(next);
4991                         if (!next)
4992                                 break;
4993                 }
4994         }
4995         return ret;
4996 }
4997
4998 static void free_chunk_record(struct cache_extent *cache)
4999 {
5000         struct chunk_record *rec;
5001
5002         rec = container_of(cache, struct chunk_record, cache);
5003         list_del_init(&rec->list);
5004         list_del_init(&rec->dextents);
5005         free(rec);
5006 }
5007
5008 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5009 {
5010         cache_tree_free_extents(chunk_cache, free_chunk_record);
5011 }
5012
5013 static void free_device_record(struct rb_node *node)
5014 {
5015         struct device_record *rec;
5016
5017         rec = container_of(node, struct device_record, node);
5018         free(rec);
5019 }
5020
5021 FREE_RB_BASED_TREE(device_cache, free_device_record);
5022
5023 int insert_block_group_record(struct block_group_tree *tree,
5024                               struct block_group_record *bg_rec)
5025 {
5026         int ret;
5027
5028         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5029         if (ret)
5030                 return ret;
5031
5032         list_add_tail(&bg_rec->list, &tree->block_groups);
5033         return 0;
5034 }
5035
5036 static void free_block_group_record(struct cache_extent *cache)
5037 {
5038         struct block_group_record *rec;
5039
5040         rec = container_of(cache, struct block_group_record, cache);
5041         list_del_init(&rec->list);
5042         free(rec);
5043 }
5044
5045 void free_block_group_tree(struct block_group_tree *tree)
5046 {
5047         cache_tree_free_extents(&tree->tree, free_block_group_record);
5048 }
5049
5050 int insert_device_extent_record(struct device_extent_tree *tree,
5051                                 struct device_extent_record *de_rec)
5052 {
5053         int ret;
5054
5055         /*
5056          * Device extent is a bit different from the other extents, because
5057          * the extents which belong to the different devices may have the
5058          * same start and size, so we need use the special extent cache
5059          * search/insert functions.
5060          */
5061         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5062         if (ret)
5063                 return ret;
5064
5065         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5066         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5067         return 0;
5068 }
5069
5070 static void free_device_extent_record(struct cache_extent *cache)
5071 {
5072         struct device_extent_record *rec;
5073
5074         rec = container_of(cache, struct device_extent_record, cache);
5075         if (!list_empty(&rec->chunk_list))
5076                 list_del_init(&rec->chunk_list);
5077         if (!list_empty(&rec->device_list))
5078                 list_del_init(&rec->device_list);
5079         free(rec);
5080 }
5081
5082 void free_device_extent_tree(struct device_extent_tree *tree)
5083 {
5084         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5085 }
5086
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle the v0 extent ref item in @slot of @leaf.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs, all others as data backrefs carrying the v0 ref count.
 * The item key provides the extent bytenr (objectid) and parent (offset).
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid,
				key.offset, 0, 0, 0,
				btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
5107
5108 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5109                                             struct btrfs_key *key,
5110                                             int slot)
5111 {
5112         struct btrfs_chunk *ptr;
5113         struct chunk_record *rec;
5114         int num_stripes, i;
5115
5116         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5117         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5118
5119         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5120         if (!rec) {
5121                 fprintf(stderr, "memory allocation failed\n");
5122                 exit(-1);
5123         }
5124
5125         INIT_LIST_HEAD(&rec->list);
5126         INIT_LIST_HEAD(&rec->dextents);
5127         rec->bg_rec = NULL;
5128
5129         rec->cache.start = key->offset;
5130         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5131
5132         rec->generation = btrfs_header_generation(leaf);
5133
5134         rec->objectid = key->objectid;
5135         rec->type = key->type;
5136         rec->offset = key->offset;
5137
5138         rec->length = rec->cache.size;
5139         rec->owner = btrfs_chunk_owner(leaf, ptr);
5140         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5141         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5142         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5143         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5144         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5145         rec->num_stripes = num_stripes;
5146         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5147
5148         for (i = 0; i < rec->num_stripes; ++i) {
5149                 rec->stripes[i].devid =
5150                         btrfs_stripe_devid_nr(leaf, ptr, i);
5151                 rec->stripes[i].offset =
5152                         btrfs_stripe_offset_nr(leaf, ptr, i);
5153                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5154                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5155                                 BTRFS_UUID_SIZE);
5156         }
5157
5158         return rec;
5159 }
5160
5161 static int process_chunk_item(struct cache_tree *chunk_cache,
5162                               struct btrfs_key *key, struct extent_buffer *eb,
5163                               int slot)
5164 {
5165         struct chunk_record *rec;
5166         struct btrfs_chunk *chunk;
5167         int ret = 0;
5168
5169         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5170         /*
5171          * Do extra check for this chunk item,
5172          *
5173          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5174          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5175          * and owner<->key_type check.
5176          */
5177         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5178                                       key->offset);
5179         if (ret < 0) {
5180                 error("chunk(%llu, %llu) is not valid, ignore it",
5181                       key->offset, btrfs_chunk_length(eb, chunk));
5182                 return 0;
5183         }
5184         rec = btrfs_new_chunk_record(eb, key, slot);
5185         ret = insert_cache_extent(chunk_cache, &rec->cache);
5186         if (ret) {
5187                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5188                         rec->offset, rec->length);
5189                 free(rec);
5190         }
5191
5192         return ret;
5193 }
5194
5195 static int process_device_item(struct rb_root *dev_cache,
5196                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5197 {
5198         struct btrfs_dev_item *ptr;
5199         struct device_record *rec;
5200         int ret = 0;
5201
5202         ptr = btrfs_item_ptr(eb,
5203                 slot, struct btrfs_dev_item);
5204
5205         rec = malloc(sizeof(*rec));
5206         if (!rec) {
5207                 fprintf(stderr, "memory allocation failed\n");
5208                 return -ENOMEM;
5209         }
5210
5211         rec->devid = key->offset;
5212         rec->generation = btrfs_header_generation(eb);
5213
5214         rec->objectid = key->objectid;
5215         rec->type = key->type;
5216         rec->offset = key->offset;
5217
5218         rec->devid = btrfs_device_id(eb, ptr);
5219         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5220         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5221
5222         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5223         if (ret) {
5224                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5225                 free(rec);
5226         }
5227
5228         return ret;
5229 }
5230
5231 struct block_group_record *
5232 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5233                              int slot)
5234 {
5235         struct btrfs_block_group_item *ptr;
5236         struct block_group_record *rec;
5237
5238         rec = calloc(1, sizeof(*rec));
5239         if (!rec) {
5240                 fprintf(stderr, "memory allocation failed\n");
5241                 exit(-1);
5242         }
5243
5244         rec->cache.start = key->objectid;
5245         rec->cache.size = key->offset;
5246
5247         rec->generation = btrfs_header_generation(leaf);
5248
5249         rec->objectid = key->objectid;
5250         rec->type = key->type;
5251         rec->offset = key->offset;
5252
5253         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5254         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5255
5256         INIT_LIST_HEAD(&rec->list);
5257
5258         return rec;
5259 }
5260
5261 static int process_block_group_item(struct block_group_tree *block_group_cache,
5262                                     struct btrfs_key *key,
5263                                     struct extent_buffer *eb, int slot)
5264 {
5265         struct block_group_record *rec;
5266         int ret = 0;
5267
5268         rec = btrfs_new_block_group_record(eb, key, slot);
5269         ret = insert_block_group_record(block_group_cache, rec);
5270         if (ret) {
5271                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5272                         rec->objectid, rec->offset);
5273                 free(rec);
5274         }
5275
5276         return ret;
5277 }
5278
5279 struct device_extent_record *
5280 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5281                                struct btrfs_key *key, int slot)
5282 {
5283         struct device_extent_record *rec;
5284         struct btrfs_dev_extent *ptr;
5285
5286         rec = calloc(1, sizeof(*rec));
5287         if (!rec) {
5288                 fprintf(stderr, "memory allocation failed\n");
5289                 exit(-1);
5290         }
5291
5292         rec->cache.objectid = key->objectid;
5293         rec->cache.start = key->offset;
5294
5295         rec->generation = btrfs_header_generation(leaf);
5296
5297         rec->objectid = key->objectid;
5298         rec->type = key->type;
5299         rec->offset = key->offset;
5300
5301         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5302         rec->chunk_objecteid =
5303                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5304         rec->chunk_offset =
5305                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5306         rec->length = btrfs_dev_extent_length(leaf, ptr);
5307         rec->cache.size = rec->length;
5308
5309         INIT_LIST_HEAD(&rec->chunk_list);
5310         INIT_LIST_HEAD(&rec->device_list);
5311
5312         return rec;
5313 }
5314
5315 static int
5316 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5317                            struct btrfs_key *key, struct extent_buffer *eb,
5318                            int slot)
5319 {
5320         struct device_extent_record *rec;
5321         int ret;
5322
5323         rec = btrfs_new_device_extent_record(eb, key, slot);
5324         ret = insert_device_extent_record(dev_extent_cache, rec);
5325         if (ret) {
5326                 fprintf(stderr,
5327                         "Device extent[%llu, %llu, %llu] existed.\n",
5328                         rec->objectid, rec->offset, rec->length);
5329                 free(rec);
5330         }
5331
5332         return ret;
5333 }
5334
5335 static int process_extent_item(struct btrfs_root *root,
5336                                struct cache_tree *extent_cache,
5337                                struct extent_buffer *eb, int slot)
5338 {
5339         struct btrfs_extent_item *ei;
5340         struct btrfs_extent_inline_ref *iref;
5341         struct btrfs_extent_data_ref *dref;
5342         struct btrfs_shared_data_ref *sref;
5343         struct btrfs_key key;
5344         struct extent_record tmpl;
5345         unsigned long end;
5346         unsigned long ptr;
5347         int ret;
5348         int type;
5349         u32 item_size = btrfs_item_size_nr(eb, slot);
5350         u64 refs = 0;
5351         u64 offset;
5352         u64 num_bytes;
5353         int metadata = 0;
5354
5355         btrfs_item_key_to_cpu(eb, &key, slot);
5356
5357         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5358                 metadata = 1;
5359                 num_bytes = root->nodesize;
5360         } else {
5361                 num_bytes = key.offset;
5362         }
5363
5364         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5365                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5366                       key.objectid, root->sectorsize);
5367                 return -EIO;
5368         }
5369         if (item_size < sizeof(*ei)) {
5370 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5371                 struct btrfs_extent_item_v0 *ei0;
5372                 BUG_ON(item_size != sizeof(*ei0));
5373                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5374                 refs = btrfs_extent_refs_v0(eb, ei0);
5375 #else
5376                 BUG();
5377 #endif
5378                 memset(&tmpl, 0, sizeof(tmpl));
5379                 tmpl.start = key.objectid;
5380                 tmpl.nr = num_bytes;
5381                 tmpl.extent_item_refs = refs;
5382                 tmpl.metadata = metadata;
5383                 tmpl.found_rec = 1;
5384                 tmpl.max_size = num_bytes;
5385
5386                 return add_extent_rec(extent_cache, &tmpl);
5387         }
5388
5389         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5390         refs = btrfs_extent_refs(eb, ei);
5391         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5392                 metadata = 1;
5393         else
5394                 metadata = 0;
5395         if (metadata && num_bytes != root->nodesize) {
5396                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5397                       num_bytes, root->nodesize);
5398                 return -EIO;
5399         }
5400         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5401                 error("ignore invalid data extent, length %llu is not aligned to %u",
5402                       num_bytes, root->sectorsize);
5403                 return -EIO;
5404         }
5405
5406         memset(&tmpl, 0, sizeof(tmpl));
5407         tmpl.start = key.objectid;
5408         tmpl.nr = num_bytes;
5409         tmpl.extent_item_refs = refs;
5410         tmpl.metadata = metadata;
5411         tmpl.found_rec = 1;
5412         tmpl.max_size = num_bytes;
5413         add_extent_rec(extent_cache, &tmpl);
5414
5415         ptr = (unsigned long)(ei + 1);
5416         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5417             key.type == BTRFS_EXTENT_ITEM_KEY)
5418                 ptr += sizeof(struct btrfs_tree_block_info);
5419
5420         end = (unsigned long)ei + item_size;
5421         while (ptr < end) {
5422                 iref = (struct btrfs_extent_inline_ref *)ptr;
5423                 type = btrfs_extent_inline_ref_type(eb, iref);
5424                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5425                 switch (type) {
5426                 case BTRFS_TREE_BLOCK_REF_KEY:
5427                         ret = add_tree_backref(extent_cache, key.objectid,
5428                                         0, offset, 0);
5429                         if (ret < 0)
5430                                 error("add_tree_backref failed: %s",
5431                                       strerror(-ret));
5432                         break;
5433                 case BTRFS_SHARED_BLOCK_REF_KEY:
5434                         ret = add_tree_backref(extent_cache, key.objectid,
5435                                         offset, 0, 0);
5436                         if (ret < 0)
5437                                 error("add_tree_backref failed: %s",
5438                                       strerror(-ret));
5439                         break;
5440                 case BTRFS_EXTENT_DATA_REF_KEY:
5441                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5442                         add_data_backref(extent_cache, key.objectid, 0,
5443                                         btrfs_extent_data_ref_root(eb, dref),
5444                                         btrfs_extent_data_ref_objectid(eb,
5445                                                                        dref),
5446                                         btrfs_extent_data_ref_offset(eb, dref),
5447                                         btrfs_extent_data_ref_count(eb, dref),
5448                                         0, num_bytes);
5449                         break;
5450                 case BTRFS_SHARED_DATA_REF_KEY:
5451                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5452                         add_data_backref(extent_cache, key.objectid, offset,
5453                                         0, 0, 0,
5454                                         btrfs_shared_data_ref_count(eb, sref),
5455                                         0, num_bytes);
5456                         break;
5457                 default:
5458                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5459                                 key.objectid, key.type, num_bytes);
5460                         goto out;
5461                 }
5462                 ptr += btrfs_extent_inline_ref_size(type);
5463         }
5464         WARN_ON(ptr > end);
5465 out:
5466         return 0;
5467 }
5468
5469 static int check_cache_range(struct btrfs_root *root,
5470                              struct btrfs_block_group_cache *cache,
5471                              u64 offset, u64 bytes)
5472 {
5473         struct btrfs_free_space *entry;
5474         u64 *logical;
5475         u64 bytenr;
5476         int stripe_len;
5477         int i, nr, ret;
5478
5479         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5480                 bytenr = btrfs_sb_offset(i);
5481                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5482                                        cache->key.objectid, bytenr, 0,
5483                                        &logical, &nr, &stripe_len);
5484                 if (ret)
5485                         return ret;
5486
5487                 while (nr--) {
5488                         if (logical[nr] + stripe_len <= offset)
5489                                 continue;
5490                         if (offset + bytes <= logical[nr])
5491                                 continue;
5492                         if (logical[nr] == offset) {
5493                                 if (stripe_len >= bytes) {
5494                                         free(logical);
5495                                         return 0;
5496                                 }
5497                                 bytes -= stripe_len;
5498                                 offset += stripe_len;
5499                         } else if (logical[nr] < offset) {
5500                                 if (logical[nr] + stripe_len >=
5501                                     offset + bytes) {
5502                                         free(logical);
5503                                         return 0;
5504                                 }
5505                                 bytes = (offset + bytes) -
5506                                         (logical[nr] + stripe_len);
5507                                 offset = logical[nr] + stripe_len;
5508                         } else {
5509                                 /*
5510                                  * Could be tricky, the super may land in the
5511                                  * middle of the area we're checking.  First
5512                                  * check the easiest case, it's at the end.
5513                                  */
5514                                 if (logical[nr] + stripe_len >=
5515                                     bytes + offset) {
5516                                         bytes = logical[nr] - offset;
5517                                         continue;
5518                                 }
5519
5520                                 /* Check the left side */
5521                                 ret = check_cache_range(root, cache,
5522                                                         offset,
5523                                                         logical[nr] - offset);
5524                                 if (ret) {
5525                                         free(logical);
5526                                         return ret;
5527                                 }
5528
5529                                 /* Now we continue with the right side */
5530                                 bytes = (offset + bytes) -
5531                                         (logical[nr] + stripe_len);
5532                                 offset = logical[nr] + stripe_len;
5533                         }
5534                 }
5535
5536                 free(logical);
5537         }
5538
5539         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5540         if (!entry) {
5541                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5542                         offset, offset+bytes);
5543                 return -EINVAL;
5544         }
5545
5546         if (entry->offset != offset) {
5547                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5548                         entry->offset);
5549                 return -EINVAL;
5550         }
5551
5552         if (entry->bytes != bytes) {
5553                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5554                         bytes, entry->bytes, offset);
5555                 return -EINVAL;
5556         }
5557
5558         unlink_free_space(cache->free_space_ctl, entry);
5559         free(entry);
5560         return 0;
5561 }
5562
5563 static int verify_space_cache(struct btrfs_root *root,
5564                               struct btrfs_block_group_cache *cache)
5565 {
5566         struct btrfs_path *path;
5567         struct extent_buffer *leaf;
5568         struct btrfs_key key;
5569         u64 last;
5570         int ret = 0;
5571
5572         path = btrfs_alloc_path();
5573         if (!path)
5574                 return -ENOMEM;
5575
5576         root = root->fs_info->extent_root;
5577
5578         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5579
5580         key.objectid = last;
5581         key.offset = 0;
5582         key.type = BTRFS_EXTENT_ITEM_KEY;
5583
5584         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5585         if (ret < 0)
5586                 goto out;
5587         ret = 0;
5588         while (1) {
5589                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5590                         ret = btrfs_next_leaf(root, path);
5591                         if (ret < 0)
5592                                 goto out;
5593                         if (ret > 0) {
5594                                 ret = 0;
5595                                 break;
5596                         }
5597                 }
5598                 leaf = path->nodes[0];
5599                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5600                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5601                         break;
5602                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5603                     key.type != BTRFS_METADATA_ITEM_KEY) {
5604                         path->slots[0]++;
5605                         continue;
5606                 }
5607
5608                 if (last == key.objectid) {
5609                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5610                                 last = key.objectid + key.offset;
5611                         else
5612                                 last = key.objectid + root->nodesize;
5613                         path->slots[0]++;
5614                         continue;
5615                 }
5616
5617                 ret = check_cache_range(root, cache, last,
5618                                         key.objectid - last);
5619                 if (ret)
5620                         break;
5621                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5622                         last = key.objectid + key.offset;
5623                 else
5624                         last = key.objectid + root->nodesize;
5625                 path->slots[0]++;
5626         }
5627
5628         if (last < cache->key.objectid + cache->key.offset)
5629                 ret = check_cache_range(root, cache, last,
5630                                         cache->key.objectid +
5631                                         cache->key.offset - last);
5632
5633 out:
5634         btrfs_free_path(path);
5635
5636         if (!ret &&
5637             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5638                 fprintf(stderr, "There are still entries left in the space "
5639                         "cache\n");
5640                 ret = -EINVAL;
5641         }
5642
5643         return ret;
5644 }
5645
5646 static int check_space_cache(struct btrfs_root *root)
5647 {
5648         struct btrfs_block_group_cache *cache;
5649         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5650         int ret;
5651         int error = 0;
5652
5653         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5654             btrfs_super_generation(root->fs_info->super_copy) !=
5655             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5656                 printf("cache and super generation don't match, space cache "
5657                        "will be invalidated\n");
5658                 return 0;
5659         }
5660
5661         if (ctx.progress_enabled) {
5662                 ctx.tp = TASK_FREE_SPACE;
5663                 task_start(ctx.info);
5664         }
5665
5666         while (1) {
5667                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5668                 if (!cache)
5669                         break;
5670
5671                 start = cache->key.objectid + cache->key.offset;
5672                 if (!cache->free_space_ctl) {
5673                         if (btrfs_init_free_space_ctl(cache,
5674                                                       root->sectorsize)) {
5675                                 ret = -ENOMEM;
5676                                 break;
5677                         }
5678                 } else {
5679                         btrfs_remove_free_space_cache(cache);
5680                 }
5681
5682                 if (btrfs_fs_compat_ro(root->fs_info,
5683                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5684                         ret = exclude_super_stripes(root, cache);
5685                         if (ret) {
5686                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5687                                         strerror(-ret));
5688                                 error++;
5689                                 continue;
5690                         }
5691                         ret = load_free_space_tree(root->fs_info, cache);
5692                         free_excluded_extents(root, cache);
5693                         if (ret < 0) {
5694                                 fprintf(stderr, "could not load free space tree: %s\n",
5695                                         strerror(-ret));
5696                                 error++;
5697                                 continue;
5698                         }
5699                         error += ret;
5700                 } else {
5701                         ret = load_free_space_cache(root->fs_info, cache);
5702                         if (!ret)
5703                                 continue;
5704                 }
5705
5706                 ret = verify_space_cache(root, cache);
5707                 if (ret) {
5708                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5709                                 cache->key.objectid);
5710                         error++;
5711                 }
5712         }
5713
5714         task_stop(ctx.info);
5715
5716         return error ? -EINVAL : 0;
5717 }
5718
5719 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5720                         u64 num_bytes, unsigned long leaf_offset,
5721                         struct extent_buffer *eb) {
5722
5723         u64 offset = 0;
5724         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5725         char *data;
5726         unsigned long csum_offset;
5727         u32 csum;
5728         u32 csum_expected;
5729         u64 read_len;
5730         u64 data_checked = 0;
5731         u64 tmp;
5732         int ret = 0;
5733         int mirror;
5734         int num_copies;
5735
5736         if (num_bytes % root->sectorsize)
5737                 return -EINVAL;
5738
5739         data = malloc(num_bytes);
5740         if (!data)
5741                 return -ENOMEM;
5742
5743         while (offset < num_bytes) {
5744                 mirror = 0;
5745 again:
5746                 read_len = num_bytes - offset;
5747                 /* read as much space once a time */
5748                 ret = read_extent_data(root, data + offset,
5749                                 bytenr + offset, &read_len, mirror);
5750                 if (ret)
5751                         goto out;
5752                 data_checked = 0;
5753                 /* verify every 4k data's checksum */
5754                 while (data_checked < read_len) {
5755                         csum = ~(u32)0;
5756                         tmp = offset + data_checked;
5757
5758                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5759                                                csum, root->sectorsize);
5760                         btrfs_csum_final(csum, (u8 *)&csum);
5761
5762                         csum_offset = leaf_offset +
5763                                  tmp / root->sectorsize * csum_size;
5764                         read_extent_buffer(eb, (char *)&csum_expected,
5765                                            csum_offset, csum_size);
5766                         /* try another mirror */
5767                         if (csum != csum_expected) {
5768                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5769                                                 mirror, bytenr + tmp,
5770                                                 csum, csum_expected);
5771                                 num_copies = btrfs_num_copies(
5772                                                 &root->fs_info->mapping_tree,
5773                                                 bytenr, num_bytes);
5774                                 if (mirror < num_copies - 1) {
5775                                         mirror += 1;
5776                                         goto again;
5777                                 }
5778                         }
5779                         data_checked += root->sectorsize;
5780                 }
5781                 offset += read_len;
5782         }
5783 out:
5784         free(data);
5785         return ret;
5786 }
5787
5788 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5789                                u64 num_bytes)
5790 {
5791         struct btrfs_path *path;
5792         struct extent_buffer *leaf;
5793         struct btrfs_key key;
5794         int ret;
5795
5796         path = btrfs_alloc_path();
5797         if (!path) {
5798                 fprintf(stderr, "Error allocating path\n");
5799                 return -ENOMEM;
5800         }
5801
5802         key.objectid = bytenr;
5803         key.type = BTRFS_EXTENT_ITEM_KEY;
5804         key.offset = (u64)-1;
5805
5806 again:
5807         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5808                                 0, 0);
5809         if (ret < 0) {
5810                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5811                 btrfs_free_path(path);
5812                 return ret;
5813         } else if (ret) {
5814                 if (path->slots[0] > 0) {
5815                         path->slots[0]--;
5816                 } else {
5817                         ret = btrfs_prev_leaf(root, path);
5818                         if (ret < 0) {
5819                                 goto out;
5820                         } else if (ret > 0) {
5821                                 ret = 0;
5822                                 goto out;
5823                         }
5824                 }
5825         }
5826
5827         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5828
5829         /*
5830          * Block group items come before extent items if they have the same
5831          * bytenr, so walk back one more just in case.  Dear future traveller,
5832          * first congrats on mastering time travel.  Now if it's not too much
5833          * trouble could you go back to 2006 and tell Chris to make the
5834          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5835          * EXTENT_ITEM_KEY please?
5836          */
5837         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5838                 if (path->slots[0] > 0) {
5839                         path->slots[0]--;
5840                 } else {
5841                         ret = btrfs_prev_leaf(root, path);
5842                         if (ret < 0) {
5843                                 goto out;
5844                         } else if (ret > 0) {
5845                                 ret = 0;
5846                                 goto out;
5847                         }
5848                 }
5849                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5850         }
5851
5852         while (num_bytes) {
5853                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5854                         ret = btrfs_next_leaf(root, path);
5855                         if (ret < 0) {
5856                                 fprintf(stderr, "Error going to next leaf "
5857                                         "%d\n", ret);
5858                                 btrfs_free_path(path);
5859                                 return ret;
5860                         } else if (ret) {
5861                                 break;
5862                         }
5863                 }
5864                 leaf = path->nodes[0];
5865                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5866                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5867                         path->slots[0]++;
5868                         continue;
5869                 }
5870                 if (key.objectid + key.offset < bytenr) {
5871                         path->slots[0]++;
5872                         continue;
5873                 }
5874                 if (key.objectid > bytenr + num_bytes)
5875                         break;
5876
5877                 if (key.objectid == bytenr) {
5878                         if (key.offset >= num_bytes) {
5879                                 num_bytes = 0;
5880                                 break;
5881                         }
5882                         num_bytes -= key.offset;
5883                         bytenr += key.offset;
5884                 } else if (key.objectid < bytenr) {
5885                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5886                                 num_bytes = 0;
5887                                 break;
5888                         }
5889                         num_bytes = (bytenr + num_bytes) -
5890                                 (key.objectid + key.offset);
5891                         bytenr = key.objectid + key.offset;
5892                 } else {
5893                         if (key.objectid + key.offset < bytenr + num_bytes) {
5894                                 u64 new_start = key.objectid + key.offset;
5895                                 u64 new_bytes = bytenr + num_bytes - new_start;
5896
5897                                 /*
5898                                  * Weird case, the extent is in the middle of
5899                                  * our range, we'll have to search one side
5900                                  * and then the other.  Not sure if this happens
5901                                  * in real life, but no harm in coding it up
5902                                  * anyway just in case.
5903                                  */
5904                                 btrfs_release_path(path);
5905                                 ret = check_extent_exists(root, new_start,
5906                                                           new_bytes);
5907                                 if (ret) {
5908                                         fprintf(stderr, "Right section didn't "
5909                                                 "have a record\n");
5910                                         break;
5911                                 }
5912                                 num_bytes = key.objectid - bytenr;
5913                                 goto again;
5914                         }
5915                         num_bytes = key.objectid - bytenr;
5916                 }
5917                 path->slots[0]++;
5918         }
5919         ret = 0;
5920
5921 out:
5922         if (num_bytes && !ret) {
5923                 fprintf(stderr, "There are no extents for csum range "
5924                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5925                 ret = 1;
5926         }
5927
5928         btrfs_free_path(path);
5929         return ret;
5930 }
5931
5932 static int check_csums(struct btrfs_root *root)
5933 {
5934         struct btrfs_path *path;
5935         struct extent_buffer *leaf;
5936         struct btrfs_key key;
5937         u64 offset = 0, num_bytes = 0;
5938         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5939         int errors = 0;
5940         int ret;
5941         u64 data_len;
5942         unsigned long leaf_offset;
5943
5944         root = root->fs_info->csum_root;
5945         if (!extent_buffer_uptodate(root->node)) {
5946                 fprintf(stderr, "No valid csum tree found\n");
5947                 return -ENOENT;
5948         }
5949
5950         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5951         key.type = BTRFS_EXTENT_CSUM_KEY;
5952         key.offset = 0;
5953
5954         path = btrfs_alloc_path();
5955         if (!path)
5956                 return -ENOMEM;
5957
5958         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5959         if (ret < 0) {
5960                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5961                 btrfs_free_path(path);
5962                 return ret;
5963         }
5964
5965         if (ret > 0 && path->slots[0])
5966                 path->slots[0]--;
5967         ret = 0;
5968
5969         while (1) {
5970                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5971                         ret = btrfs_next_leaf(root, path);
5972                         if (ret < 0) {
5973                                 fprintf(stderr, "Error going to next leaf "
5974                                         "%d\n", ret);
5975                                 break;
5976                         }
5977                         if (ret)
5978                                 break;
5979                 }
5980                 leaf = path->nodes[0];
5981
5982                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5983                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5984                         path->slots[0]++;
5985                         continue;
5986                 }
5987
5988                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5989                               csum_size) * root->sectorsize;
5990                 if (!check_data_csum)
5991                         goto skip_csum_check;
5992                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5993                 ret = check_extent_csums(root, key.offset, data_len,
5994                                          leaf_offset, leaf);
5995                 if (ret)
5996                         break;
5997 skip_csum_check:
5998                 if (!num_bytes) {
5999                         offset = key.offset;
6000                 } else if (key.offset != offset + num_bytes) {
6001                         ret = check_extent_exists(root, offset, num_bytes);
6002                         if (ret) {
6003                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6004                                         "there is no extent record\n",
6005                                         offset, offset+num_bytes);
6006                                 errors++;
6007                         }
6008                         offset = key.offset;
6009                         num_bytes = 0;
6010                 }
6011                 num_bytes += data_len;
6012                 path->slots[0]++;
6013         }
6014
6015         btrfs_free_path(path);
6016         return errors;
6017 }
6018
6019 static int is_dropped_key(struct btrfs_key *key,
6020                           struct btrfs_key *drop_key) {
6021         if (key->objectid < drop_key->objectid)
6022                 return 1;
6023         else if (key->objectid == drop_key->objectid) {
6024                 if (key->type < drop_key->type)
6025                         return 1;
6026                 else if (key->type == drop_key->type) {
6027                         if (key->offset < drop_key->offset)
6028                                 return 1;
6029                 }
6030         }
6031         return 0;
6032 }
6033
6034 /*
6035  * Here are the rules for FULL_BACKREF.
6036  *
6037  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6038  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6039  *      FULL_BACKREF set.
6040  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6041  *    if it happened after the relocation occurred since we'll have dropped the
6042  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6043  *    have no real way to know for sure.
6044  *
6045  * We process the blocks one root at a time, and we start from the lowest root
6046  * objectid and go to the highest.  So we can just lookup the owner backref for
6047  * the record and if we don't find it then we know it doesn't exist and we have
6048  * a FULL BACKREF.
6049  *
6050  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6051  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6052  * be set or not and then we can check later once we've gathered all the refs.
6053  */
6054 static int calc_extent_flag(struct btrfs_root *root,
6055                            struct cache_tree *extent_cache,
6056                            struct extent_buffer *buf,
6057                            struct root_item_record *ri,
6058                            u64 *flags)
6059 {
6060         struct extent_record *rec;
6061         struct cache_extent *cache;
6062         struct tree_backref *tback;
6063         u64 owner = 0;
6064
6065         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6066         /* we have added this extent before */
6067         if (!cache)
6068                 return -ENOENT;
6069
6070         rec = container_of(cache, struct extent_record, cache);
6071
6072         /*
6073          * Except file/reloc tree, we can not have
6074          * FULL BACKREF MODE
6075          */
6076         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6077                 goto normal;
6078         /*
6079          * root node
6080          */
6081         if (buf->start == ri->bytenr)
6082                 goto normal;
6083
6084         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6085                 goto full_backref;
6086
6087         owner = btrfs_header_owner(buf);
6088         if (owner == ri->objectid)
6089                 goto normal;
6090
6091         tback = find_tree_backref(rec, 0, owner);
6092         if (!tback)
6093                 goto full_backref;
6094 normal:
6095         *flags = 0;
6096         if (rec->flag_block_full_backref != FLAG_UNSET &&
6097             rec->flag_block_full_backref != 0)
6098                 rec->bad_full_backref = 1;
6099         return 0;
6100 full_backref:
6101         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6102         if (rec->flag_block_full_backref != FLAG_UNSET &&
6103             rec->flag_block_full_backref != 1)
6104                 rec->bad_full_backref = 1;
6105         return 0;
6106 }
6107
6108 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6109 {
6110         fprintf(stderr, "Invalid key type(");
6111         print_key_type(stderr, 0, key_type);
6112         fprintf(stderr, ") found in root(");
6113         print_objectid(stderr, rootid, 0);
6114         fprintf(stderr, ")\n");
6115 }
6116
6117 /*
6118  * Check if the key is valid with its extent buffer.
6119  *
6120  * This is a early check in case invalid key exists in a extent buffer
6121  * This is not comprehensive yet, but should prevent wrong key/item passed
6122  * further
6123  */
6124 static int check_type_with_root(u64 rootid, u8 key_type)
6125 {
6126         switch (key_type) {
6127         /* Only valid in chunk tree */
6128         case BTRFS_DEV_ITEM_KEY:
6129         case BTRFS_CHUNK_ITEM_KEY:
6130                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6131                         goto err;
6132                 break;
6133         /* valid in csum and log tree */
6134         case BTRFS_CSUM_TREE_OBJECTID:
6135                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6136                       is_fstree(rootid)))
6137                         goto err;
6138                 break;
6139         case BTRFS_EXTENT_ITEM_KEY:
6140         case BTRFS_METADATA_ITEM_KEY:
6141         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6142                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6143                         goto err;
6144                 break;
6145         case BTRFS_ROOT_ITEM_KEY:
6146                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6147                         goto err;
6148                 break;
6149         case BTRFS_DEV_EXTENT_KEY:
6150                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6151                         goto err;
6152                 break;
6153         }
6154         return 0;
6155 err:
6156         report_mismatch_key_root(key_type, rootid);
6157         return -EINVAL;
6158 }
6159
6160 static int run_next_block(struct btrfs_root *root,
6161                           struct block_info *bits,
6162                           int bits_nr,
6163                           u64 *last,
6164                           struct cache_tree *pending,
6165                           struct cache_tree *seen,
6166                           struct cache_tree *reada,
6167                           struct cache_tree *nodes,
6168                           struct cache_tree *extent_cache,
6169                           struct cache_tree *chunk_cache,
6170                           struct rb_root *dev_cache,
6171                           struct block_group_tree *block_group_cache,
6172                           struct device_extent_tree *dev_extent_cache,
6173                           struct root_item_record *ri)
6174 {
6175         struct extent_buffer *buf;
6176         struct extent_record *rec = NULL;
6177         u64 bytenr;
6178         u32 size;
6179         u64 parent;
6180         u64 owner;
6181         u64 flags;
6182         u64 ptr;
6183         u64 gen = 0;
6184         int ret = 0;
6185         int i;
6186         int nritems;
6187         struct btrfs_key key;
6188         struct cache_extent *cache;
6189         int reada_bits;
6190
6191         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6192                                     bits_nr, &reada_bits);
6193         if (nritems == 0)
6194                 return 1;
6195
6196         if (!reada_bits) {
6197                 for(i = 0; i < nritems; i++) {
6198                         ret = add_cache_extent(reada, bits[i].start,
6199                                                bits[i].size);
6200                         if (ret == -EEXIST)
6201                                 continue;
6202
6203                         /* fixme, get the parent transid */
6204                         readahead_tree_block(root, bits[i].start,
6205                                              bits[i].size, 0);
6206                 }
6207         }
6208         *last = bits[0].start;
6209         bytenr = bits[0].start;
6210         size = bits[0].size;
6211
6212         cache = lookup_cache_extent(pending, bytenr, size);
6213         if (cache) {
6214                 remove_cache_extent(pending, cache);
6215                 free(cache);
6216         }
6217         cache = lookup_cache_extent(reada, bytenr, size);
6218         if (cache) {
6219                 remove_cache_extent(reada, cache);
6220                 free(cache);
6221         }
6222         cache = lookup_cache_extent(nodes, bytenr, size);
6223         if (cache) {
6224                 remove_cache_extent(nodes, cache);
6225                 free(cache);
6226         }
6227         cache = lookup_cache_extent(extent_cache, bytenr, size);
6228         if (cache) {
6229                 rec = container_of(cache, struct extent_record, cache);
6230                 gen = rec->parent_generation;
6231         }
6232
6233         /* fixme, get the real parent transid */
6234         buf = read_tree_block(root, bytenr, size, gen);
6235         if (!extent_buffer_uptodate(buf)) {
6236                 record_bad_block_io(root->fs_info,
6237                                     extent_cache, bytenr, size);
6238                 goto out;
6239         }
6240
6241         nritems = btrfs_header_nritems(buf);
6242
6243         flags = 0;
6244         if (!init_extent_tree) {
6245                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6246                                        btrfs_header_level(buf), 1, NULL,
6247                                        &flags);
6248                 if (ret < 0) {
6249                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6250                         if (ret < 0) {
6251                                 fprintf(stderr, "Couldn't calc extent flags\n");
6252                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6253                         }
6254                 }
6255         } else {
6256                 flags = 0;
6257                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6258                 if (ret < 0) {
6259                         fprintf(stderr, "Couldn't calc extent flags\n");
6260                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6261                 }
6262         }
6263
6264         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6265                 if (ri != NULL &&
6266                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6267                     ri->objectid == btrfs_header_owner(buf)) {
6268                         /*
6269                          * Ok we got to this block from it's original owner and
6270                          * we have FULL_BACKREF set.  Relocation can leave
6271                          * converted blocks over so this is altogether possible,
6272                          * however it's not possible if the generation > the
6273                          * last snapshot, so check for this case.
6274                          */
6275                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6276                             btrfs_header_generation(buf) > ri->last_snapshot) {
6277                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6278                                 rec->bad_full_backref = 1;
6279                         }
6280                 }
6281         } else {
6282                 if (ri != NULL &&
6283                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6284                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6285                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6286                         rec->bad_full_backref = 1;
6287                 }
6288         }
6289
6290         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6291                 rec->flag_block_full_backref = 1;
6292                 parent = bytenr;
6293                 owner = 0;
6294         } else {
6295                 rec->flag_block_full_backref = 0;
6296                 parent = 0;
6297                 owner = btrfs_header_owner(buf);
6298         }
6299
6300         ret = check_block(root, extent_cache, buf, flags);
6301         if (ret)
6302                 goto out;
6303
6304         if (btrfs_is_leaf(buf)) {
6305                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6306                 for (i = 0; i < nritems; i++) {
6307                         struct btrfs_file_extent_item *fi;
6308                         btrfs_item_key_to_cpu(buf, &key, i);
6309                         /*
6310                          * Check key type against the leaf owner.
6311                          * Could filter quite a lot of early error if
6312                          * owner is correct
6313                          */
6314                         if (check_type_with_root(btrfs_header_owner(buf),
6315                                                  key.type)) {
6316                                 fprintf(stderr, "ignoring invalid key\n");
6317                                 continue;
6318                         }
6319                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6320                                 process_extent_item(root, extent_cache, buf,
6321                                                     i);
6322                                 continue;
6323                         }
6324                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6325                                 process_extent_item(root, extent_cache, buf,
6326                                                     i);
6327                                 continue;
6328                         }
6329                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6330                                 total_csum_bytes +=
6331                                         btrfs_item_size_nr(buf, i);
6332                                 continue;
6333                         }
6334                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6335                                 process_chunk_item(chunk_cache, &key, buf, i);
6336                                 continue;
6337                         }
6338                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6339                                 process_device_item(dev_cache, &key, buf, i);
6340                                 continue;
6341                         }
6342                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6343                                 process_block_group_item(block_group_cache,
6344                                         &key, buf, i);
6345                                 continue;
6346                         }
6347                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6348                                 process_device_extent_item(dev_extent_cache,
6349                                         &key, buf, i);
6350                                 continue;
6351
6352                         }
6353                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6354 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6355                                 process_extent_ref_v0(extent_cache, buf, i);
6356 #else
6357                                 BUG();
6358 #endif
6359                                 continue;
6360                         }
6361
6362                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6363                                 ret = add_tree_backref(extent_cache,
6364                                                 key.objectid, 0, key.offset, 0);
6365                                 if (ret < 0)
6366                                         error("add_tree_backref failed: %s",
6367                                               strerror(-ret));
6368                                 continue;
6369                         }
6370                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6371                                 ret = add_tree_backref(extent_cache,
6372                                                 key.objectid, key.offset, 0, 0);
6373                                 if (ret < 0)
6374                                         error("add_tree_backref failed: %s",
6375                                               strerror(-ret));
6376                                 continue;
6377                         }
6378                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6379                                 struct btrfs_extent_data_ref *ref;
6380                                 ref = btrfs_item_ptr(buf, i,
6381                                                 struct btrfs_extent_data_ref);
6382                                 add_data_backref(extent_cache,
6383                                         key.objectid, 0,
6384                                         btrfs_extent_data_ref_root(buf, ref),
6385                                         btrfs_extent_data_ref_objectid(buf,
6386                                                                        ref),
6387                                         btrfs_extent_data_ref_offset(buf, ref),
6388                                         btrfs_extent_data_ref_count(buf, ref),
6389                                         0, root->sectorsize);
6390                                 continue;
6391                         }
6392                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6393                                 struct btrfs_shared_data_ref *ref;
6394                                 ref = btrfs_item_ptr(buf, i,
6395                                                 struct btrfs_shared_data_ref);
6396                                 add_data_backref(extent_cache,
6397                                         key.objectid, key.offset, 0, 0, 0,
6398                                         btrfs_shared_data_ref_count(buf, ref),
6399                                         0, root->sectorsize);
6400                                 continue;
6401                         }
6402                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6403                                 struct bad_item *bad;
6404
6405                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6406                                         continue;
6407                                 if (!owner)
6408                                         continue;
6409                                 bad = malloc(sizeof(struct bad_item));
6410                                 if (!bad)
6411                                         continue;
6412                                 INIT_LIST_HEAD(&bad->list);
6413                                 memcpy(&bad->key, &key,
6414                                        sizeof(struct btrfs_key));
6415                                 bad->root_id = owner;
6416                                 list_add_tail(&bad->list, &delete_items);
6417                                 continue;
6418                         }
6419                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6420                                 continue;
6421                         fi = btrfs_item_ptr(buf, i,
6422                                             struct btrfs_file_extent_item);
6423                         if (btrfs_file_extent_type(buf, fi) ==
6424                             BTRFS_FILE_EXTENT_INLINE)
6425                                 continue;
6426                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6427                                 continue;
6428
6429                         data_bytes_allocated +=
6430                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6431                         if (data_bytes_allocated < root->sectorsize) {
6432                                 abort();
6433                         }
6434                         data_bytes_referenced +=
6435                                 btrfs_file_extent_num_bytes(buf, fi);
6436                         add_data_backref(extent_cache,
6437                                 btrfs_file_extent_disk_bytenr(buf, fi),
6438                                 parent, owner, key.objectid, key.offset -
6439                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6440                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6441                 }
6442         } else {
6443                 int level;
6444                 struct btrfs_key first_key;
6445
6446                 first_key.objectid = 0;
6447
6448                 if (nritems > 0)
6449                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6450                 level = btrfs_header_level(buf);
6451                 for (i = 0; i < nritems; i++) {
6452                         struct extent_record tmpl;
6453
6454                         ptr = btrfs_node_blockptr(buf, i);
6455                         size = root->nodesize;
6456                         btrfs_node_key_to_cpu(buf, &key, i);
6457                         if (ri != NULL) {
6458                                 if ((level == ri->drop_level)
6459                                     && is_dropped_key(&key, &ri->drop_key)) {
6460                                         continue;
6461                                 }
6462                         }
6463
6464                         memset(&tmpl, 0, sizeof(tmpl));
6465                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6466                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6467                         tmpl.start = ptr;
6468                         tmpl.nr = size;
6469                         tmpl.refs = 1;
6470                         tmpl.metadata = 1;
6471                         tmpl.max_size = size;
6472                         ret = add_extent_rec(extent_cache, &tmpl);
6473                         if (ret < 0)
6474                                 goto out;
6475
6476                         ret = add_tree_backref(extent_cache, ptr, parent,
6477                                         owner, 1);
6478                         if (ret < 0) {
6479                                 error("add_tree_backref failed: %s",
6480                                       strerror(-ret));
6481                                 continue;
6482                         }
6483
6484                         if (level > 1) {
6485                                 add_pending(nodes, seen, ptr, size);
6486                         } else {
6487                                 add_pending(pending, seen, ptr, size);
6488                         }
6489                 }
6490                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6491                                       nritems) * sizeof(struct btrfs_key_ptr);
6492         }
6493         total_btree_bytes += buf->len;
6494         if (fs_root_objectid(btrfs_header_owner(buf)))
6495                 total_fs_tree_bytes += buf->len;
6496         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6497                 total_extent_tree_bytes += buf->len;
6498         if (!found_old_backref &&
6499             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6500             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6501             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6502                 found_old_backref = 1;
6503 out:
6504         free_extent_buffer(buf);
6505         return ret;
6506 }
6507
6508 static int add_root_to_pending(struct extent_buffer *buf,
6509                                struct cache_tree *extent_cache,
6510                                struct cache_tree *pending,
6511                                struct cache_tree *seen,
6512                                struct cache_tree *nodes,
6513                                u64 objectid)
6514 {
6515         struct extent_record tmpl;
6516         int ret;
6517
6518         if (btrfs_header_level(buf) > 0)
6519                 add_pending(nodes, seen, buf->start, buf->len);
6520         else
6521                 add_pending(pending, seen, buf->start, buf->len);
6522
6523         memset(&tmpl, 0, sizeof(tmpl));
6524         tmpl.start = buf->start;
6525         tmpl.nr = buf->len;
6526         tmpl.is_root = 1;
6527         tmpl.refs = 1;
6528         tmpl.metadata = 1;
6529         tmpl.max_size = buf->len;
6530         add_extent_rec(extent_cache, &tmpl);
6531
6532         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6533             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6534                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6535                                 0, 1);
6536         else
6537                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6538                                 1);
6539         return ret;
6540 }
6541
6542 /* as we fix the tree, we might be deleting blocks that
6543  * we're tracking for repair.  This hook makes sure we
6544  * remove any backrefs for blocks as we are fixing them.
6545  */
6546 static int free_extent_hook(struct btrfs_trans_handle *trans,
6547                             struct btrfs_root *root,
6548                             u64 bytenr, u64 num_bytes, u64 parent,
6549                             u64 root_objectid, u64 owner, u64 offset,
6550                             int refs_to_drop)
6551 {
6552         struct extent_record *rec;
6553         struct cache_extent *cache;
6554         int is_data;
6555         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6556
6557         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6558         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6559         if (!cache)
6560                 return 0;
6561
6562         rec = container_of(cache, struct extent_record, cache);
6563         if (is_data) {
6564                 struct data_backref *back;
6565                 back = find_data_backref(rec, parent, root_objectid, owner,
6566                                          offset, 1, bytenr, num_bytes);
6567                 if (!back)
6568                         goto out;
6569                 if (back->node.found_ref) {
6570                         back->found_ref -= refs_to_drop;
6571                         if (rec->refs)
6572                                 rec->refs -= refs_to_drop;
6573                 }
6574                 if (back->node.found_extent_tree) {
6575                         back->num_refs -= refs_to_drop;
6576                         if (rec->extent_item_refs)
6577                                 rec->extent_item_refs -= refs_to_drop;
6578                 }
6579                 if (back->found_ref == 0)
6580                         back->node.found_ref = 0;
6581                 if (back->num_refs == 0)
6582                         back->node.found_extent_tree = 0;
6583
6584                 if (!back->node.found_extent_tree && back->node.found_ref) {
6585                         list_del(&back->node.list);
6586                         free(back);
6587                 }
6588         } else {
6589                 struct tree_backref *back;
6590                 back = find_tree_backref(rec, parent, root_objectid);
6591                 if (!back)
6592                         goto out;
6593                 if (back->node.found_ref) {
6594                         if (rec->refs)
6595                                 rec->refs--;
6596                         back->node.found_ref = 0;
6597                 }
6598                 if (back->node.found_extent_tree) {
6599                         if (rec->extent_item_refs)
6600                                 rec->extent_item_refs--;
6601                         back->node.found_extent_tree = 0;
6602                 }
6603                 if (!back->node.found_extent_tree && back->node.found_ref) {
6604                         list_del(&back->node.list);
6605                         free(back);
6606                 }
6607         }
6608         maybe_free_extent_rec(extent_cache, rec);
6609 out:
6610         return 0;
6611 }
6612
6613 static int delete_extent_records(struct btrfs_trans_handle *trans,
6614                                  struct btrfs_root *root,
6615                                  struct btrfs_path *path,
6616                                  u64 bytenr, u64 new_len)
6617 {
6618         struct btrfs_key key;
6619         struct btrfs_key found_key;
6620         struct extent_buffer *leaf;
6621         int ret;
6622         int slot;
6623
6624
6625         key.objectid = bytenr;
6626         key.type = (u8)-1;
6627         key.offset = (u64)-1;
6628
6629         while(1) {
6630                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6631                                         &key, path, 0, 1);
6632                 if (ret < 0)
6633                         break;
6634
6635                 if (ret > 0) {
6636                         ret = 0;
6637                         if (path->slots[0] == 0)
6638                                 break;
6639                         path->slots[0]--;
6640                 }
6641                 ret = 0;
6642
6643                 leaf = path->nodes[0];
6644                 slot = path->slots[0];
6645
6646                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6647                 if (found_key.objectid != bytenr)
6648                         break;
6649
6650                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6651                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6652                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6653                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6654                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6655                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6656                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6657                         btrfs_release_path(path);
6658                         if (found_key.type == 0) {
6659                                 if (found_key.offset == 0)
6660                                         break;
6661                                 key.offset = found_key.offset - 1;
6662                                 key.type = found_key.type;
6663                         }
6664                         key.type = found_key.type - 1;
6665                         key.offset = (u64)-1;
6666                         continue;
6667                 }
6668
6669                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6670                         found_key.objectid, found_key.type, found_key.offset);
6671
6672                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6673                 if (ret)
6674                         break;
6675                 btrfs_release_path(path);
6676
6677                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6678                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6679                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6680                                 found_key.offset : root->nodesize;
6681
6682                         ret = btrfs_update_block_group(trans, root, bytenr,
6683                                                        bytes, 0, 0);
6684                         if (ret)
6685                                 break;
6686                 }
6687         }
6688
6689         btrfs_release_path(path);
6690         return ret;
6691 }
6692
6693 /*
6694  * for a single backref, this will allocate a new extent
6695  * and add the backref to it.
6696  */
6697 static int record_extent(struct btrfs_trans_handle *trans,
6698                          struct btrfs_fs_info *info,
6699                          struct btrfs_path *path,
6700                          struct extent_record *rec,
6701                          struct extent_backref *back,
6702                          int allocated, u64 flags)
6703 {
6704         int ret;
6705         struct btrfs_root *extent_root = info->extent_root;
6706         struct extent_buffer *leaf;
6707         struct btrfs_key ins_key;
6708         struct btrfs_extent_item *ei;
6709         struct tree_backref *tback;
6710         struct data_backref *dback;
6711         struct btrfs_tree_block_info *bi;
6712
6713         if (!back->is_data)
6714                 rec->max_size = max_t(u64, rec->max_size,
6715                                     info->extent_root->nodesize);
6716
6717         if (!allocated) {
6718                 u32 item_size = sizeof(*ei);
6719
6720                 if (!back->is_data)
6721                         item_size += sizeof(*bi);
6722
6723                 ins_key.objectid = rec->start;
6724                 ins_key.offset = rec->max_size;
6725                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6726
6727                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6728                                         &ins_key, item_size);
6729                 if (ret)
6730                         goto fail;
6731
6732                 leaf = path->nodes[0];
6733                 ei = btrfs_item_ptr(leaf, path->slots[0],
6734                                     struct btrfs_extent_item);
6735
6736                 btrfs_set_extent_refs(leaf, ei, 0);
6737                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6738
6739                 if (back->is_data) {
6740                         btrfs_set_extent_flags(leaf, ei,
6741                                                BTRFS_EXTENT_FLAG_DATA);
6742                 } else {
6743                         struct btrfs_disk_key copy_key;;
6744
6745                         tback = to_tree_backref(back);
6746                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6747                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6748                                              sizeof(*bi));
6749
6750                         btrfs_set_disk_key_objectid(&copy_key,
6751                                                     rec->info_objectid);
6752                         btrfs_set_disk_key_type(&copy_key, 0);
6753                         btrfs_set_disk_key_offset(&copy_key, 0);
6754
6755                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6756                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6757
6758                         btrfs_set_extent_flags(leaf, ei,
6759                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6760                 }
6761
6762                 btrfs_mark_buffer_dirty(leaf);
6763                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6764                                                rec->max_size, 1, 0);
6765                 if (ret)
6766                         goto fail;
6767                 btrfs_release_path(path);
6768         }
6769
6770         if (back->is_data) {
6771                 u64 parent;
6772                 int i;
6773
6774                 dback = to_data_backref(back);
6775                 if (back->full_backref)
6776                         parent = dback->parent;
6777                 else
6778                         parent = 0;
6779
6780                 for (i = 0; i < dback->found_ref; i++) {
6781                         /* if parent != 0, we're doing a full backref
6782                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6783                          * just makes the backref allocator create a data
6784                          * backref
6785                          */
6786                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6787                                                    rec->start, rec->max_size,
6788                                                    parent,
6789                                                    dback->root,
6790                                                    parent ?
6791                                                    BTRFS_FIRST_FREE_OBJECTID :
6792                                                    dback->owner,
6793                                                    dback->offset);
6794                         if (ret)
6795                                 break;
6796                 }
6797                 fprintf(stderr, "adding new data backref"
6798                                 " on %llu %s %llu owner %llu"
6799                                 " offset %llu found %d\n",
6800                                 (unsigned long long)rec->start,
6801                                 back->full_backref ?
6802                                 "parent" : "root",
6803                                 back->full_backref ?
6804                                 (unsigned long long)parent :
6805                                 (unsigned long long)dback->root,
6806                                 (unsigned long long)dback->owner,
6807                                 (unsigned long long)dback->offset,
6808                                 dback->found_ref);
6809         } else {
6810                 u64 parent;
6811
6812                 tback = to_tree_backref(back);
6813                 if (back->full_backref)
6814                         parent = tback->parent;
6815                 else
6816                         parent = 0;
6817
6818                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6819                                            rec->start, rec->max_size,
6820                                            parent, tback->root, 0, 0);
6821                 fprintf(stderr, "adding new tree backref on "
6822                         "start %llu len %llu parent %llu root %llu\n",
6823                         rec->start, rec->max_size, parent, tback->root);
6824         }
6825 fail:
6826         btrfs_release_path(path);
6827         return ret;
6828 }
6829
6830 static struct extent_entry *find_entry(struct list_head *entries,
6831                                        u64 bytenr, u64 bytes)
6832 {
6833         struct extent_entry *entry = NULL;
6834
6835         list_for_each_entry(entry, entries, list) {
6836                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6837                         return entry;
6838         }
6839
6840         return NULL;
6841 }
6842
6843 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6844 {
6845         struct extent_entry *entry, *best = NULL, *prev = NULL;
6846
6847         list_for_each_entry(entry, entries, list) {
6848                 if (!prev) {
6849                         prev = entry;
6850                         continue;
6851                 }
6852
6853                 /*
6854                  * If there are as many broken entries as entries then we know
6855                  * not to trust this particular entry.
6856                  */
6857                 if (entry->broken == entry->count)
6858                         continue;
6859
6860                 /*
6861                  * If our current entry == best then we can't be sure our best
6862                  * is really the best, so we need to keep searching.
6863                  */
6864                 if (best && best->count == entry->count) {
6865                         prev = entry;
6866                         best = NULL;
6867                         continue;
6868                 }
6869
6870                 /* Prev == entry, not good enough, have to keep searching */
6871                 if (!prev->broken && prev->count == entry->count)
6872                         continue;
6873
6874                 if (!best)
6875                         best = (prev->count > entry->count) ? prev : entry;
6876                 else if (best->count < entry->count)
6877                         best = entry;
6878                 prev = entry;
6879         }
6880
6881         return best;
6882 }
6883
6884 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6885                       struct data_backref *dback, struct extent_entry *entry)
6886 {
6887         struct btrfs_trans_handle *trans;
6888         struct btrfs_root *root;
6889         struct btrfs_file_extent_item *fi;
6890         struct extent_buffer *leaf;
6891         struct btrfs_key key;
6892         u64 bytenr, bytes;
6893         int ret, err;
6894
6895         key.objectid = dback->root;
6896         key.type = BTRFS_ROOT_ITEM_KEY;
6897         key.offset = (u64)-1;
6898         root = btrfs_read_fs_root(info, &key);
6899         if (IS_ERR(root)) {
6900                 fprintf(stderr, "Couldn't find root for our ref\n");
6901                 return -EINVAL;
6902         }
6903
6904         /*
6905          * The backref points to the original offset of the extent if it was
6906          * split, so we need to search down to the offset we have and then walk
6907          * forward until we find the backref we're looking for.
6908          */
6909         key.objectid = dback->owner;
6910         key.type = BTRFS_EXTENT_DATA_KEY;
6911         key.offset = dback->offset;
6912         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6913         if (ret < 0) {
6914                 fprintf(stderr, "Error looking up ref %d\n", ret);
6915                 return ret;
6916         }
6917
6918         while (1) {
6919                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6920                         ret = btrfs_next_leaf(root, path);
6921                         if (ret) {
6922                                 fprintf(stderr, "Couldn't find our ref, next\n");
6923                                 return -EINVAL;
6924                         }
6925                 }
6926                 leaf = path->nodes[0];
6927                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6928                 if (key.objectid != dback->owner ||
6929                     key.type != BTRFS_EXTENT_DATA_KEY) {
6930                         fprintf(stderr, "Couldn't find our ref, search\n");
6931                         return -EINVAL;
6932                 }
6933                 fi = btrfs_item_ptr(leaf, path->slots[0],
6934                                     struct btrfs_file_extent_item);
6935                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6936                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6937
6938                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6939                         break;
6940                 path->slots[0]++;
6941         }
6942
6943         btrfs_release_path(path);
6944
6945         trans = btrfs_start_transaction(root, 1);
6946         if (IS_ERR(trans))
6947                 return PTR_ERR(trans);
6948
6949         /*
6950          * Ok we have the key of the file extent we want to fix, now we can cow
6951          * down to the thing and fix it.
6952          */
6953         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6954         if (ret < 0) {
6955                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6956                         key.objectid, key.type, key.offset, ret);
6957                 goto out;
6958         }
6959         if (ret > 0) {
6960                 fprintf(stderr, "Well that's odd, we just found this key "
6961                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6962                         key.offset);
6963                 ret = -EINVAL;
6964                 goto out;
6965         }
6966         leaf = path->nodes[0];
6967         fi = btrfs_item_ptr(leaf, path->slots[0],
6968                             struct btrfs_file_extent_item);
6969
6970         if (btrfs_file_extent_compression(leaf, fi) &&
6971             dback->disk_bytenr != entry->bytenr) {
6972                 fprintf(stderr, "Ref doesn't match the record start and is "
6973                         "compressed, please take a btrfs-image of this file "
6974                         "system and send it to a btrfs developer so they can "
6975                         "complete this functionality for bytenr %Lu\n",
6976                         dback->disk_bytenr);
6977                 ret = -EINVAL;
6978                 goto out;
6979         }
6980
6981         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6982                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6983         } else if (dback->disk_bytenr > entry->bytenr) {
6984                 u64 off_diff, offset;
6985
6986                 off_diff = dback->disk_bytenr - entry->bytenr;
6987                 offset = btrfs_file_extent_offset(leaf, fi);
6988                 if (dback->disk_bytenr + offset +
6989                     btrfs_file_extent_num_bytes(leaf, fi) >
6990                     entry->bytenr + entry->bytes) {
6991                         fprintf(stderr, "Ref is past the entry end, please "
6992                                 "take a btrfs-image of this file system and "
6993                                 "send it to a btrfs developer, ref %Lu\n",
6994                                 dback->disk_bytenr);
6995                         ret = -EINVAL;
6996                         goto out;
6997                 }
6998                 offset += off_diff;
6999                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7000                 btrfs_set_file_extent_offset(leaf, fi, offset);
7001         } else if (dback->disk_bytenr < entry->bytenr) {
7002                 u64 offset;
7003
7004                 offset = btrfs_file_extent_offset(leaf, fi);
7005                 if (dback->disk_bytenr + offset < entry->bytenr) {
7006                         fprintf(stderr, "Ref is before the entry start, please"
7007                                 " take a btrfs-image of this file system and "
7008                                 "send it to a btrfs developer, ref %Lu\n",
7009                                 dback->disk_bytenr);
7010                         ret = -EINVAL;
7011                         goto out;
7012                 }
7013
7014                 offset += dback->disk_bytenr;
7015                 offset -= entry->bytenr;
7016                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7017                 btrfs_set_file_extent_offset(leaf, fi, offset);
7018         }
7019
7020         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7021
7022         /*
7023          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7024          * only do this if we aren't using compression, otherwise it's a
7025          * trickier case.
7026          */
7027         if (!btrfs_file_extent_compression(leaf, fi))
7028                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7029         else
7030                 printf("ram bytes may be wrong?\n");
7031         btrfs_mark_buffer_dirty(leaf);
7032 out:
7033         err = btrfs_commit_transaction(trans, root);
7034         btrfs_release_path(path);
7035         return ret ? ret : err;
7036 }
7037
7038 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7039                            struct extent_record *rec)
7040 {
7041         struct extent_backref *back;
7042         struct data_backref *dback;
7043         struct extent_entry *entry, *best = NULL;
7044         LIST_HEAD(entries);
7045         int nr_entries = 0;
7046         int broken_entries = 0;
7047         int ret = 0;
7048         short mismatch = 0;
7049
7050         /*
7051          * Metadata is easy and the backrefs should always agree on bytenr and
7052          * size, if not we've got bigger issues.
7053          */
7054         if (rec->metadata)
7055                 return 0;
7056
7057         list_for_each_entry(back, &rec->backrefs, list) {
7058                 if (back->full_backref || !back->is_data)
7059                         continue;
7060
7061                 dback = to_data_backref(back);
7062
7063                 /*
7064                  * We only pay attention to backrefs that we found a real
7065                  * backref for.
7066                  */
7067                 if (dback->found_ref == 0)
7068                         continue;
7069
7070                 /*
7071                  * For now we only catch when the bytes don't match, not the
7072                  * bytenr.  We can easily do this at the same time, but I want
7073                  * to have a fs image to test on before we just add repair
7074                  * functionality willy-nilly so we know we won't screw up the
7075                  * repair.
7076                  */
7077
7078                 entry = find_entry(&entries, dback->disk_bytenr,
7079                                    dback->bytes);
7080                 if (!entry) {
7081                         entry = malloc(sizeof(struct extent_entry));
7082                         if (!entry) {
7083                                 ret = -ENOMEM;
7084                                 goto out;
7085                         }
7086                         memset(entry, 0, sizeof(*entry));
7087                         entry->bytenr = dback->disk_bytenr;
7088                         entry->bytes = dback->bytes;
7089                         list_add_tail(&entry->list, &entries);
7090                         nr_entries++;
7091                 }
7092
7093                 /*
7094                  * If we only have on entry we may think the entries agree when
7095                  * in reality they don't so we have to do some extra checking.
7096                  */
7097                 if (dback->disk_bytenr != rec->start ||
7098                     dback->bytes != rec->nr || back->broken)
7099                         mismatch = 1;
7100
7101                 if (back->broken) {
7102                         entry->broken++;
7103                         broken_entries++;
7104                 }
7105
7106                 entry->count++;
7107         }
7108
7109         /* Yay all the backrefs agree, carry on good sir */
7110         if (nr_entries <= 1 && !mismatch)
7111                 goto out;
7112
7113         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7114                 "%Lu\n", rec->start);
7115
7116         /*
7117          * First we want to see if the backrefs can agree amongst themselves who
7118          * is right, so figure out which one of the entries has the highest
7119          * count.
7120          */
7121         best = find_most_right_entry(&entries);
7122
7123         /*
7124          * Ok so we may have an even split between what the backrefs think, so
7125          * this is where we use the extent ref to see what it thinks.
7126          */
7127         if (!best) {
7128                 entry = find_entry(&entries, rec->start, rec->nr);
7129                 if (!entry && (!broken_entries || !rec->found_rec)) {
7130                         fprintf(stderr, "Backrefs don't agree with each other "
7131                                 "and extent record doesn't agree with anybody,"
7132                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7133                                 rec->start, rec->nr);
7134                         ret = -EINVAL;
7135                         goto out;
7136                 } else if (!entry) {
7137                         /*
7138                          * Ok our backrefs were broken, we'll assume this is the
7139                          * correct value and add an entry for this range.
7140                          */
7141                         entry = malloc(sizeof(struct extent_entry));
7142                         if (!entry) {
7143                                 ret = -ENOMEM;
7144                                 goto out;
7145                         }
7146                         memset(entry, 0, sizeof(*entry));
7147                         entry->bytenr = rec->start;
7148                         entry->bytes = rec->nr;
7149                         list_add_tail(&entry->list, &entries);
7150                         nr_entries++;
7151                 }
7152                 entry->count++;
7153                 best = find_most_right_entry(&entries);
7154                 if (!best) {
7155                         fprintf(stderr, "Backrefs and extent record evenly "
7156                                 "split on who is right, this is going to "
7157                                 "require user input to fix bytenr %Lu bytes "
7158                                 "%Lu\n", rec->start, rec->nr);
7159                         ret = -EINVAL;
7160                         goto out;
7161                 }
7162         }
7163
7164         /*
7165          * I don't think this can happen currently as we'll abort() if we catch
7166          * this case higher up, but in case somebody removes that we still can't
7167          * deal with it properly here yet, so just bail out of that's the case.
7168          */
7169         if (best->bytenr != rec->start) {
7170                 fprintf(stderr, "Extent start and backref starts don't match, "
7171                         "please use btrfs-image on this file system and send "
7172                         "it to a btrfs developer so they can make fsck fix "
7173                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7174                         rec->start, rec->nr);
7175                 ret = -EINVAL;
7176                 goto out;
7177         }
7178
7179         /*
7180          * Ok great we all agreed on an extent record, let's go find the real
7181          * references and fix up the ones that don't match.
7182          */
7183         list_for_each_entry(back, &rec->backrefs, list) {
7184                 if (back->full_backref || !back->is_data)
7185                         continue;
7186
7187                 dback = to_data_backref(back);
7188
7189                 /*
7190                  * Still ignoring backrefs that don't have a real ref attached
7191                  * to them.
7192                  */
7193                 if (dback->found_ref == 0)
7194                         continue;
7195
7196                 if (dback->bytes == best->bytes &&
7197                     dback->disk_bytenr == best->bytenr)
7198                         continue;
7199
7200                 ret = repair_ref(info, path, dback, best);
7201                 if (ret)
7202                         goto out;
7203         }
7204
7205         /*
7206          * Ok we messed with the actual refs, which means we need to drop our
7207          * entire cache and go back and rescan.  I know this is a huge pain and
7208          * adds a lot of extra work, but it's the only way to be safe.  Once all
7209          * the backrefs agree we may not need to do anything to the extent
7210          * record itself.
7211          */
7212         ret = -EAGAIN;
7213 out:
7214         while (!list_empty(&entries)) {
7215                 entry = list_entry(entries.next, struct extent_entry, list);
7216                 list_del_init(&entry->list);
7217                 free(entry);
7218         }
7219         return ret;
7220 }
7221
7222 static int process_duplicates(struct btrfs_root *root,
7223                               struct cache_tree *extent_cache,
7224                               struct extent_record *rec)
7225 {
7226         struct extent_record *good, *tmp;
7227         struct cache_extent *cache;
7228         int ret;
7229
7230         /*
7231          * If we found a extent record for this extent then return, or if we
7232          * have more than one duplicate we are likely going to need to delete
7233          * something.
7234          */
7235         if (rec->found_rec || rec->num_duplicates > 1)
7236                 return 0;
7237
7238         /* Shouldn't happen but just in case */
7239         BUG_ON(!rec->num_duplicates);
7240
7241         /*
7242          * So this happens if we end up with a backref that doesn't match the
7243          * actual extent entry.  So either the backref is bad or the extent
7244          * entry is bad.  Either way we want to have the extent_record actually
7245          * reflect what we found in the extent_tree, so we need to take the
7246          * duplicate out and use that as the extent_record since the only way we
7247          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7248          */
7249         remove_cache_extent(extent_cache, &rec->cache);
7250
7251         good = to_extent_record(rec->dups.next);
7252         list_del_init(&good->list);
7253         INIT_LIST_HEAD(&good->backrefs);
7254         INIT_LIST_HEAD(&good->dups);
7255         good->cache.start = good->start;
7256         good->cache.size = good->nr;
7257         good->content_checked = 0;
7258         good->owner_ref_checked = 0;
7259         good->num_duplicates = 0;
7260         good->refs = rec->refs;
7261         list_splice_init(&rec->backrefs, &good->backrefs);
7262         while (1) {
7263                 cache = lookup_cache_extent(extent_cache, good->start,
7264                                             good->nr);
7265                 if (!cache)
7266                         break;
7267                 tmp = container_of(cache, struct extent_record, cache);
7268
7269                 /*
7270                  * If we find another overlapping extent and it's found_rec is
7271                  * set then it's a duplicate and we need to try and delete
7272                  * something.
7273                  */
7274                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7275                         if (list_empty(&good->list))
7276                                 list_add_tail(&good->list,
7277                                               &duplicate_extents);
7278                         good->num_duplicates += tmp->num_duplicates + 1;
7279                         list_splice_init(&tmp->dups, &good->dups);
7280                         list_del_init(&tmp->list);
7281                         list_add_tail(&tmp->list, &good->dups);
7282                         remove_cache_extent(extent_cache, &tmp->cache);
7283                         continue;
7284                 }
7285
7286                 /*
7287                  * Ok we have another non extent item backed extent rec, so lets
7288                  * just add it to this extent and carry on like we did above.
7289                  */
7290                 good->refs += tmp->refs;
7291                 list_splice_init(&tmp->backrefs, &good->backrefs);
7292                 remove_cache_extent(extent_cache, &tmp->cache);
7293                 free(tmp);
7294         }
7295         ret = insert_cache_extent(extent_cache, &good->cache);
7296         BUG_ON(ret);
7297         free(rec);
7298         return good->num_duplicates ? 0 : 1;
7299 }
7300
7301 static int delete_duplicate_records(struct btrfs_root *root,
7302                                     struct extent_record *rec)
7303 {
7304         struct btrfs_trans_handle *trans;
7305         LIST_HEAD(delete_list);
7306         struct btrfs_path *path;
7307         struct extent_record *tmp, *good, *n;
7308         int nr_del = 0;
7309         int ret = 0, err;
7310         struct btrfs_key key;
7311
7312         path = btrfs_alloc_path();
7313         if (!path) {
7314                 ret = -ENOMEM;
7315                 goto out;
7316         }
7317
7318         good = rec;
7319         /* Find the record that covers all of the duplicates. */
7320         list_for_each_entry(tmp, &rec->dups, list) {
7321                 if (good->start < tmp->start)
7322                         continue;
7323                 if (good->nr > tmp->nr)
7324                         continue;
7325
7326                 if (tmp->start + tmp->nr < good->start + good->nr) {
7327                         fprintf(stderr, "Ok we have overlapping extents that "
7328                                 "aren't completely covered by each other, this "
7329                                 "is going to require more careful thought.  "
7330                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7331                                 tmp->start, tmp->nr, good->start, good->nr);
7332                         abort();
7333                 }
7334                 good = tmp;
7335         }
7336
7337         if (good != rec)
7338                 list_add_tail(&rec->list, &delete_list);
7339
7340         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7341                 if (tmp == good)
7342                         continue;
7343                 list_move_tail(&tmp->list, &delete_list);
7344         }
7345
7346         root = root->fs_info->extent_root;
7347         trans = btrfs_start_transaction(root, 1);
7348         if (IS_ERR(trans)) {
7349                 ret = PTR_ERR(trans);
7350                 goto out;
7351         }
7352
7353         list_for_each_entry(tmp, &delete_list, list) {
7354                 if (tmp->found_rec == 0)
7355                         continue;
7356                 key.objectid = tmp->start;
7357                 key.type = BTRFS_EXTENT_ITEM_KEY;
7358                 key.offset = tmp->nr;
7359
7360                 /* Shouldn't happen but just in case */
7361                 if (tmp->metadata) {
7362                         fprintf(stderr, "Well this shouldn't happen, extent "
7363                                 "record overlaps but is metadata? "
7364                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7365                         abort();
7366                 }
7367
7368                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7369                 if (ret) {
7370                         if (ret > 0)
7371                                 ret = -EINVAL;
7372                         break;
7373                 }
7374                 ret = btrfs_del_item(trans, root, path);
7375                 if (ret)
7376                         break;
7377                 btrfs_release_path(path);
7378                 nr_del++;
7379         }
7380         err = btrfs_commit_transaction(trans, root);
7381         if (err && !ret)
7382                 ret = err;
7383 out:
7384         while (!list_empty(&delete_list)) {
7385                 tmp = to_extent_record(delete_list.next);
7386                 list_del_init(&tmp->list);
7387                 if (tmp == rec)
7388                         continue;
7389                 free(tmp);
7390         }
7391
7392         while (!list_empty(&rec->dups)) {
7393                 tmp = to_extent_record(rec->dups.next);
7394                 list_del_init(&tmp->list);
7395                 free(tmp);
7396         }
7397
7398         btrfs_free_path(path);
7399
7400         if (!ret && !nr_del)
7401                 rec->num_duplicates = 0;
7402
7403         return ret ? ret : nr_del;
7404 }
7405
7406 static int find_possible_backrefs(struct btrfs_fs_info *info,
7407                                   struct btrfs_path *path,
7408                                   struct cache_tree *extent_cache,
7409                                   struct extent_record *rec)
7410 {
7411         struct btrfs_root *root;
7412         struct extent_backref *back;
7413         struct data_backref *dback;
7414         struct cache_extent *cache;
7415         struct btrfs_file_extent_item *fi;
7416         struct btrfs_key key;
7417         u64 bytenr, bytes;
7418         int ret;
7419
7420         list_for_each_entry(back, &rec->backrefs, list) {
7421                 /* Don't care about full backrefs (poor unloved backrefs) */
7422                 if (back->full_backref || !back->is_data)
7423                         continue;
7424
7425                 dback = to_data_backref(back);
7426
7427                 /* We found this one, we don't need to do a lookup */
7428                 if (dback->found_ref)
7429                         continue;
7430
7431                 key.objectid = dback->root;
7432                 key.type = BTRFS_ROOT_ITEM_KEY;
7433                 key.offset = (u64)-1;
7434
7435                 root = btrfs_read_fs_root(info, &key);
7436
7437                 /* No root, definitely a bad ref, skip */
7438                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7439                         continue;
7440                 /* Other err, exit */
7441                 if (IS_ERR(root))
7442                         return PTR_ERR(root);
7443
7444                 key.objectid = dback->owner;
7445                 key.type = BTRFS_EXTENT_DATA_KEY;
7446                 key.offset = dback->offset;
7447                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7448                 if (ret) {
7449                         btrfs_release_path(path);
7450                         if (ret < 0)
7451                                 return ret;
7452                         /* Didn't find it, we can carry on */
7453                         ret = 0;
7454                         continue;
7455                 }
7456
7457                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7458                                     struct btrfs_file_extent_item);
7459                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7460                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7461                 btrfs_release_path(path);
7462                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7463                 if (cache) {
7464                         struct extent_record *tmp;
7465                         tmp = container_of(cache, struct extent_record, cache);
7466
7467                         /*
7468                          * If we found an extent record for the bytenr for this
7469                          * particular backref then we can't add it to our
7470                          * current extent record.  We only want to add backrefs
7471                          * that don't have a corresponding extent item in the
7472                          * extent tree since they likely belong to this record
7473                          * and we need to fix it if it doesn't match bytenrs.
7474                          */
7475                         if  (tmp->found_rec)
7476                                 continue;
7477                 }
7478
7479                 dback->found_ref += 1;
7480                 dback->disk_bytenr = bytenr;
7481                 dback->bytes = bytes;
7482
7483                 /*
7484                  * Set this so the verify backref code knows not to trust the
7485                  * values in this backref.
7486                  */
7487                 back->broken = 1;
7488         }
7489
7490         return 0;
7491 }
7492
7493 /*
7494  * Record orphan data ref into corresponding root.
7495  *
7496  * Return 0 if the extent item contains data ref and recorded.
7497  * Return 1 if the extent item contains no useful data ref
7498  *   On that case, it may contains only shared_dataref or metadata backref
7499  *   or the file extent exists(this should be handled by the extent bytenr
7500  *   recovery routine)
7501  * Return <0 if something goes wrong.
7502  */
7503 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7504                                       struct extent_record *rec)
7505 {
7506         struct btrfs_key key;
7507         struct btrfs_root *dest_root;
7508         struct extent_backref *back;
7509         struct data_backref *dback;
7510         struct orphan_data_extent *orphan;
7511         struct btrfs_path *path;
7512         int recorded_data_ref = 0;
7513         int ret = 0;
7514
7515         if (rec->metadata)
7516                 return 1;
7517         path = btrfs_alloc_path();
7518         if (!path)
7519                 return -ENOMEM;
7520         list_for_each_entry(back, &rec->backrefs, list) {
7521                 if (back->full_backref || !back->is_data ||
7522                     !back->found_extent_tree)
7523                         continue;
7524                 dback = to_data_backref(back);
7525                 if (dback->found_ref)
7526                         continue;
7527                 key.objectid = dback->root;
7528                 key.type = BTRFS_ROOT_ITEM_KEY;
7529                 key.offset = (u64)-1;
7530
7531                 dest_root = btrfs_read_fs_root(fs_info, &key);
7532
7533                 /* For non-exist root we just skip it */
7534                 if (IS_ERR(dest_root) || !dest_root)
7535                         continue;
7536
7537                 key.objectid = dback->owner;
7538                 key.type = BTRFS_EXTENT_DATA_KEY;
7539                 key.offset = dback->offset;
7540
7541                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7542                 btrfs_release_path(path);
7543                 /*
7544                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7545                  * we need to record it for inode/file extent rebuild.
7546                  * For ret > 0, we record it only for file extent rebuild.
7547                  * For ret == 0, the file extent exists but only bytenr
7548                  * mismatch, let the original bytenr fix routine to handle,
7549                  * don't record it.
7550                  */
7551                 if (ret == 0)
7552                         continue;
7553                 ret = 0;
7554                 orphan = malloc(sizeof(*orphan));
7555                 if (!orphan) {
7556                         ret = -ENOMEM;
7557                         goto out;
7558                 }
7559                 INIT_LIST_HEAD(&orphan->list);
7560                 orphan->root = dback->root;
7561                 orphan->objectid = dback->owner;
7562                 orphan->offset = dback->offset;
7563                 orphan->disk_bytenr = rec->cache.start;
7564                 orphan->disk_len = rec->cache.size;
7565                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7566                 recorded_data_ref = 1;
7567         }
7568 out:
7569         btrfs_free_path(path);
7570         if (!ret)
7571                 return !recorded_data_ref;
7572         else
7573                 return ret;
7574 }
7575
7576 /*
7577  * when an incorrect extent item is found, this will delete
7578  * all of the existing entries for it and recreate them
7579  * based on what the tree scan found.
7580  */
7581 static int fixup_extent_refs(struct btrfs_fs_info *info,
7582                              struct cache_tree *extent_cache,
7583                              struct extent_record *rec)
7584 {
7585         struct btrfs_trans_handle *trans = NULL;
7586         int ret;
7587         struct btrfs_path *path;
7588         struct list_head *cur = rec->backrefs.next;
7589         struct cache_extent *cache;
7590         struct extent_backref *back;
7591         int allocated = 0;
7592         u64 flags = 0;
7593
7594         if (rec->flag_block_full_backref)
7595                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7596
7597         path = btrfs_alloc_path();
7598         if (!path)
7599                 return -ENOMEM;
7600
7601         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7602                 /*
7603                  * Sometimes the backrefs themselves are so broken they don't
7604                  * get attached to any meaningful rec, so first go back and
7605                  * check any of our backrefs that we couldn't find and throw
7606                  * them into the list if we find the backref so that
7607                  * verify_backrefs can figure out what to do.
7608                  */
7609                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7610                 if (ret < 0)
7611                         goto out;
7612         }
7613
7614         /* step one, make sure all of the backrefs agree */
7615         ret = verify_backrefs(info, path, rec);
7616         if (ret < 0)
7617                 goto out;
7618
7619         trans = btrfs_start_transaction(info->extent_root, 1);
7620         if (IS_ERR(trans)) {
7621                 ret = PTR_ERR(trans);
7622                 goto out;
7623         }
7624
7625         /* step two, delete all the existing records */
7626         ret = delete_extent_records(trans, info->extent_root, path,
7627                                     rec->start, rec->max_size);
7628
7629         if (ret < 0)
7630                 goto out;
7631
7632         /* was this block corrupt?  If so, don't add references to it */
7633         cache = lookup_cache_extent(info->corrupt_blocks,
7634                                     rec->start, rec->max_size);
7635         if (cache) {
7636                 ret = 0;
7637                 goto out;
7638         }
7639
7640         /* step three, recreate all the refs we did find */
7641         while(cur != &rec->backrefs) {
7642                 back = to_extent_backref(cur);
7643                 cur = cur->next;
7644
7645                 /*
7646                  * if we didn't find any references, don't create a
7647                  * new extent record
7648                  */
7649                 if (!back->found_ref)
7650                         continue;
7651
7652                 rec->bad_full_backref = 0;
7653                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7654                 allocated = 1;
7655
7656                 if (ret)
7657                         goto out;
7658         }
7659 out:
7660         if (trans) {
7661                 int err = btrfs_commit_transaction(trans, info->extent_root);
7662                 if (!ret)
7663                         ret = err;
7664         }
7665
7666         btrfs_free_path(path);
7667         return ret;
7668 }
7669
7670 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7671                               struct extent_record *rec)
7672 {
7673         struct btrfs_trans_handle *trans;
7674         struct btrfs_root *root = fs_info->extent_root;
7675         struct btrfs_path *path;
7676         struct btrfs_extent_item *ei;
7677         struct btrfs_key key;
7678         u64 flags;
7679         int ret = 0;
7680
7681         key.objectid = rec->start;
7682         if (rec->metadata) {
7683                 key.type = BTRFS_METADATA_ITEM_KEY;
7684                 key.offset = rec->info_level;
7685         } else {
7686                 key.type = BTRFS_EXTENT_ITEM_KEY;
7687                 key.offset = rec->max_size;
7688         }
7689
7690         path = btrfs_alloc_path();
7691         if (!path)
7692                 return -ENOMEM;
7693
7694         trans = btrfs_start_transaction(root, 0);
7695         if (IS_ERR(trans)) {
7696                 btrfs_free_path(path);
7697                 return PTR_ERR(trans);
7698         }
7699
7700         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7701         if (ret < 0) {
7702                 btrfs_free_path(path);
7703                 btrfs_commit_transaction(trans, root);
7704                 return ret;
7705         } else if (ret) {
7706                 fprintf(stderr, "Didn't find extent for %llu\n",
7707                         (unsigned long long)rec->start);
7708                 btrfs_free_path(path);
7709                 btrfs_commit_transaction(trans, root);
7710                 return -ENOENT;
7711         }
7712
7713         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7714                             struct btrfs_extent_item);
7715         flags = btrfs_extent_flags(path->nodes[0], ei);
7716         if (rec->flag_block_full_backref) {
7717                 fprintf(stderr, "setting full backref on %llu\n",
7718                         (unsigned long long)key.objectid);
7719                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7720         } else {
7721                 fprintf(stderr, "clearing full backref on %llu\n",
7722                         (unsigned long long)key.objectid);
7723                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7724         }
7725         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7726         btrfs_mark_buffer_dirty(path->nodes[0]);
7727         btrfs_free_path(path);
7728         return btrfs_commit_transaction(trans, root);
7729 }
7730
7731 /* right now we only prune from the extent allocation tree */
7732 static int prune_one_block(struct btrfs_trans_handle *trans,
7733                            struct btrfs_fs_info *info,
7734                            struct btrfs_corrupt_block *corrupt)
7735 {
7736         int ret;
7737         struct btrfs_path path;
7738         struct extent_buffer *eb;
7739         u64 found;
7740         int slot;
7741         int nritems;
7742         int level = corrupt->level + 1;
7743
7744         btrfs_init_path(&path);
7745 again:
7746         /* we want to stop at the parent to our busted block */
7747         path.lowest_level = level;
7748
7749         ret = btrfs_search_slot(trans, info->extent_root,
7750                                 &corrupt->key, &path, -1, 1);
7751
7752         if (ret < 0)
7753                 goto out;
7754
7755         eb = path.nodes[level];
7756         if (!eb) {
7757                 ret = -ENOENT;
7758                 goto out;
7759         }
7760
7761         /*
7762          * hopefully the search gave us the block we want to prune,
7763          * lets try that first
7764          */
7765         slot = path.slots[level];
7766         found =  btrfs_node_blockptr(eb, slot);
7767         if (found == corrupt->cache.start)
7768                 goto del_ptr;
7769
7770         nritems = btrfs_header_nritems(eb);
7771
7772         /* the search failed, lets scan this node and hope we find it */
7773         for (slot = 0; slot < nritems; slot++) {
7774                 found =  btrfs_node_blockptr(eb, slot);
7775                 if (found == corrupt->cache.start)
7776                         goto del_ptr;
7777         }
7778         /*
7779          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7780          * to this block
7781          */
7782         if (eb == info->extent_root->node) {
7783                 ret = -ENOENT;
7784                 goto out;
7785         } else {
7786                 level++;
7787                 btrfs_release_path(&path);
7788                 goto again;
7789         }
7790
7791 del_ptr:
7792         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7793         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7794
7795 out:
7796         btrfs_release_path(&path);
7797         return ret;
7798 }
7799
7800 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7801 {
7802         struct btrfs_trans_handle *trans = NULL;
7803         struct cache_extent *cache;
7804         struct btrfs_corrupt_block *corrupt;
7805
7806         while (1) {
7807                 cache = search_cache_extent(info->corrupt_blocks, 0);
7808                 if (!cache)
7809                         break;
7810                 if (!trans) {
7811                         trans = btrfs_start_transaction(info->extent_root, 1);
7812                         if (IS_ERR(trans))
7813                                 return PTR_ERR(trans);
7814                 }
7815                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7816                 prune_one_block(trans, info, corrupt);
7817                 remove_cache_extent(info->corrupt_blocks, cache);
7818         }
7819         if (trans)
7820                 return btrfs_commit_transaction(trans, info->extent_root);
7821         return 0;
7822 }
7823
7824 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7825 {
7826         struct btrfs_block_group_cache *cache;
7827         u64 start, end;
7828         int ret;
7829
7830         while (1) {
7831                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7832                                             &start, &end, EXTENT_DIRTY);
7833                 if (ret)
7834                         break;
7835                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7836                                    GFP_NOFS);
7837         }
7838
7839         start = 0;
7840         while (1) {
7841                 cache = btrfs_lookup_first_block_group(fs_info, start);
7842                 if (!cache)
7843                         break;
7844                 if (cache->cached)
7845                         cache->cached = 0;
7846                 start = cache->key.objectid + cache->key.offset;
7847         }
7848 }
7849
7850 static int check_extent_refs(struct btrfs_root *root,
7851                              struct cache_tree *extent_cache)
7852 {
7853         struct extent_record *rec;
7854         struct cache_extent *cache;
7855         int err = 0;
7856         int ret = 0;
7857         int fixed = 0;
7858         int had_dups = 0;
7859         int recorded = 0;
7860
7861         if (repair) {
7862                 /*
7863                  * if we're doing a repair, we have to make sure
7864                  * we don't allocate from the problem extents.
7865                  * In the worst case, this will be all the
7866                  * extents in the FS
7867                  */
7868                 cache = search_cache_extent(extent_cache, 0);
7869                 while(cache) {
7870                         rec = container_of(cache, struct extent_record, cache);
7871                         set_extent_dirty(root->fs_info->excluded_extents,
7872                                          rec->start,
7873                                          rec->start + rec->max_size - 1,
7874                                          GFP_NOFS);
7875                         cache = next_cache_extent(cache);
7876                 }
7877
7878                 /* pin down all the corrupted blocks too */
7879                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7880                 while(cache) {
7881                         set_extent_dirty(root->fs_info->excluded_extents,
7882                                          cache->start,
7883                                          cache->start + cache->size - 1,
7884                                          GFP_NOFS);
7885                         cache = next_cache_extent(cache);
7886                 }
7887                 prune_corrupt_blocks(root->fs_info);
7888                 reset_cached_block_groups(root->fs_info);
7889         }
7890
7891         reset_cached_block_groups(root->fs_info);
7892
7893         /*
7894          * We need to delete any duplicate entries we find first otherwise we
7895          * could mess up the extent tree when we have backrefs that actually
7896          * belong to a different extent item and not the weird duplicate one.
7897          */
7898         while (repair && !list_empty(&duplicate_extents)) {
7899                 rec = to_extent_record(duplicate_extents.next);
7900                 list_del_init(&rec->list);
7901
7902                 /* Sometimes we can find a backref before we find an actual
7903                  * extent, so we need to process it a little bit to see if there
7904                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7905                  * if this is a backref screwup.  If we need to delete stuff
7906                  * process_duplicates() will return 0, otherwise it will return
7907                  * 1 and we
7908                  */
7909                 if (process_duplicates(root, extent_cache, rec))
7910                         continue;
7911                 ret = delete_duplicate_records(root, rec);
7912                 if (ret < 0)
7913                         return ret;
7914                 /*
7915                  * delete_duplicate_records will return the number of entries
7916                  * deleted, so if it's greater than 0 then we know we actually
7917                  * did something and we need to remove.
7918                  */
7919                 if (ret)
7920                         had_dups = 1;
7921         }
7922
7923         if (had_dups)
7924                 return -EAGAIN;
7925
7926         while(1) {
7927                 int cur_err = 0;
7928
7929                 fixed = 0;
7930                 recorded = 0;
7931                 cache = search_cache_extent(extent_cache, 0);
7932                 if (!cache)
7933                         break;
7934                 rec = container_of(cache, struct extent_record, cache);
7935                 if (rec->num_duplicates) {
7936                         fprintf(stderr, "extent item %llu has multiple extent "
7937                                 "items\n", (unsigned long long)rec->start);
7938                         err = 1;
7939                         cur_err = 1;
7940                 }
7941
7942                 if (rec->refs != rec->extent_item_refs) {
7943                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7944                                 (unsigned long long)rec->start,
7945                                 (unsigned long long)rec->nr);
7946                         fprintf(stderr, "extent item %llu, found %llu\n",
7947                                 (unsigned long long)rec->extent_item_refs,
7948                                 (unsigned long long)rec->refs);
7949                         ret = record_orphan_data_extents(root->fs_info, rec);
7950                         if (ret < 0)
7951                                 goto repair_abort;
7952                         if (ret == 0) {
7953                                 recorded = 1;
7954                         } else {
7955                                 /*
7956                                  * we can't use the extent to repair file
7957                                  * extent, let the fallback method handle it.
7958                                  */
7959                                 if (!fixed && repair) {
7960                                         ret = fixup_extent_refs(
7961                                                         root->fs_info,
7962                                                         extent_cache, rec);
7963                                         if (ret)
7964                                                 goto repair_abort;
7965                                         fixed = 1;
7966                                 }
7967                         }
7968                         err = 1;
7969                         cur_err = 1;
7970                 }
7971                 if (all_backpointers_checked(rec, 1)) {
7972                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7973                                 (unsigned long long)rec->start,
7974                                 (unsigned long long)rec->nr);
7975
7976                         if (!fixed && !recorded && repair) {
7977                                 ret = fixup_extent_refs(root->fs_info,
7978                                                         extent_cache, rec);
7979                                 if (ret)
7980                                         goto repair_abort;
7981                                 fixed = 1;
7982                         }
7983                         cur_err = 1;
7984                         err = 1;
7985                 }
7986                 if (!rec->owner_ref_checked) {
7987                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7988                                 (unsigned long long)rec->start,
7989                                 (unsigned long long)rec->nr);
7990                         if (!fixed && !recorded && repair) {
7991                                 ret = fixup_extent_refs(root->fs_info,
7992                                                         extent_cache, rec);
7993                                 if (ret)
7994                                         goto repair_abort;
7995                                 fixed = 1;
7996                         }
7997                         err = 1;
7998                         cur_err = 1;
7999                 }
8000                 if (rec->bad_full_backref) {
8001                         fprintf(stderr, "bad full backref, on [%llu]\n",
8002                                 (unsigned long long)rec->start);
8003                         if (repair) {
8004                                 ret = fixup_extent_flags(root->fs_info, rec);
8005                                 if (ret)
8006                                         goto repair_abort;
8007                                 fixed = 1;
8008                         }
8009                         err = 1;
8010                         cur_err = 1;
8011                 }
8012                 /*
8013                  * Although it's not a extent ref's problem, we reuse this
8014                  * routine for error reporting.
8015                  * No repair function yet.
8016                  */
8017                 if (rec->crossing_stripes) {
8018                         fprintf(stderr,
8019                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8020                                 rec->start, rec->start + rec->max_size);
8021                         err = 1;
8022                         cur_err = 1;
8023                 }
8024
8025                 if (rec->wrong_chunk_type) {
8026                         fprintf(stderr,
8027                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8028                                 rec->start, rec->start + rec->max_size);
8029                         err = 1;
8030                         cur_err = 1;
8031                 }
8032
8033                 remove_cache_extent(extent_cache, cache);
8034                 free_all_extent_backrefs(rec);
8035                 if (!init_extent_tree && repair && (!cur_err || fixed))
8036                         clear_extent_dirty(root->fs_info->excluded_extents,
8037                                            rec->start,
8038                                            rec->start + rec->max_size - 1,
8039                                            GFP_NOFS);
8040                 free(rec);
8041         }
8042 repair_abort:
8043         if (repair) {
8044                 if (ret && ret != -EAGAIN) {
8045                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8046                         exit(1);
8047                 } else if (!ret) {
8048                         struct btrfs_trans_handle *trans;
8049
8050                         root = root->fs_info->extent_root;
8051                         trans = btrfs_start_transaction(root, 1);
8052                         if (IS_ERR(trans)) {
8053                                 ret = PTR_ERR(trans);
8054                                 goto repair_abort;
8055                         }
8056
8057                         btrfs_fix_block_accounting(trans, root);
8058                         ret = btrfs_commit_transaction(trans, root);
8059                         if (ret)
8060                                 goto repair_abort;
8061                 }
8062                 if (err)
8063                         fprintf(stderr, "repaired damaged extent references\n");
8064                 return ret;
8065         }
8066         return err;
8067 }
8068
8069 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8070 {
8071         u64 stripe_size;
8072
8073         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8074                 stripe_size = length;
8075                 stripe_size /= num_stripes;
8076         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8077                 stripe_size = length * 2;
8078                 stripe_size /= num_stripes;
8079         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8080                 stripe_size = length;
8081                 stripe_size /= (num_stripes - 1);
8082         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8083                 stripe_size = length;
8084                 stripe_size /= (num_stripes - 2);
8085         } else {
8086                 stripe_size = length;
8087         }
8088         return stripe_size;
8089 }
8090
8091 /*
8092  * Check the chunk with its block group/dev list ref:
8093  * Return 0 if all refs seems valid.
8094  * Return 1 if part of refs seems valid, need later check for rebuild ref
8095  * like missing block group and needs to search extent tree to rebuild them.
8096  * Return -1 if essential refs are missing and unable to rebuild.
8097  */
8098 static int check_chunk_refs(struct chunk_record *chunk_rec,
8099                             struct block_group_tree *block_group_cache,
8100                             struct device_extent_tree *dev_extent_cache,
8101                             int silent)
8102 {
8103         struct cache_extent *block_group_item;
8104         struct block_group_record *block_group_rec;
8105         struct cache_extent *dev_extent_item;
8106         struct device_extent_record *dev_extent_rec;
8107         u64 devid;
8108         u64 offset;
8109         u64 length;
8110         int metadump_v2 = 0;
8111         int i;
8112         int ret = 0;
8113
8114         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8115                                                chunk_rec->offset,
8116                                                chunk_rec->length);
8117         if (block_group_item) {
8118                 block_group_rec = container_of(block_group_item,
8119                                                struct block_group_record,
8120                                                cache);
8121                 if (chunk_rec->length != block_group_rec->offset ||
8122                     chunk_rec->offset != block_group_rec->objectid ||
8123                     (!metadump_v2 &&
8124                      chunk_rec->type_flags != block_group_rec->flags)) {
8125                         if (!silent)
8126                                 fprintf(stderr,
8127                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8128                                         chunk_rec->objectid,
8129                                         chunk_rec->type,
8130                                         chunk_rec->offset,
8131                                         chunk_rec->length,
8132                                         chunk_rec->offset,
8133                                         chunk_rec->type_flags,
8134                                         block_group_rec->objectid,
8135                                         block_group_rec->type,
8136                                         block_group_rec->offset,
8137                                         block_group_rec->offset,
8138                                         block_group_rec->objectid,
8139                                         block_group_rec->flags);
8140                         ret = -1;
8141                 } else {
8142                         list_del_init(&block_group_rec->list);
8143                         chunk_rec->bg_rec = block_group_rec;
8144                 }
8145         } else {
8146                 if (!silent)
8147                         fprintf(stderr,
8148                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8149                                 chunk_rec->objectid,
8150                                 chunk_rec->type,
8151                                 chunk_rec->offset,
8152                                 chunk_rec->length,
8153                                 chunk_rec->offset,
8154                                 chunk_rec->type_flags);
8155                 ret = 1;
8156         }
8157
8158         if (metadump_v2)
8159                 return ret;
8160
8161         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8162                                     chunk_rec->num_stripes);
8163         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8164                 devid = chunk_rec->stripes[i].devid;
8165                 offset = chunk_rec->stripes[i].offset;
8166                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8167                                                        devid, offset, length);
8168                 if (dev_extent_item) {
8169                         dev_extent_rec = container_of(dev_extent_item,
8170                                                 struct device_extent_record,
8171                                                 cache);
8172                         if (dev_extent_rec->objectid != devid ||
8173                             dev_extent_rec->offset != offset ||
8174                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8175                             dev_extent_rec->length != length) {
8176                                 if (!silent)
8177                                         fprintf(stderr,
8178                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8179                                                 chunk_rec->objectid,
8180                                                 chunk_rec->type,
8181                                                 chunk_rec->offset,
8182                                                 chunk_rec->stripes[i].devid,
8183                                                 chunk_rec->stripes[i].offset,
8184                                                 dev_extent_rec->objectid,
8185                                                 dev_extent_rec->offset,
8186                                                 dev_extent_rec->length);
8187                                 ret = -1;
8188                         } else {
8189                                 list_move(&dev_extent_rec->chunk_list,
8190                                           &chunk_rec->dextents);
8191                         }
8192                 } else {
8193                         if (!silent)
8194                                 fprintf(stderr,
8195                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8196                                         chunk_rec->objectid,
8197                                         chunk_rec->type,
8198                                         chunk_rec->offset,
8199                                         chunk_rec->stripes[i].devid,
8200                                         chunk_rec->stripes[i].offset);
8201                         ret = -1;
8202                 }
8203         }
8204         return ret;
8205 }
8206
8207 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8208 int check_chunks(struct cache_tree *chunk_cache,
8209                  struct block_group_tree *block_group_cache,
8210                  struct device_extent_tree *dev_extent_cache,
8211                  struct list_head *good, struct list_head *bad,
8212                  struct list_head *rebuild, int silent)
8213 {
8214         struct cache_extent *chunk_item;
8215         struct chunk_record *chunk_rec;
8216         struct block_group_record *bg_rec;
8217         struct device_extent_record *dext_rec;
8218         int err;
8219         int ret = 0;
8220
8221         chunk_item = first_cache_extent(chunk_cache);
8222         while (chunk_item) {
8223                 chunk_rec = container_of(chunk_item, struct chunk_record,
8224                                          cache);
8225                 err = check_chunk_refs(chunk_rec, block_group_cache,
8226                                        dev_extent_cache, silent);
8227                 if (err < 0)
8228                         ret = err;
8229                 if (err == 0 && good)
8230                         list_add_tail(&chunk_rec->list, good);
8231                 if (err > 0 && rebuild)
8232                         list_add_tail(&chunk_rec->list, rebuild);
8233                 if (err < 0 && bad)
8234                         list_add_tail(&chunk_rec->list, bad);
8235                 chunk_item = next_cache_extent(chunk_item);
8236         }
8237
8238         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8239                 if (!silent)
8240                         fprintf(stderr,
8241                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8242                                 bg_rec->objectid,
8243                                 bg_rec->offset,
8244                                 bg_rec->flags);
8245                 if (!ret)
8246                         ret = 1;
8247         }
8248
8249         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8250                             chunk_list) {
8251                 if (!silent)
8252                         fprintf(stderr,
8253                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8254                                 dext_rec->objectid,
8255                                 dext_rec->offset,
8256                                 dext_rec->length);
8257                 if (!ret)
8258                         ret = 1;
8259         }
8260         return ret;
8261 }
8262
8263
8264 static int check_device_used(struct device_record *dev_rec,
8265                              struct device_extent_tree *dext_cache)
8266 {
8267         struct cache_extent *cache;
8268         struct device_extent_record *dev_extent_rec;
8269         u64 total_byte = 0;
8270
8271         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8272         while (cache) {
8273                 dev_extent_rec = container_of(cache,
8274                                               struct device_extent_record,
8275                                               cache);
8276                 if (dev_extent_rec->objectid != dev_rec->devid)
8277                         break;
8278
8279                 list_del_init(&dev_extent_rec->device_list);
8280                 total_byte += dev_extent_rec->length;
8281                 cache = next_cache_extent(cache);
8282         }
8283
8284         if (total_byte != dev_rec->byte_used) {
8285                 fprintf(stderr,
8286                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8287                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8288                         dev_rec->type, dev_rec->offset);
8289                 return -1;
8290         } else {
8291                 return 0;
8292         }
8293 }
8294
8295 /* check btrfs_dev_item -> btrfs_dev_extent */
8296 static int check_devices(struct rb_root *dev_cache,
8297                          struct device_extent_tree *dev_extent_cache)
8298 {
8299         struct rb_node *dev_node;
8300         struct device_record *dev_rec;
8301         struct device_extent_record *dext_rec;
8302         int err;
8303         int ret = 0;
8304
8305         dev_node = rb_first(dev_cache);
8306         while (dev_node) {
8307                 dev_rec = container_of(dev_node, struct device_record, node);
8308                 err = check_device_used(dev_rec, dev_extent_cache);
8309                 if (err)
8310                         ret = err;
8311
8312                 dev_node = rb_next(dev_node);
8313         }
8314         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8315                             device_list) {
8316                 fprintf(stderr,
8317                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8318                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8319                 if (!ret)
8320                         ret = 1;
8321         }
8322         return ret;
8323 }
8324
8325 static int add_root_item_to_list(struct list_head *head,
8326                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8327                                   u8 level, u8 drop_level,
8328                                   int level_size, struct btrfs_key *drop_key)
8329 {
8330
8331         struct root_item_record *ri_rec;
8332         ri_rec = malloc(sizeof(*ri_rec));
8333         if (!ri_rec)
8334                 return -ENOMEM;
8335         ri_rec->bytenr = bytenr;
8336         ri_rec->objectid = objectid;
8337         ri_rec->level = level;
8338         ri_rec->level_size = level_size;
8339         ri_rec->drop_level = drop_level;
8340         ri_rec->last_snapshot = last_snapshot;
8341         if (drop_key)
8342                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8343         list_add_tail(&ri_rec->list, head);
8344
8345         return 0;
8346 }
8347
8348 static void free_root_item_list(struct list_head *list)
8349 {
8350         struct root_item_record *ri_rec;
8351
8352         while (!list_empty(list)) {
8353                 ri_rec = list_first_entry(list, struct root_item_record,
8354                                           list);
8355                 list_del_init(&ri_rec->list);
8356                 free(ri_rec);
8357         }
8358 }
8359
8360 static int deal_root_from_list(struct list_head *list,
8361                                struct btrfs_root *root,
8362                                struct block_info *bits,
8363                                int bits_nr,
8364                                struct cache_tree *pending,
8365                                struct cache_tree *seen,
8366                                struct cache_tree *reada,
8367                                struct cache_tree *nodes,
8368                                struct cache_tree *extent_cache,
8369                                struct cache_tree *chunk_cache,
8370                                struct rb_root *dev_cache,
8371                                struct block_group_tree *block_group_cache,
8372                                struct device_extent_tree *dev_extent_cache)
8373 {
8374         int ret = 0;
8375         u64 last;
8376
8377         while (!list_empty(list)) {
8378                 struct root_item_record *rec;
8379                 struct extent_buffer *buf;
8380                 rec = list_entry(list->next,
8381                                  struct root_item_record, list);
8382                 last = 0;
8383                 buf = read_tree_block(root->fs_info->tree_root,
8384                                       rec->bytenr, rec->level_size, 0);
8385                 if (!extent_buffer_uptodate(buf)) {
8386                         free_extent_buffer(buf);
8387                         ret = -EIO;
8388                         break;
8389                 }
8390                 ret = add_root_to_pending(buf, extent_cache, pending,
8391                                     seen, nodes, rec->objectid);
8392                 if (ret < 0)
8393                         break;
8394                 /*
8395                  * To rebuild extent tree, we need deal with snapshot
8396                  * one by one, otherwise we deal with node firstly which
8397                  * can maximize readahead.
8398                  */
8399                 while (1) {
8400                         ret = run_next_block(root, bits, bits_nr, &last,
8401                                              pending, seen, reada, nodes,
8402                                              extent_cache, chunk_cache,
8403                                              dev_cache, block_group_cache,
8404                                              dev_extent_cache, rec);
8405                         if (ret != 0)
8406                                 break;
8407                 }
8408                 free_extent_buffer(buf);
8409                 list_del(&rec->list);
8410                 free(rec);
8411                 if (ret < 0)
8412                         break;
8413         }
8414         while (ret >= 0) {
8415                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8416                                      reada, nodes, extent_cache, chunk_cache,
8417                                      dev_cache, block_group_cache,
8418                                      dev_extent_cache, NULL);
8419                 if (ret != 0) {
8420                         if (ret > 0)
8421                                 ret = 0;
8422                         break;
8423                 }
8424         }
8425         return ret;
8426 }
8427
8428 static int check_chunks_and_extents(struct btrfs_root *root)
8429 {
8430         struct rb_root dev_cache;
8431         struct cache_tree chunk_cache;
8432         struct block_group_tree block_group_cache;
8433         struct device_extent_tree dev_extent_cache;
8434         struct cache_tree extent_cache;
8435         struct cache_tree seen;
8436         struct cache_tree pending;
8437         struct cache_tree reada;
8438         struct cache_tree nodes;
8439         struct extent_io_tree excluded_extents;
8440         struct cache_tree corrupt_blocks;
8441         struct btrfs_path path;
8442         struct btrfs_key key;
8443         struct btrfs_key found_key;
8444         int ret, err = 0;
8445         struct block_info *bits;
8446         int bits_nr;
8447         struct extent_buffer *leaf;
8448         int slot;
8449         struct btrfs_root_item ri;
8450         struct list_head dropping_trees;
8451         struct list_head normal_trees;
8452         struct btrfs_root *root1;
8453         u64 objectid;
8454         u32 level_size;
8455         u8 level;
8456
8457         dev_cache = RB_ROOT;
8458         cache_tree_init(&chunk_cache);
8459         block_group_tree_init(&block_group_cache);
8460         device_extent_tree_init(&dev_extent_cache);
8461
8462         cache_tree_init(&extent_cache);
8463         cache_tree_init(&seen);
8464         cache_tree_init(&pending);
8465         cache_tree_init(&nodes);
8466         cache_tree_init(&reada);
8467         cache_tree_init(&corrupt_blocks);
8468         extent_io_tree_init(&excluded_extents);
8469         INIT_LIST_HEAD(&dropping_trees);
8470         INIT_LIST_HEAD(&normal_trees);
8471
8472         if (repair) {
8473                 root->fs_info->excluded_extents = &excluded_extents;
8474                 root->fs_info->fsck_extent_cache = &extent_cache;
8475                 root->fs_info->free_extent_hook = free_extent_hook;
8476                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8477         }
8478
8479         bits_nr = 1024;
8480         bits = malloc(bits_nr * sizeof(struct block_info));
8481         if (!bits) {
8482                 perror("malloc");
8483                 exit(1);
8484         }
8485
8486         if (ctx.progress_enabled) {
8487                 ctx.tp = TASK_EXTENTS;
8488                 task_start(ctx.info);
8489         }
8490
8491 again:
8492         root1 = root->fs_info->tree_root;
8493         level = btrfs_header_level(root1->node);
8494         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8495                                     root1->node->start, 0, level, 0,
8496                                     root1->nodesize, NULL);
8497         if (ret < 0)
8498                 goto out;
8499         root1 = root->fs_info->chunk_root;
8500         level = btrfs_header_level(root1->node);
8501         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8502                                     root1->node->start, 0, level, 0,
8503                                     root1->nodesize, NULL);
8504         if (ret < 0)
8505                 goto out;
8506         btrfs_init_path(&path);
8507         key.offset = 0;
8508         key.objectid = 0;
8509         key.type = BTRFS_ROOT_ITEM_KEY;
8510         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8511                                         &key, &path, 0, 0);
8512         if (ret < 0)
8513                 goto out;
8514         while(1) {
8515                 leaf = path.nodes[0];
8516                 slot = path.slots[0];
8517                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8518                         ret = btrfs_next_leaf(root, &path);
8519                         if (ret != 0)
8520                                 break;
8521                         leaf = path.nodes[0];
8522                         slot = path.slots[0];
8523                 }
8524                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8525                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8526                         unsigned long offset;
8527                         u64 last_snapshot;
8528
8529                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8530                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8531                         last_snapshot = btrfs_root_last_snapshot(&ri);
8532                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8533                                 level = btrfs_root_level(&ri);
8534                                 level_size = root->nodesize;
8535                                 ret = add_root_item_to_list(&normal_trees,
8536                                                 found_key.objectid,
8537                                                 btrfs_root_bytenr(&ri),
8538                                                 last_snapshot, level,
8539                                                 0, level_size, NULL);
8540                                 if (ret < 0)
8541                                         goto out;
8542                         } else {
8543                                 level = btrfs_root_level(&ri);
8544                                 level_size = root->nodesize;
8545                                 objectid = found_key.objectid;
8546                                 btrfs_disk_key_to_cpu(&found_key,
8547                                                       &ri.drop_progress);
8548                                 ret = add_root_item_to_list(&dropping_trees,
8549                                                 objectid,
8550                                                 btrfs_root_bytenr(&ri),
8551                                                 last_snapshot, level,
8552                                                 ri.drop_level,
8553                                                 level_size, &found_key);
8554                                 if (ret < 0)
8555                                         goto out;
8556                         }
8557                 }
8558                 path.slots[0]++;
8559         }
8560         btrfs_release_path(&path);
8561
8562         /*
8563          * check_block can return -EAGAIN if it fixes something, please keep
8564          * this in mind when dealing with return values from these functions, if
8565          * we get -EAGAIN we want to fall through and restart the loop.
8566          */
8567         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8568                                   &seen, &reada, &nodes, &extent_cache,
8569                                   &chunk_cache, &dev_cache, &block_group_cache,
8570                                   &dev_extent_cache);
8571         if (ret < 0) {
8572                 if (ret == -EAGAIN)
8573                         goto loop;
8574                 goto out;
8575         }
8576         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8577                                   &pending, &seen, &reada, &nodes,
8578                                   &extent_cache, &chunk_cache, &dev_cache,
8579                                   &block_group_cache, &dev_extent_cache);
8580         if (ret < 0) {
8581                 if (ret == -EAGAIN)
8582                         goto loop;
8583                 goto out;
8584         }
8585
8586         ret = check_chunks(&chunk_cache, &block_group_cache,
8587                            &dev_extent_cache, NULL, NULL, NULL, 0);
8588         if (ret) {
8589                 if (ret == -EAGAIN)
8590                         goto loop;
8591                 err = ret;
8592         }
8593
8594         ret = check_extent_refs(root, &extent_cache);
8595         if (ret < 0) {
8596                 if (ret == -EAGAIN)
8597                         goto loop;
8598                 goto out;
8599         }
8600
8601         ret = check_devices(&dev_cache, &dev_extent_cache);
8602         if (ret && err)
8603                 ret = err;
8604
8605 out:
8606         task_stop(ctx.info);
8607         if (repair) {
8608                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8609                 extent_io_tree_cleanup(&excluded_extents);
8610                 root->fs_info->fsck_extent_cache = NULL;
8611                 root->fs_info->free_extent_hook = NULL;
8612                 root->fs_info->corrupt_blocks = NULL;
8613                 root->fs_info->excluded_extents = NULL;
8614         }
8615         free(bits);
8616         free_chunk_cache_tree(&chunk_cache);
8617         free_device_cache_tree(&dev_cache);
8618         free_block_group_tree(&block_group_cache);
8619         free_device_extent_tree(&dev_extent_cache);
8620         free_extent_cache_tree(&seen);
8621         free_extent_cache_tree(&pending);
8622         free_extent_cache_tree(&reada);
8623         free_extent_cache_tree(&nodes);
8624         return ret;
8625 loop:
8626         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8627         free_extent_cache_tree(&seen);
8628         free_extent_cache_tree(&pending);
8629         free_extent_cache_tree(&reada);
8630         free_extent_cache_tree(&nodes);
8631         free_chunk_cache_tree(&chunk_cache);
8632         free_block_group_tree(&block_group_cache);
8633         free_device_cache_tree(&dev_cache);
8634         free_device_extent_tree(&dev_extent_cache);
8635         free_extent_record_cache(root->fs_info, &extent_cache);
8636         free_root_item_list(&normal_trees);
8637         free_root_item_list(&dropping_trees);
8638         extent_io_tree_cleanup(&excluded_extents);
8639         goto again;
8640 }
8641
8642 /*
8643  * Check backrefs of a tree block given by @bytenr or @eb.
8644  *
8645  * @root:       the root containing the @bytenr or @eb
8646  * @eb:         tree block extent buffer, can be NULL
8647  * @bytenr:     bytenr of the tree block to search
8648  * @level:      tree level of the tree block
8649  * @owner:      owner of the tree block
8650  *
8651  * Return >0 for any error found and output error message
8652  * Return 0 for no error found
8653  */
8654 static int check_tree_block_ref(struct btrfs_root *root,
8655                                 struct extent_buffer *eb, u64 bytenr,
8656                                 int level, u64 owner)
8657 {
8658         struct btrfs_key key;
8659         struct btrfs_root *extent_root = root->fs_info->extent_root;
8660         struct btrfs_path path;
8661         struct btrfs_extent_item *ei;
8662         struct btrfs_extent_inline_ref *iref;
8663         struct extent_buffer *leaf;
8664         unsigned long end;
8665         unsigned long ptr;
8666         int slot;
8667         int skinny_level;
8668         int type;
8669         u32 nodesize = root->nodesize;
8670         u32 item_size;
8671         u64 offset;
8672         int found_ref = 0;
8673         int err = 0;
8674         int ret;
8675
8676         btrfs_init_path(&path);
8677         key.objectid = bytenr;
8678         if (btrfs_fs_incompat(root->fs_info,
8679                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8680                 key.type = BTRFS_METADATA_ITEM_KEY;
8681         else
8682                 key.type = BTRFS_EXTENT_ITEM_KEY;
8683         key.offset = (u64)-1;
8684
8685         /* Search for the backref in extent tree */
8686         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8687         if (ret < 0) {
8688                 err |= BACKREF_MISSING;
8689                 goto out;
8690         }
8691         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8692         if (ret) {
8693                 err |= BACKREF_MISSING;
8694                 goto out;
8695         }
8696
8697         leaf = path.nodes[0];
8698         slot = path.slots[0];
8699         btrfs_item_key_to_cpu(leaf, &key, slot);
8700
8701         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8702
8703         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8704                 skinny_level = (int)key.offset;
8705                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8706         } else {
8707                 struct btrfs_tree_block_info *info;
8708
8709                 info = (struct btrfs_tree_block_info *)(ei + 1);
8710                 skinny_level = btrfs_tree_block_level(leaf, info);
8711                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8712         }
8713
8714         if (eb) {
8715                 u64 header_gen;
8716                 u64 extent_gen;
8717
8718                 if (!(btrfs_extent_flags(leaf, ei) &
8719                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8720                         error(
8721                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8722                                 key.objectid, nodesize,
8723                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8724                         err = BACKREF_MISMATCH;
8725                 }
8726                 header_gen = btrfs_header_generation(eb);
8727                 extent_gen = btrfs_extent_generation(leaf, ei);
8728                 if (header_gen != extent_gen) {
8729                         error(
8730         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8731                                 key.objectid, nodesize, header_gen,
8732                                 extent_gen);
8733                         err = BACKREF_MISMATCH;
8734                 }
8735                 if (level != skinny_level) {
8736                         error(
8737                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8738                                 key.objectid, nodesize, level, skinny_level);
8739                         err = BACKREF_MISMATCH;
8740                 }
8741                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8742                         error(
8743                         "extent[%llu %u] is referred by other roots than %llu",
8744                                 key.objectid, nodesize, root->objectid);
8745                         err = BACKREF_MISMATCH;
8746                 }
8747         }
8748
8749         /*
8750          * Iterate the extent/metadata item to find the exact backref
8751          */
8752         item_size = btrfs_item_size_nr(leaf, slot);
8753         ptr = (unsigned long)iref;
8754         end = (unsigned long)ei + item_size;
8755         while (ptr < end) {
8756                 iref = (struct btrfs_extent_inline_ref *)ptr;
8757                 type = btrfs_extent_inline_ref_type(leaf, iref);
8758                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8759
8760                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8761                         (offset == root->objectid || offset == owner)) {
8762                         found_ref = 1;
8763                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8764                         /* Check if the backref points to valid referencer */
8765                         found_ref = !check_tree_block_ref(root, NULL, offset,
8766                                                           level + 1, owner);
8767                 }
8768
8769                 if (found_ref)
8770                         break;
8771                 ptr += btrfs_extent_inline_ref_size(type);
8772         }
8773
8774         /*
8775          * Inlined extent item doesn't have what we need, check
8776          * TREE_BLOCK_REF_KEY
8777          */
8778         if (!found_ref) {
8779                 btrfs_release_path(&path);
8780                 key.objectid = bytenr;
8781                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8782                 key.offset = root->objectid;
8783
8784                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8785                 if (!ret)
8786                         found_ref = 1;
8787         }
8788         if (!found_ref)
8789                 err |= BACKREF_MISSING;
8790 out:
8791         btrfs_release_path(&path);
8792         if (eb && (err & BACKREF_MISSING))
8793                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8794                         bytenr, nodesize, owner, level);
8795         return err;
8796 }
8797
8798 /*
8799  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8800  *
8801  * Return >0 any error found and output error message
8802  * Return 0 for no error found
8803  */
8804 static int check_extent_data_item(struct btrfs_root *root,
8805                                   struct extent_buffer *eb, int slot)
8806 {
8807         struct btrfs_file_extent_item *fi;
8808         struct btrfs_path path;
8809         struct btrfs_root *extent_root = root->fs_info->extent_root;
8810         struct btrfs_key fi_key;
8811         struct btrfs_key dbref_key;
8812         struct extent_buffer *leaf;
8813         struct btrfs_extent_item *ei;
8814         struct btrfs_extent_inline_ref *iref;
8815         struct btrfs_extent_data_ref *dref;
8816         u64 owner;
8817         u64 file_extent_gen;
8818         u64 disk_bytenr;
8819         u64 disk_num_bytes;
8820         u64 extent_num_bytes;
8821         u64 extent_flags;
8822         u64 extent_gen;
8823         u32 item_size;
8824         unsigned long end;
8825         unsigned long ptr;
8826         int type;
8827         u64 ref_root;
8828         int found_dbackref = 0;
8829         int err = 0;
8830         int ret;
8831
8832         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8833         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8834         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8835
8836         /* Nothing to check for hole and inline data extents */
8837         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8838             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8839                 return 0;
8840
8841         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8842         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8843         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8844
8845         /* Check unaligned disk_num_bytes and num_bytes */
8846         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8847                 error(
8848 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8849                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8850                         root->sectorsize);
8851                 err |= BYTES_UNALIGNED;
8852         } else {
8853                 data_bytes_allocated += disk_num_bytes;
8854         }
8855         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8856                 error(
8857 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8858                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8859                         root->sectorsize);
8860                 err |= BYTES_UNALIGNED;
8861         } else {
8862                 data_bytes_referenced += extent_num_bytes;
8863         }
8864         owner = btrfs_header_owner(eb);
8865
8866         /* Check the extent item of the file extent in extent tree */
8867         btrfs_init_path(&path);
8868         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8869         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8870         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8871
8872         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8873         if (ret) {
8874                 err |= BACKREF_MISSING;
8875                 goto error;
8876         }
8877
8878         leaf = path.nodes[0];
8879         slot = path.slots[0];
8880         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8881
8882         extent_flags = btrfs_extent_flags(leaf, ei);
8883         extent_gen = btrfs_extent_generation(leaf, ei);
8884
8885         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8886                 error(
8887                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8888                     disk_bytenr, disk_num_bytes,
8889                     BTRFS_EXTENT_FLAG_DATA);
8890                 err |= BACKREF_MISMATCH;
8891         }
8892
8893         if (file_extent_gen < extent_gen) {
8894                 error(
8895 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8896                         disk_bytenr, disk_num_bytes, file_extent_gen,
8897                         extent_gen);
8898                 err |= BACKREF_MISMATCH;
8899         }
8900
8901         /* Check data backref inside that extent item */
8902         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8903         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8904         ptr = (unsigned long)iref;
8905         end = (unsigned long)ei + item_size;
8906         while (ptr < end) {
8907                 iref = (struct btrfs_extent_inline_ref *)ptr;
8908                 type = btrfs_extent_inline_ref_type(leaf, iref);
8909                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8910
8911                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8912                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8913                         if (ref_root == owner || ref_root == root->objectid)
8914                                 found_dbackref = 1;
8915                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8916                         found_dbackref = !check_tree_block_ref(root, NULL,
8917                                 btrfs_extent_inline_ref_offset(leaf, iref),
8918                                 0, owner);
8919                 }
8920
8921                 if (found_dbackref)
8922                         break;
8923                 ptr += btrfs_extent_inline_ref_size(type);
8924         }
8925
8926         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8927         if (!found_dbackref) {
8928                 btrfs_release_path(&path);
8929
8930                 btrfs_init_path(&path);
8931                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8932                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8933                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8934                                 fi_key.objectid, fi_key.offset);
8935
8936                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8937                                         &dbref_key, &path, 0, 0);
8938                 if (!ret)
8939                         found_dbackref = 1;
8940         }
8941
8942         if (!found_dbackref)
8943                 err |= BACKREF_MISSING;
8944 error:
8945         btrfs_release_path(&path);
8946         if (err & BACKREF_MISSING) {
8947                 error("data extent[%llu %llu] backref lost",
8948                       disk_bytenr, disk_num_bytes);
8949         }
8950         return err;
8951 }
8952
8953 /*
8954  * Get real tree block level for the case like shared block
8955  * Return >= 0 as tree level
8956  * Return <0 for error
8957  */
8958 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8959 {
8960         struct extent_buffer *eb;
8961         struct btrfs_path path;
8962         struct btrfs_key key;
8963         struct btrfs_extent_item *ei;
8964         u64 flags;
8965         u64 transid;
8966         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8967         u8 backref_level;
8968         u8 header_level;
8969         int ret;
8970
8971         /* Search extent tree for extent generation and level */
8972         key.objectid = bytenr;
8973         key.type = BTRFS_METADATA_ITEM_KEY;
8974         key.offset = (u64)-1;
8975
8976         btrfs_init_path(&path);
8977         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8978         if (ret < 0)
8979                 goto release_out;
8980         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8981         if (ret < 0)
8982                 goto release_out;
8983         if (ret > 0) {
8984                 ret = -ENOENT;
8985                 goto release_out;
8986         }
8987
8988         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8989         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8990                             struct btrfs_extent_item);
8991         flags = btrfs_extent_flags(path.nodes[0], ei);
8992         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8993                 ret = -ENOENT;
8994                 goto release_out;
8995         }
8996
8997         /* Get transid for later read_tree_block() check */
8998         transid = btrfs_extent_generation(path.nodes[0], ei);
8999
9000         /* Get backref level as one source */
9001         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9002                 backref_level = key.offset;
9003         } else {
9004                 struct btrfs_tree_block_info *info;
9005
9006                 info = (struct btrfs_tree_block_info *)(ei + 1);
9007                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9008         }
9009         btrfs_release_path(&path);
9010
9011         /* Get level from tree block as an alternative source */
9012         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9013         if (!extent_buffer_uptodate(eb)) {
9014                 free_extent_buffer(eb);
9015                 return -EIO;
9016         }
9017         header_level = btrfs_header_level(eb);
9018         free_extent_buffer(eb);
9019
9020         if (header_level != backref_level)
9021                 return -EIO;
9022         return header_level;
9023
9024 release_out:
9025         btrfs_release_path(&path);
9026         return ret;
9027 }
9028
9029 /*
9030  * Check if a tree block backref is valid (points to a valid tree block)
9031  * if level == -1, level will be resolved
9032  * Return >0 for any error found and print error message
9033  */
9034 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9035                                     u64 bytenr, int level)
9036 {
9037         struct btrfs_root *root;
9038         struct btrfs_key key;
9039         struct btrfs_path path;
9040         struct extent_buffer *eb;
9041         struct extent_buffer *node;
9042         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9043         int err = 0;
9044         int ret;
9045
9046         /* Query level for level == -1 special case */
9047         if (level == -1)
9048                 level = query_tree_block_level(fs_info, bytenr);
9049         if (level < 0) {
9050                 err |= REFERENCER_MISSING;
9051                 goto out;
9052         }
9053
9054         key.objectid = root_id;
9055         key.type = BTRFS_ROOT_ITEM_KEY;
9056         key.offset = (u64)-1;
9057
9058         root = btrfs_read_fs_root(fs_info, &key);
9059         if (IS_ERR(root)) {
9060                 err |= REFERENCER_MISSING;
9061                 goto out;
9062         }
9063
9064         /* Read out the tree block to get item/node key */
9065         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9066         if (!extent_buffer_uptodate(eb)) {
9067                 err |= REFERENCER_MISSING;
9068                 free_extent_buffer(eb);
9069                 goto out;
9070         }
9071
9072         /* Empty tree, no need to check key */
9073         if (!btrfs_header_nritems(eb) && !level) {
9074                 free_extent_buffer(eb);
9075                 goto out;
9076         }
9077
9078         if (level)
9079                 btrfs_node_key_to_cpu(eb, &key, 0);
9080         else
9081                 btrfs_item_key_to_cpu(eb, &key, 0);
9082
9083         free_extent_buffer(eb);
9084
9085         btrfs_init_path(&path);
9086         path.lowest_level = level;
9087         /* Search with the first key, to ensure we can reach it */
9088         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9089         if (ret < 0) {
9090                 err |= REFERENCER_MISSING;
9091                 goto release_out;
9092         }
9093
9094         node = path.nodes[level];
9095         if (btrfs_header_bytenr(node) != bytenr) {
9096                 error(
9097         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9098                         bytenr, nodesize, bytenr,
9099                         btrfs_header_bytenr(node));
9100                 err |= REFERENCER_MISMATCH;
9101         }
9102         if (btrfs_header_level(node) != level) {
9103                 error(
9104         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9105                         bytenr, nodesize, level,
9106                         btrfs_header_level(node));
9107                 err |= REFERENCER_MISMATCH;
9108         }
9109
9110 release_out:
9111         btrfs_release_path(&path);
9112 out:
9113         if (err & REFERENCER_MISSING) {
9114                 if (level < 0)
9115                         error("extent [%llu %d] lost referencer (owner: %llu)",
9116                                 bytenr, nodesize, root_id);
9117                 else
9118                         error(
9119                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9120                                 bytenr, nodesize, root_id, level);
9121         }
9122
9123         return err;
9124 }
9125
9126 /*
9127  * Check referencer for shared block backref
9128  * If level == -1, this function will resolve the level.
9129  */
9130 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9131                                      u64 parent, u64 bytenr, int level)
9132 {
9133         struct extent_buffer *eb;
9134         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9135         u32 nr;
9136         int found_parent = 0;
9137         int i;
9138
9139         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9140         if (!extent_buffer_uptodate(eb))
9141                 goto out;
9142
9143         if (level == -1)
9144                 level = query_tree_block_level(fs_info, bytenr);
9145         if (level < 0)
9146                 goto out;
9147
9148         if (level + 1 != btrfs_header_level(eb))
9149                 goto out;
9150
9151         nr = btrfs_header_nritems(eb);
9152         for (i = 0; i < nr; i++) {
9153                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9154                         found_parent = 1;
9155                         break;
9156                 }
9157         }
9158 out:
9159         free_extent_buffer(eb);
9160         if (!found_parent) {
9161                 error(
9162         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9163                         bytenr, nodesize, parent, level);
9164                 return REFERENCER_MISSING;
9165         }
9166         return 0;
9167 }
9168
9169 /*
9170  * Check referencer for normal (inlined) data ref
9171  * If len == 0, it will be resolved by searching in extent tree
9172  */
9173 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9174                                      u64 root_id, u64 objectid, u64 offset,
9175                                      u64 bytenr, u64 len, u32 count)
9176 {
9177         struct btrfs_root *root;
9178         struct btrfs_root *extent_root = fs_info->extent_root;
9179         struct btrfs_key key;
9180         struct btrfs_path path;
9181         struct extent_buffer *leaf;
9182         struct btrfs_file_extent_item *fi;
9183         u32 found_count = 0;
9184         int slot;
9185         int ret = 0;
9186
9187         if (!len) {
9188                 key.objectid = bytenr;
9189                 key.type = BTRFS_EXTENT_ITEM_KEY;
9190                 key.offset = (u64)-1;
9191
9192                 btrfs_init_path(&path);
9193                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9194                 if (ret < 0)
9195                         goto out;
9196                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9197                 if (ret)
9198                         goto out;
9199                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9200                 if (key.objectid != bytenr ||
9201                     key.type != BTRFS_EXTENT_ITEM_KEY)
9202                         goto out;
9203                 len = key.offset;
9204                 btrfs_release_path(&path);
9205         }
9206         key.objectid = root_id;
9207         key.type = BTRFS_ROOT_ITEM_KEY;
9208         key.offset = (u64)-1;
9209         btrfs_init_path(&path);
9210
9211         root = btrfs_read_fs_root(fs_info, &key);
9212         if (IS_ERR(root))
9213                 goto out;
9214
9215         key.objectid = objectid;
9216         key.type = BTRFS_EXTENT_DATA_KEY;
9217         /*
9218          * It can be nasty as data backref offset is
9219          * file offset - file extent offset, which is smaller or
9220          * equal to original backref offset.  The only special case is
9221          * overflow.  So we need to special check and do further search.
9222          */
9223         key.offset = offset & (1ULL << 63) ? 0 : offset;
9224
9225         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9226         if (ret < 0)
9227                 goto out;
9228
9229         /*
9230          * Search afterwards to get correct one
9231          * NOTE: As we must do a comprehensive check on the data backref to
9232          * make sure the dref count also matches, we must iterate all file
9233          * extents for that inode.
9234          */
9235         while (1) {
9236                 leaf = path.nodes[0];
9237                 slot = path.slots[0];
9238
9239                 btrfs_item_key_to_cpu(leaf, &key, slot);
9240                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9241                         break;
9242                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9243                 /*
9244                  * Except normal disk bytenr and disk num bytes, we still
9245                  * need to do extra check on dbackref offset as
9246                  * dbackref offset = file_offset - file_extent_offset
9247                  */
9248                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9249                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9250                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9251                     offset)
9252                         found_count++;
9253
9254                 ret = btrfs_next_item(root, &path);
9255                 if (ret)
9256                         break;
9257         }
9258 out:
9259         btrfs_release_path(&path);
9260         if (found_count != count) {
9261                 error(
9262 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9263                         bytenr, len, root_id, objectid, offset, count, found_count);
9264                 return REFERENCER_MISSING;
9265         }
9266         return 0;
9267 }
9268
9269 /*
9270  * Check if the referencer of a shared data backref exists
9271  */
9272 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9273                                      u64 parent, u64 bytenr)
9274 {
9275         struct extent_buffer *eb;
9276         struct btrfs_key key;
9277         struct btrfs_file_extent_item *fi;
9278         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9279         u32 nr;
9280         int found_parent = 0;
9281         int i;
9282
9283         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9284         if (!extent_buffer_uptodate(eb))
9285                 goto out;
9286
9287         nr = btrfs_header_nritems(eb);
9288         for (i = 0; i < nr; i++) {
9289                 btrfs_item_key_to_cpu(eb, &key, i);
9290                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9291                         continue;
9292
9293                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9294                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9295                         continue;
9296
9297                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9298                         found_parent = 1;
9299                         break;
9300                 }
9301         }
9302
9303 out:
9304         free_extent_buffer(eb);
9305         if (!found_parent) {
9306                 error("shared extent %llu referencer lost (parent: %llu)",
9307                         bytenr, parent);
9308                 return REFERENCER_MISSING;
9309         }
9310         return 0;
9311 }
9312
9313 /*
9314  * This function will check a given extent item, including its backref and
9315  * itself (like crossing stripe boundary and type)
9316  *
9317  * Since we don't use extent_record anymore, introduce new error bit
9318  */
9319 static int check_extent_item(struct btrfs_fs_info *fs_info,
9320                              struct extent_buffer *eb, int slot)
9321 {
9322         struct btrfs_extent_item *ei;
9323         struct btrfs_extent_inline_ref *iref;
9324         struct btrfs_extent_data_ref *dref;
9325         unsigned long end;
9326         unsigned long ptr;
9327         int type;
9328         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9329         u32 item_size = btrfs_item_size_nr(eb, slot);
9330         u64 flags;
9331         u64 offset;
9332         int metadata = 0;
9333         int level;
9334         struct btrfs_key key;
9335         int ret;
9336         int err = 0;
9337
9338         btrfs_item_key_to_cpu(eb, &key, slot);
9339         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9340                 bytes_used += key.offset;
9341         else
9342                 bytes_used += nodesize;
9343
9344         if (item_size < sizeof(*ei)) {
9345                 /*
9346                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9347                  * old thing when on disk format is still un-determined.
9348                  * No need to care about it anymore
9349                  */
9350                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9351                 return -ENOTTY;
9352         }
9353
9354         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9355         flags = btrfs_extent_flags(eb, ei);
9356
9357         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9358                 metadata = 1;
9359         if (metadata && check_crossing_stripes(global_info, key.objectid,
9360                                                eb->len)) {
9361                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9362                       key.objectid, key.objectid + nodesize);
9363                 err |= CROSSING_STRIPE_BOUNDARY;
9364         }
9365
9366         ptr = (unsigned long)(ei + 1);
9367
9368         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9369                 /* Old EXTENT_ITEM metadata */
9370                 struct btrfs_tree_block_info *info;
9371
9372                 info = (struct btrfs_tree_block_info *)ptr;
9373                 level = btrfs_tree_block_level(eb, info);
9374                 ptr += sizeof(struct btrfs_tree_block_info);
9375         } else {
9376                 /* New METADATA_ITEM */
9377                 level = key.offset;
9378         }
9379         end = (unsigned long)ei + item_size;
9380
9381         if (ptr >= end) {
9382                 err |= ITEM_SIZE_MISMATCH;
9383                 goto out;
9384         }
9385
9386         /* Now check every backref in this extent item */
9387 next:
9388         iref = (struct btrfs_extent_inline_ref *)ptr;
9389         type = btrfs_extent_inline_ref_type(eb, iref);
9390         offset = btrfs_extent_inline_ref_offset(eb, iref);
9391         switch (type) {
9392         case BTRFS_TREE_BLOCK_REF_KEY:
9393                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9394                                                level);
9395                 err |= ret;
9396                 break;
9397         case BTRFS_SHARED_BLOCK_REF_KEY:
9398                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9399                                                  level);
9400                 err |= ret;
9401                 break;
9402         case BTRFS_EXTENT_DATA_REF_KEY:
9403                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9404                 ret = check_extent_data_backref(fs_info,
9405                                 btrfs_extent_data_ref_root(eb, dref),
9406                                 btrfs_extent_data_ref_objectid(eb, dref),
9407                                 btrfs_extent_data_ref_offset(eb, dref),
9408                                 key.objectid, key.offset,
9409                                 btrfs_extent_data_ref_count(eb, dref));
9410                 err |= ret;
9411                 break;
9412         case BTRFS_SHARED_DATA_REF_KEY:
9413                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9414                 err |= ret;
9415                 break;
9416         default:
9417                 error("extent[%llu %d %llu] has unknown ref type: %d",
9418                         key.objectid, key.type, key.offset, type);
9419                 err |= UNKNOWN_TYPE;
9420                 goto out;
9421         }
9422
9423         ptr += btrfs_extent_inline_ref_size(type);
9424         if (ptr < end)
9425                 goto next;
9426
9427 out:
9428         return err;
9429 }
9430
9431 /*
9432  * Check if a dev extent item is referred correctly by its chunk
9433  */
9434 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9435                                  struct extent_buffer *eb, int slot)
9436 {
9437         struct btrfs_root *chunk_root = fs_info->chunk_root;
9438         struct btrfs_dev_extent *ptr;
9439         struct btrfs_path path;
9440         struct btrfs_key chunk_key;
9441         struct btrfs_key devext_key;
9442         struct btrfs_chunk *chunk;
9443         struct extent_buffer *l;
9444         int num_stripes;
9445         u64 length;
9446         int i;
9447         int found_chunk = 0;
9448         int ret;
9449
9450         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9451         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9452         length = btrfs_dev_extent_length(eb, ptr);
9453
9454         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9455         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9456         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9457
9458         btrfs_init_path(&path);
9459         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9460         if (ret)
9461                 goto out;
9462
9463         l = path.nodes[0];
9464         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9465         if (btrfs_chunk_length(l, chunk) != length)
9466                 goto out;
9467
9468         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9469         for (i = 0; i < num_stripes; i++) {
9470                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9471                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9472
9473                 if (devid == devext_key.objectid &&
9474                     offset == devext_key.offset) {
9475                         found_chunk = 1;
9476                         break;
9477                 }
9478         }
9479 out:
9480         btrfs_release_path(&path);
9481         if (!found_chunk) {
9482                 error(
9483                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9484                         devext_key.objectid, devext_key.offset, length);
9485                 return REFERENCER_MISSING;
9486         }
9487         return 0;
9488 }
9489
9490 /*
9491  * Check if the used space is correct with the dev item
9492  */
9493 static int check_dev_item(struct btrfs_fs_info *fs_info,
9494                           struct extent_buffer *eb, int slot)
9495 {
9496         struct btrfs_root *dev_root = fs_info->dev_root;
9497         struct btrfs_dev_item *dev_item;
9498         struct btrfs_path path;
9499         struct btrfs_key key;
9500         struct btrfs_dev_extent *ptr;
9501         u64 dev_id;
9502         u64 used;
9503         u64 total = 0;
9504         int ret;
9505
9506         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9507         dev_id = btrfs_device_id(eb, dev_item);
9508         used = btrfs_device_bytes_used(eb, dev_item);
9509
9510         key.objectid = dev_id;
9511         key.type = BTRFS_DEV_EXTENT_KEY;
9512         key.offset = 0;
9513
9514         btrfs_init_path(&path);
9515         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9516         if (ret < 0) {
9517                 btrfs_item_key_to_cpu(eb, &key, slot);
9518                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9519                         key.objectid, key.type, key.offset);
9520                 btrfs_release_path(&path);
9521                 return REFERENCER_MISSING;
9522         }
9523
9524         /* Iterate dev_extents to calculate the used space of a device */
9525         while (1) {
9526                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9527
9528                 if (key.objectid > dev_id)
9529                         break;
9530                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9531                         goto next;
9532
9533                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9534                                      struct btrfs_dev_extent);
9535                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9536 next:
9537                 ret = btrfs_next_item(dev_root, &path);
9538                 if (ret)
9539                         break;
9540         }
9541         btrfs_release_path(&path);
9542
9543         if (used != total) {
9544                 btrfs_item_key_to_cpu(eb, &key, slot);
9545                 error(
9546 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9547                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9548                         BTRFS_DEV_EXTENT_KEY, dev_id);
9549                 return ACCOUNTING_MISMATCH;
9550         }
9551         return 0;
9552 }
9553
9554 /*
9555  * Check a block group item with its referener (chunk) and its used space
9556  * with extent/metadata item
9557  */
9558 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9559                                   struct extent_buffer *eb, int slot)
9560 {
9561         struct btrfs_root *extent_root = fs_info->extent_root;
9562         struct btrfs_root *chunk_root = fs_info->chunk_root;
9563         struct btrfs_block_group_item *bi;
9564         struct btrfs_block_group_item bg_item;
9565         struct btrfs_path path;
9566         struct btrfs_key bg_key;
9567         struct btrfs_key chunk_key;
9568         struct btrfs_key extent_key;
9569         struct btrfs_chunk *chunk;
9570         struct extent_buffer *leaf;
9571         struct btrfs_extent_item *ei;
9572         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9573         u64 flags;
9574         u64 bg_flags;
9575         u64 used;
9576         u64 total = 0;
9577         int ret;
9578         int err = 0;
9579
9580         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9581         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9582         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9583         used = btrfs_block_group_used(&bg_item);
9584         bg_flags = btrfs_block_group_flags(&bg_item);
9585
9586         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9587         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9588         chunk_key.offset = bg_key.objectid;
9589
9590         btrfs_init_path(&path);
9591         /* Search for the referencer chunk */
9592         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9593         if (ret) {
9594                 error(
9595                 "block group[%llu %llu] did not find the related chunk item",
9596                         bg_key.objectid, bg_key.offset);
9597                 err |= REFERENCER_MISSING;
9598         } else {
9599                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9600                                         struct btrfs_chunk);
9601                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9602                                                 bg_key.offset) {
9603                         error(
9604         "block group[%llu %llu] related chunk item length does not match",
9605                                 bg_key.objectid, bg_key.offset);
9606                         err |= REFERENCER_MISMATCH;
9607                 }
9608         }
9609         btrfs_release_path(&path);
9610
9611         /* Search from the block group bytenr */
9612         extent_key.objectid = bg_key.objectid;
9613         extent_key.type = 0;
9614         extent_key.offset = 0;
9615
9616         btrfs_init_path(&path);
9617         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9618         if (ret < 0)
9619                 goto out;
9620
9621         /* Iterate extent tree to account used space */
9622         while (1) {
9623                 leaf = path.nodes[0];
9624                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9625                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9626                         break;
9627
9628                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9629                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9630                         goto next;
9631                 if (extent_key.objectid < bg_key.objectid)
9632                         goto next;
9633
9634                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9635                         total += nodesize;
9636                 else
9637                         total += extent_key.offset;
9638
9639                 ei = btrfs_item_ptr(leaf, path.slots[0],
9640                                     struct btrfs_extent_item);
9641                 flags = btrfs_extent_flags(leaf, ei);
9642                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9643                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9644                                 error(
9645                         "bad extent[%llu, %llu) type mismatch with chunk",
9646                                         extent_key.objectid,
9647                                         extent_key.objectid + extent_key.offset);
9648                                 err |= CHUNK_TYPE_MISMATCH;
9649                         }
9650                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9651                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9652                                     BTRFS_BLOCK_GROUP_METADATA))) {
9653                                 error(
9654                         "bad extent[%llu, %llu) type mismatch with chunk",
9655                                         extent_key.objectid,
9656                                         extent_key.objectid + nodesize);
9657                                 err |= CHUNK_TYPE_MISMATCH;
9658                         }
9659                 }
9660 next:
9661                 ret = btrfs_next_item(extent_root, &path);
9662                 if (ret)
9663                         break;
9664         }
9665
9666 out:
9667         btrfs_release_path(&path);
9668
9669         if (total != used) {
9670                 error(
9671                 "block group[%llu %llu] used %llu but extent items used %llu",
9672                         bg_key.objectid, bg_key.offset, used, total);
9673                 err |= ACCOUNTING_MISMATCH;
9674         }
9675         return err;
9676 }
9677
9678 /*
9679  * Check a chunk item.
9680  * Including checking all referred dev_extents and block group
9681  */
9682 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9683                             struct extent_buffer *eb, int slot)
9684 {
9685         struct btrfs_root *extent_root = fs_info->extent_root;
9686         struct btrfs_root *dev_root = fs_info->dev_root;
9687         struct btrfs_path path;
9688         struct btrfs_key chunk_key;
9689         struct btrfs_key bg_key;
9690         struct btrfs_key devext_key;
9691         struct btrfs_chunk *chunk;
9692         struct extent_buffer *leaf;
9693         struct btrfs_block_group_item *bi;
9694         struct btrfs_block_group_item bg_item;
9695         struct btrfs_dev_extent *ptr;
9696         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9697         u64 length;
9698         u64 chunk_end;
9699         u64 type;
9700         u64 profile;
9701         int num_stripes;
9702         u64 offset;
9703         u64 objectid;
9704         int i;
9705         int ret;
9706         int err = 0;
9707
9708         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9709         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9710         length = btrfs_chunk_length(eb, chunk);
9711         chunk_end = chunk_key.offset + length;
9712         if (!IS_ALIGNED(length, sectorsize)) {
9713                 error("chunk[%llu %llu) not aligned to %u",
9714                         chunk_key.offset, chunk_end, sectorsize);
9715                 err |= BYTES_UNALIGNED;
9716                 goto out;
9717         }
9718
9719         type = btrfs_chunk_type(eb, chunk);
9720         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9721         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9722                 error("chunk[%llu %llu) has no chunk type",
9723                         chunk_key.offset, chunk_end);
9724                 err |= UNKNOWN_TYPE;
9725         }
9726         if (profile && (profile & (profile - 1))) {
9727                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9728                         chunk_key.offset, chunk_end, profile);
9729                 err |= UNKNOWN_TYPE;
9730         }
9731
9732         bg_key.objectid = chunk_key.offset;
9733         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9734         bg_key.offset = length;
9735
9736         btrfs_init_path(&path);
9737         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9738         if (ret) {
9739                 error(
9740                 "chunk[%llu %llu) did not find the related block group item",
9741                         chunk_key.offset, chunk_end);
9742                 err |= REFERENCER_MISSING;
9743         } else{
9744                 leaf = path.nodes[0];
9745                 bi = btrfs_item_ptr(leaf, path.slots[0],
9746                                     struct btrfs_block_group_item);
9747                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9748                                    sizeof(bg_item));
9749                 if (btrfs_block_group_flags(&bg_item) != type) {
9750                         error(
9751 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9752                                 chunk_key.offset, chunk_end, type,
9753                                 btrfs_block_group_flags(&bg_item));
9754                         err |= REFERENCER_MISSING;
9755                 }
9756         }
9757
9758         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9759         for (i = 0; i < num_stripes; i++) {
9760                 btrfs_release_path(&path);
9761                 btrfs_init_path(&path);
9762                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9763                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9764                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9765
9766                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9767                                         0, 0);
9768                 if (ret)
9769                         goto not_match_dev;
9770
9771                 leaf = path.nodes[0];
9772                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9773                                      struct btrfs_dev_extent);
9774                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9775                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9776                 if (objectid != chunk_key.objectid ||
9777                     offset != chunk_key.offset ||
9778                     btrfs_dev_extent_length(leaf, ptr) != length)
9779                         goto not_match_dev;
9780                 continue;
9781 not_match_dev:
9782                 err |= BACKREF_MISSING;
9783                 error(
9784                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9785                         chunk_key.objectid, chunk_end, i);
9786                 continue;
9787         }
9788         btrfs_release_path(&path);
9789 out:
9790         return err;
9791 }
9792
9793 /*
9794  * Main entry function to check known items and update related accounting info
9795  */
9796 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9797 {
9798         struct btrfs_fs_info *fs_info = root->fs_info;
9799         struct btrfs_key key;
9800         int slot = 0;
9801         int type;
9802         struct btrfs_extent_data_ref *dref;
9803         int ret;
9804         int err = 0;
9805
9806 next:
9807         btrfs_item_key_to_cpu(eb, &key, slot);
9808         type = key.type;
9809
9810         switch (type) {
9811         case BTRFS_EXTENT_DATA_KEY:
9812                 ret = check_extent_data_item(root, eb, slot);
9813                 err |= ret;
9814                 break;
9815         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9816                 ret = check_block_group_item(fs_info, eb, slot);
9817                 err |= ret;
9818                 break;
9819         case BTRFS_DEV_ITEM_KEY:
9820                 ret = check_dev_item(fs_info, eb, slot);
9821                 err |= ret;
9822                 break;
9823         case BTRFS_CHUNK_ITEM_KEY:
9824                 ret = check_chunk_item(fs_info, eb, slot);
9825                 err |= ret;
9826                 break;
9827         case BTRFS_DEV_EXTENT_KEY:
9828                 ret = check_dev_extent_item(fs_info, eb, slot);
9829                 err |= ret;
9830                 break;
9831         case BTRFS_EXTENT_ITEM_KEY:
9832         case BTRFS_METADATA_ITEM_KEY:
9833                 ret = check_extent_item(fs_info, eb, slot);
9834                 err |= ret;
9835                 break;
9836         case BTRFS_EXTENT_CSUM_KEY:
9837                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9838                 break;
9839         case BTRFS_TREE_BLOCK_REF_KEY:
9840                 ret = check_tree_block_backref(fs_info, key.offset,
9841                                                key.objectid, -1);
9842                 err |= ret;
9843                 break;
9844         case BTRFS_EXTENT_DATA_REF_KEY:
9845                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9846                 ret = check_extent_data_backref(fs_info,
9847                                 btrfs_extent_data_ref_root(eb, dref),
9848                                 btrfs_extent_data_ref_objectid(eb, dref),
9849                                 btrfs_extent_data_ref_offset(eb, dref),
9850                                 key.objectid, 0,
9851                                 btrfs_extent_data_ref_count(eb, dref));
9852                 err |= ret;
9853                 break;
9854         case BTRFS_SHARED_BLOCK_REF_KEY:
9855                 ret = check_shared_block_backref(fs_info, key.offset,
9856                                                  key.objectid, -1);
9857                 err |= ret;
9858                 break;
9859         case BTRFS_SHARED_DATA_REF_KEY:
9860                 ret = check_shared_data_backref(fs_info, key.offset,
9861                                                 key.objectid);
9862                 err |= ret;
9863                 break;
9864         default:
9865                 break;
9866         }
9867
9868         if (++slot < btrfs_header_nritems(eb))
9869                 goto next;
9870
9871         return err;
9872 }
9873
9874 /*
9875  * Helper function for later fs/subvol tree check.  To determine if a tree
9876  * block should be checked.
9877  * This function will ensure only the direct referencer with lowest rootid to
9878  * check a fs/subvolume tree block.
9879  *
9880  * Backref check at extent tree would detect errors like missing subvolume
9881  * tree, so we can do aggressive check to reduce duplicated checks.
9882  */
9883 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9884 {
9885         struct btrfs_root *extent_root = root->fs_info->extent_root;
9886         struct btrfs_key key;
9887         struct btrfs_path path;
9888         struct extent_buffer *leaf;
9889         int slot;
9890         struct btrfs_extent_item *ei;
9891         unsigned long ptr;
9892         unsigned long end;
9893         int type;
9894         u32 item_size;
9895         u64 offset;
9896         struct btrfs_extent_inline_ref *iref;
9897         int ret;
9898
9899         btrfs_init_path(&path);
9900         key.objectid = btrfs_header_bytenr(eb);
9901         key.type = BTRFS_METADATA_ITEM_KEY;
9902         key.offset = (u64)-1;
9903
9904         /*
9905          * Any failure in backref resolving means we can't determine
9906          * whom the tree block belongs to.
9907          * So in that case, we need to check that tree block
9908          */
9909         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9910         if (ret < 0)
9911                 goto need_check;
9912
9913         ret = btrfs_previous_extent_item(extent_root, &path,
9914                                          btrfs_header_bytenr(eb));
9915         if (ret)
9916                 goto need_check;
9917
9918         leaf = path.nodes[0];
9919         slot = path.slots[0];
9920         btrfs_item_key_to_cpu(leaf, &key, slot);
9921         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9922
9923         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9924                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9925         } else {
9926                 struct btrfs_tree_block_info *info;
9927
9928                 info = (struct btrfs_tree_block_info *)(ei + 1);
9929                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9930         }
9931
9932         item_size = btrfs_item_size_nr(leaf, slot);
9933         ptr = (unsigned long)iref;
9934         end = (unsigned long)ei + item_size;
9935         while (ptr < end) {
9936                 iref = (struct btrfs_extent_inline_ref *)ptr;
9937                 type = btrfs_extent_inline_ref_type(leaf, iref);
9938                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9939
9940                 /*
9941                  * We only check the tree block if current root is
9942                  * the lowest referencer of it.
9943                  */
9944                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9945                     offset < root->objectid) {
9946                         btrfs_release_path(&path);
9947                         return 0;
9948                 }
9949
9950                 ptr += btrfs_extent_inline_ref_size(type);
9951         }
9952         /*
9953          * Normally we should also check keyed tree block ref, but that may be
9954          * very time consuming.  Inlined ref should already make us skip a lot
9955          * of refs now.  So skip search keyed tree block ref.
9956          */
9957
9958 need_check:
9959         btrfs_release_path(&path);
9960         return 1;
9961 }
9962
9963 /*
9964  * Traversal function for tree block. We will do:
9965  * 1) Skip shared fs/subvolume tree blocks
9966  * 2) Update related bytes accounting
9967  * 3) Pre-order traversal
9968  */
9969 static int traverse_tree_block(struct btrfs_root *root,
9970                                 struct extent_buffer *node)
9971 {
9972         struct extent_buffer *eb;
9973         struct btrfs_key key;
9974         struct btrfs_key drop_key;
9975         int level;
9976         u64 nr;
9977         int i;
9978         int err = 0;
9979         int ret;
9980
9981         /*
9982          * Skip shared fs/subvolume tree block, in that case they will
9983          * be checked by referencer with lowest rootid
9984          */
9985         if (is_fstree(root->objectid) && !should_check(root, node))
9986                 return 0;
9987
9988         /* Update bytes accounting */
9989         total_btree_bytes += node->len;
9990         if (fs_root_objectid(btrfs_header_owner(node)))
9991                 total_fs_tree_bytes += node->len;
9992         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9993                 total_extent_tree_bytes += node->len;
9994         if (!found_old_backref &&
9995             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9996             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9997             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9998                 found_old_backref = 1;
9999
10000         /* pre-order tranversal, check itself first */
10001         level = btrfs_header_level(node);
10002         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10003                                    btrfs_header_level(node),
10004                                    btrfs_header_owner(node));
10005         err |= ret;
10006         if (err)
10007                 error(
10008         "check %s failed root %llu bytenr %llu level %d, force continue check",
10009                         level ? "node":"leaf", root->objectid,
10010                         btrfs_header_bytenr(node), btrfs_header_level(node));
10011
10012         if (!level) {
10013                 btree_space_waste += btrfs_leaf_free_space(root, node);
10014                 ret = check_leaf_items(root, node);
10015                 err |= ret;
10016                 return err;
10017         }
10018
10019         nr = btrfs_header_nritems(node);
10020         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10021         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10022                 sizeof(struct btrfs_key_ptr);
10023
10024         /* Then check all its children */
10025         for (i = 0; i < nr; i++) {
10026                 u64 blocknr = btrfs_node_blockptr(node, i);
10027
10028                 btrfs_node_key_to_cpu(node, &key, i);
10029                 if (level == root->root_item.drop_level &&
10030                     is_dropped_key(&key, &drop_key))
10031                         continue;
10032
10033                 /*
10034                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10035                  * to call the function itself.
10036                  */
10037                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10038                 if (extent_buffer_uptodate(eb)) {
10039                         ret = traverse_tree_block(root, eb);
10040                         err |= ret;
10041                 }
10042                 free_extent_buffer(eb);
10043         }
10044
10045         return err;
10046 }
10047
10048 /*
10049  * Low memory usage version check_chunks_and_extents.
10050  */
10051 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10052 {
10053         struct btrfs_path path;
10054         struct btrfs_key key;
10055         struct btrfs_root *root1;
10056         struct btrfs_root *cur_root;
10057         int err = 0;
10058         int ret;
10059
10060         root1 = root->fs_info->chunk_root;
10061         ret = traverse_tree_block(root1, root1->node);
10062         err |= ret;
10063
10064         root1 = root->fs_info->tree_root;
10065         ret = traverse_tree_block(root1, root1->node);
10066         err |= ret;
10067
10068         btrfs_init_path(&path);
10069         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10070         key.offset = 0;
10071         key.type = BTRFS_ROOT_ITEM_KEY;
10072
10073         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10074         if (ret) {
10075                 error("cannot find extent treet in tree_root");
10076                 goto out;
10077         }
10078
10079         while (1) {
10080                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10081                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10082                         goto next;
10083                 key.offset = (u64)-1;
10084
10085                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10086                 if (IS_ERR(cur_root) || !cur_root) {
10087                         error("failed to read tree: %lld", key.objectid);
10088                         goto next;
10089                 }
10090
10091                 ret = traverse_tree_block(cur_root, cur_root->node);
10092                 err |= ret;
10093
10094 next:
10095                 ret = btrfs_next_item(root1, &path);
10096                 if (ret)
10097                         goto out;
10098         }
10099
10100 out:
10101         btrfs_release_path(&path);
10102         return err;
10103 }
10104
10105 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10106                            struct btrfs_root *root, int overwrite)
10107 {
10108         struct extent_buffer *c;
10109         struct extent_buffer *old = root->node;
10110         int level;
10111         int ret;
10112         struct btrfs_disk_key disk_key = {0,0,0};
10113
10114         level = 0;
10115
10116         if (overwrite) {
10117                 c = old;
10118                 extent_buffer_get(c);
10119                 goto init;
10120         }
10121         c = btrfs_alloc_free_block(trans, root,
10122                                    root->nodesize,
10123                                    root->root_key.objectid,
10124                                    &disk_key, level, 0, 0);
10125         if (IS_ERR(c)) {
10126                 c = old;
10127                 extent_buffer_get(c);
10128                 overwrite = 1;
10129         }
10130 init:
10131         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10132         btrfs_set_header_level(c, level);
10133         btrfs_set_header_bytenr(c, c->start);
10134         btrfs_set_header_generation(c, trans->transid);
10135         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10136         btrfs_set_header_owner(c, root->root_key.objectid);
10137
10138         write_extent_buffer(c, root->fs_info->fsid,
10139                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10140
10141         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10142                             btrfs_header_chunk_tree_uuid(c),
10143                             BTRFS_UUID_SIZE);
10144
10145         btrfs_mark_buffer_dirty(c);
10146         /*
10147          * this case can happen in the following case:
10148          *
10149          * 1.overwrite previous root.
10150          *
10151          * 2.reinit reloc data root, this is because we skip pin
10152          * down reloc data tree before which means we can allocate
10153          * same block bytenr here.
10154          */
10155         if (old->start == c->start) {
10156                 btrfs_set_root_generation(&root->root_item,
10157                                           trans->transid);
10158                 root->root_item.level = btrfs_header_level(root->node);
10159                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10160                                         &root->root_key, &root->root_item);
10161                 if (ret) {
10162                         free_extent_buffer(c);
10163                         return ret;
10164                 }
10165         }
10166         free_extent_buffer(old);
10167         root->node = c;
10168         add_root_to_dirty_list(root);
10169         return 0;
10170 }
10171
10172 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10173                                 struct extent_buffer *eb, int tree_root)
10174 {
10175         struct extent_buffer *tmp;
10176         struct btrfs_root_item *ri;
10177         struct btrfs_key key;
10178         u64 bytenr;
10179         u32 nodesize;
10180         int level = btrfs_header_level(eb);
10181         int nritems;
10182         int ret;
10183         int i;
10184
10185         /*
10186          * If we have pinned this block before, don't pin it again.
10187          * This can not only avoid forever loop with broken filesystem
10188          * but also give us some speedups.
10189          */
10190         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10191                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10192                 return 0;
10193
10194         btrfs_pin_extent(fs_info, eb->start, eb->len);
10195
10196         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10197         nritems = btrfs_header_nritems(eb);
10198         for (i = 0; i < nritems; i++) {
10199                 if (level == 0) {
10200                         btrfs_item_key_to_cpu(eb, &key, i);
10201                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10202                                 continue;
10203                         /* Skip the extent root and reloc roots */
10204                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10205                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10206                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10207                                 continue;
10208                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10209                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10210
10211                         /*
10212                          * If at any point we start needing the real root we
10213                          * will have to build a stump root for the root we are
10214                          * in, but for now this doesn't actually use the root so
10215                          * just pass in extent_root.
10216                          */
10217                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10218                                               nodesize, 0);
10219                         if (!extent_buffer_uptodate(tmp)) {
10220                                 fprintf(stderr, "Error reading root block\n");
10221                                 return -EIO;
10222                         }
10223                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10224                         free_extent_buffer(tmp);
10225                         if (ret)
10226                                 return ret;
10227                 } else {
10228                         bytenr = btrfs_node_blockptr(eb, i);
10229
10230                         /* If we aren't the tree root don't read the block */
10231                         if (level == 1 && !tree_root) {
10232                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10233                                 continue;
10234                         }
10235
10236                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10237                                               nodesize, 0);
10238                         if (!extent_buffer_uptodate(tmp)) {
10239                                 fprintf(stderr, "Error reading tree block\n");
10240                                 return -EIO;
10241                         }
10242                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10243                         free_extent_buffer(tmp);
10244                         if (ret)
10245                                 return ret;
10246                 }
10247         }
10248
10249         return 0;
10250 }
10251
10252 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10253 {
10254         int ret;
10255
10256         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10257         if (ret)
10258                 return ret;
10259
10260         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10261 }
10262
10263 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10264 {
10265         struct btrfs_block_group_cache *cache;
10266         struct btrfs_path *path;
10267         struct extent_buffer *leaf;
10268         struct btrfs_chunk *chunk;
10269         struct btrfs_key key;
10270         int ret;
10271         u64 start;
10272
10273         path = btrfs_alloc_path();
10274         if (!path)
10275                 return -ENOMEM;
10276
10277         key.objectid = 0;
10278         key.type = BTRFS_CHUNK_ITEM_KEY;
10279         key.offset = 0;
10280
10281         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10282         if (ret < 0) {
10283                 btrfs_free_path(path);
10284                 return ret;
10285         }
10286
10287         /*
10288          * We do this in case the block groups were screwed up and had alloc
10289          * bits that aren't actually set on the chunks.  This happens with
10290          * restored images every time and could happen in real life I guess.
10291          */
10292         fs_info->avail_data_alloc_bits = 0;
10293         fs_info->avail_metadata_alloc_bits = 0;
10294         fs_info->avail_system_alloc_bits = 0;
10295
10296         /* First we need to create the in-memory block groups */
10297         while (1) {
10298                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10299                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10300                         if (ret < 0) {
10301                                 btrfs_free_path(path);
10302                                 return ret;
10303                         }
10304                         if (ret) {
10305                                 ret = 0;
10306                                 break;
10307                         }
10308                 }
10309                 leaf = path->nodes[0];
10310                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10311                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10312                         path->slots[0]++;
10313                         continue;
10314                 }
10315
10316                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10317                                        struct btrfs_chunk);
10318                 btrfs_add_block_group(fs_info, 0,
10319                                       btrfs_chunk_type(leaf, chunk),
10320                                       key.objectid, key.offset,
10321                                       btrfs_chunk_length(leaf, chunk));
10322                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10323                                  key.offset + btrfs_chunk_length(leaf, chunk),
10324                                  GFP_NOFS);
10325                 path->slots[0]++;
10326         }
10327         start = 0;
10328         while (1) {
10329                 cache = btrfs_lookup_first_block_group(fs_info, start);
10330                 if (!cache)
10331                         break;
10332                 cache->cached = 1;
10333                 start = cache->key.objectid + cache->key.offset;
10334         }
10335
10336         btrfs_free_path(path);
10337         return 0;
10338 }
10339
10340 static int reset_balance(struct btrfs_trans_handle *trans,
10341                          struct btrfs_fs_info *fs_info)
10342 {
10343         struct btrfs_root *root = fs_info->tree_root;
10344         struct btrfs_path *path;
10345         struct extent_buffer *leaf;
10346         struct btrfs_key key;
10347         int del_slot, del_nr = 0;
10348         int ret;
10349         int found = 0;
10350
10351         path = btrfs_alloc_path();
10352         if (!path)
10353                 return -ENOMEM;
10354
10355         key.objectid = BTRFS_BALANCE_OBJECTID;
10356         key.type = BTRFS_BALANCE_ITEM_KEY;
10357         key.offset = 0;
10358
10359         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10360         if (ret) {
10361                 if (ret > 0)
10362                         ret = 0;
10363                 if (!ret)
10364                         goto reinit_data_reloc;
10365                 else
10366                         goto out;
10367         }
10368
10369         ret = btrfs_del_item(trans, root, path);
10370         if (ret)
10371                 goto out;
10372         btrfs_release_path(path);
10373
10374         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10375         key.type = BTRFS_ROOT_ITEM_KEY;
10376         key.offset = 0;
10377
10378         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10379         if (ret < 0)
10380                 goto out;
10381         while (1) {
10382                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10383                         if (!found)
10384                                 break;
10385
10386                         if (del_nr) {
10387                                 ret = btrfs_del_items(trans, root, path,
10388                                                       del_slot, del_nr);
10389                                 del_nr = 0;
10390                                 if (ret)
10391                                         goto out;
10392                         }
10393                         key.offset++;
10394                         btrfs_release_path(path);
10395
10396                         found = 0;
10397                         ret = btrfs_search_slot(trans, root, &key, path,
10398                                                 -1, 1);
10399                         if (ret < 0)
10400                                 goto out;
10401                         continue;
10402                 }
10403                 found = 1;
10404                 leaf = path->nodes[0];
10405                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10406                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10407                         break;
10408                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10409                         path->slots[0]++;
10410                         continue;
10411                 }
10412                 if (!del_nr) {
10413                         del_slot = path->slots[0];
10414                         del_nr = 1;
10415                 } else {
10416                         del_nr++;
10417                 }
10418                 path->slots[0]++;
10419         }
10420
10421         if (del_nr) {
10422                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10423                 if (ret)
10424                         goto out;
10425         }
10426         btrfs_release_path(path);
10427
10428 reinit_data_reloc:
10429         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10430         key.type = BTRFS_ROOT_ITEM_KEY;
10431         key.offset = (u64)-1;
10432         root = btrfs_read_fs_root(fs_info, &key);
10433         if (IS_ERR(root)) {
10434                 fprintf(stderr, "Error reading data reloc tree\n");
10435                 ret = PTR_ERR(root);
10436                 goto out;
10437         }
10438         record_root_in_trans(trans, root);
10439         ret = btrfs_fsck_reinit_root(trans, root, 0);
10440         if (ret)
10441                 goto out;
10442         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10443 out:
10444         btrfs_free_path(path);
10445         return ret;
10446 }
10447
10448 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10449                               struct btrfs_fs_info *fs_info)
10450 {
10451         u64 start = 0;
10452         int ret;
10453
10454         /*
10455          * The only reason we don't do this is because right now we're just
10456          * walking the trees we find and pinning down their bytes, we don't look
10457          * at any of the leaves.  In order to do mixed groups we'd have to check
10458          * the leaves of any fs roots and pin down the bytes for any file
10459          * extents we find.  Not hard but why do it if we don't have to?
10460          */
10461         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10462                 fprintf(stderr, "We don't support re-initing the extent tree "
10463                         "for mixed block groups yet, please notify a btrfs "
10464                         "developer you want to do this so they can add this "
10465                         "functionality.\n");
10466                 return -EINVAL;
10467         }
10468
10469         /*
10470          * first we need to walk all of the trees except the extent tree and pin
10471          * down the bytes that are in use so we don't overwrite any existing
10472          * metadata.
10473          */
10474         ret = pin_metadata_blocks(fs_info);
10475         if (ret) {
10476                 fprintf(stderr, "error pinning down used bytes\n");
10477                 return ret;
10478         }
10479
10480         /*
10481          * Need to drop all the block groups since we're going to recreate all
10482          * of them again.
10483          */
10484         btrfs_free_block_groups(fs_info);
10485         ret = reset_block_groups(fs_info);
10486         if (ret) {
10487                 fprintf(stderr, "error resetting the block groups\n");
10488                 return ret;
10489         }
10490
10491         /* Ok we can allocate now, reinit the extent root */
10492         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10493         if (ret) {
10494                 fprintf(stderr, "extent root initialization failed\n");
10495                 /*
10496                  * When the transaction code is updated we should end the
10497                  * transaction, but for now progs only knows about commit so
10498                  * just return an error.
10499                  */
10500                 return ret;
10501         }
10502
10503         /*
10504          * Now we have all the in-memory block groups setup so we can make
10505          * allocations properly, and the metadata we care about is safe since we
10506          * pinned all of it above.
10507          */
10508         while (1) {
10509                 struct btrfs_block_group_cache *cache;
10510
10511                 cache = btrfs_lookup_first_block_group(fs_info, start);
10512                 if (!cache)
10513                         break;
10514                 start = cache->key.objectid + cache->key.offset;
10515                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10516                                         &cache->key, &cache->item,
10517                                         sizeof(cache->item));
10518                 if (ret) {
10519                         fprintf(stderr, "Error adding block group\n");
10520                         return ret;
10521                 }
10522                 btrfs_extent_post_op(trans, fs_info->extent_root);
10523         }
10524
10525         ret = reset_balance(trans, fs_info);
10526         if (ret)
10527                 fprintf(stderr, "error resetting the pending balance\n");
10528
10529         return ret;
10530 }
10531
10532 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10533 {
10534         struct btrfs_path *path;
10535         struct btrfs_trans_handle *trans;
10536         struct btrfs_key key;
10537         int ret;
10538
10539         printf("Recowing metadata block %llu\n", eb->start);
10540         key.objectid = btrfs_header_owner(eb);
10541         key.type = BTRFS_ROOT_ITEM_KEY;
10542         key.offset = (u64)-1;
10543
10544         root = btrfs_read_fs_root(root->fs_info, &key);
10545         if (IS_ERR(root)) {
10546                 fprintf(stderr, "Couldn't find owner root %llu\n",
10547                         key.objectid);
10548                 return PTR_ERR(root);
10549         }
10550
10551         path = btrfs_alloc_path();
10552         if (!path)
10553                 return -ENOMEM;
10554
10555         trans = btrfs_start_transaction(root, 1);
10556         if (IS_ERR(trans)) {
10557                 btrfs_free_path(path);
10558                 return PTR_ERR(trans);
10559         }
10560
10561         path->lowest_level = btrfs_header_level(eb);
10562         if (path->lowest_level)
10563                 btrfs_node_key_to_cpu(eb, &key, 0);
10564         else
10565                 btrfs_item_key_to_cpu(eb, &key, 0);
10566
10567         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10568         btrfs_commit_transaction(trans, root);
10569         btrfs_free_path(path);
10570         return ret;
10571 }
10572
10573 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10574 {
10575         struct btrfs_path *path;
10576         struct btrfs_trans_handle *trans;
10577         struct btrfs_key key;
10578         int ret;
10579
10580         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10581                bad->key.type, bad->key.offset);
10582         key.objectid = bad->root_id;
10583         key.type = BTRFS_ROOT_ITEM_KEY;
10584         key.offset = (u64)-1;
10585
10586         root = btrfs_read_fs_root(root->fs_info, &key);
10587         if (IS_ERR(root)) {
10588                 fprintf(stderr, "Couldn't find owner root %llu\n",
10589                         key.objectid);
10590                 return PTR_ERR(root);
10591         }
10592
10593         path = btrfs_alloc_path();
10594         if (!path)
10595                 return -ENOMEM;
10596
10597         trans = btrfs_start_transaction(root, 1);
10598         if (IS_ERR(trans)) {
10599                 btrfs_free_path(path);
10600                 return PTR_ERR(trans);
10601         }
10602
10603         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10604         if (ret) {
10605                 if (ret > 0)
10606                         ret = 0;
10607                 goto out;
10608         }
10609         ret = btrfs_del_item(trans, root, path);
10610 out:
10611         btrfs_commit_transaction(trans, root);
10612         btrfs_free_path(path);
10613         return ret;
10614 }
10615
10616 static int zero_log_tree(struct btrfs_root *root)
10617 {
10618         struct btrfs_trans_handle *trans;
10619         int ret;
10620
10621         trans = btrfs_start_transaction(root, 1);
10622         if (IS_ERR(trans)) {
10623                 ret = PTR_ERR(trans);
10624                 return ret;
10625         }
10626         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10627         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10628         ret = btrfs_commit_transaction(trans, root);
10629         return ret;
10630 }
10631
10632 static int populate_csum(struct btrfs_trans_handle *trans,
10633                          struct btrfs_root *csum_root, char *buf, u64 start,
10634                          u64 len)
10635 {
10636         u64 offset = 0;
10637         u64 sectorsize;
10638         int ret = 0;
10639
10640         while (offset < len) {
10641                 sectorsize = csum_root->sectorsize;
10642                 ret = read_extent_data(csum_root, buf, start + offset,
10643                                        &sectorsize, 0);
10644                 if (ret)
10645                         break;
10646                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10647                                             start + offset, buf, sectorsize);
10648                 if (ret)
10649                         break;
10650                 offset += sectorsize;
10651         }
10652         return ret;
10653 }
10654
10655 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10656                                       struct btrfs_root *csum_root,
10657                                       struct btrfs_root *cur_root)
10658 {
10659         struct btrfs_path *path;
10660         struct btrfs_key key;
10661         struct extent_buffer *node;
10662         struct btrfs_file_extent_item *fi;
10663         char *buf = NULL;
10664         u64 start = 0;
10665         u64 len = 0;
10666         int slot = 0;
10667         int ret = 0;
10668
10669         path = btrfs_alloc_path();
10670         if (!path)
10671                 return -ENOMEM;
10672         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10673         if (!buf) {
10674                 ret = -ENOMEM;
10675                 goto out;
10676         }
10677
10678         key.objectid = 0;
10679         key.offset = 0;
10680         key.type = 0;
10681
10682         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10683         if (ret < 0)
10684                 goto out;
10685         /* Iterate all regular file extents and fill its csum */
10686         while (1) {
10687                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10688
10689                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10690                         goto next;
10691                 node = path->nodes[0];
10692                 slot = path->slots[0];
10693                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10694                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10695                         goto next;
10696                 start = btrfs_file_extent_disk_bytenr(node, fi);
10697                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10698
10699                 ret = populate_csum(trans, csum_root, buf, start, len);
10700                 if (ret == -EEXIST)
10701                         ret = 0;
10702                 if (ret < 0)
10703                         goto out;
10704 next:
10705                 /*
10706                  * TODO: if next leaf is corrupted, jump to nearest next valid
10707                  * leaf.
10708                  */
10709                 ret = btrfs_next_item(cur_root, path);
10710                 if (ret < 0)
10711                         goto out;
10712                 if (ret > 0) {
10713                         ret = 0;
10714                         goto out;
10715                 }
10716         }
10717
10718 out:
10719         btrfs_free_path(path);
10720         free(buf);
10721         return ret;
10722 }
10723
10724 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10725                                   struct btrfs_root *csum_root)
10726 {
10727         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10728         struct btrfs_path *path;
10729         struct btrfs_root *tree_root = fs_info->tree_root;
10730         struct btrfs_root *cur_root;
10731         struct extent_buffer *node;
10732         struct btrfs_key key;
10733         int slot = 0;
10734         int ret = 0;
10735
10736         path = btrfs_alloc_path();
10737         if (!path)
10738                 return -ENOMEM;
10739
10740         key.objectid = BTRFS_FS_TREE_OBJECTID;
10741         key.offset = 0;
10742         key.type = BTRFS_ROOT_ITEM_KEY;
10743
10744         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10745         if (ret < 0)
10746                 goto out;
10747         if (ret > 0) {
10748                 ret = -ENOENT;
10749                 goto out;
10750         }
10751
10752         while (1) {
10753                 node = path->nodes[0];
10754                 slot = path->slots[0];
10755                 btrfs_item_key_to_cpu(node, &key, slot);
10756                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10757                         goto out;
10758                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10759                         goto next;
10760                 if (!is_fstree(key.objectid))
10761                         goto next;
10762                 key.offset = (u64)-1;
10763
10764                 cur_root = btrfs_read_fs_root(fs_info, &key);
10765                 if (IS_ERR(cur_root) || !cur_root) {
10766                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10767                                 key.objectid);
10768                         goto out;
10769                 }
10770                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10771                                 cur_root);
10772                 if (ret < 0)
10773                         goto out;
10774 next:
10775                 ret = btrfs_next_item(tree_root, path);
10776                 if (ret > 0) {
10777                         ret = 0;
10778                         goto out;
10779                 }
10780                 if (ret < 0)
10781                         goto out;
10782         }
10783
10784 out:
10785         btrfs_free_path(path);
10786         return ret;
10787 }
10788
10789 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10790                                       struct btrfs_root *csum_root)
10791 {
10792         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10793         struct btrfs_path *path;
10794         struct btrfs_extent_item *ei;
10795         struct extent_buffer *leaf;
10796         char *buf;
10797         struct btrfs_key key;
10798         int ret;
10799
10800         path = btrfs_alloc_path();
10801         if (!path)
10802                 return -ENOMEM;
10803
10804         key.objectid = 0;
10805         key.type = BTRFS_EXTENT_ITEM_KEY;
10806         key.offset = 0;
10807
10808         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10809         if (ret < 0) {
10810                 btrfs_free_path(path);
10811                 return ret;
10812         }
10813
10814         buf = malloc(csum_root->sectorsize);
10815         if (!buf) {
10816                 btrfs_free_path(path);
10817                 return -ENOMEM;
10818         }
10819
10820         while (1) {
10821                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10822                         ret = btrfs_next_leaf(extent_root, path);
10823                         if (ret < 0)
10824                                 break;
10825                         if (ret) {
10826                                 ret = 0;
10827                                 break;
10828                         }
10829                 }
10830                 leaf = path->nodes[0];
10831
10832                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10833                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10834                         path->slots[0]++;
10835                         continue;
10836                 }
10837
10838                 ei = btrfs_item_ptr(leaf, path->slots[0],
10839                                     struct btrfs_extent_item);
10840                 if (!(btrfs_extent_flags(leaf, ei) &
10841                       BTRFS_EXTENT_FLAG_DATA)) {
10842                         path->slots[0]++;
10843                         continue;
10844                 }
10845
10846                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10847                                     key.offset);
10848                 if (ret)
10849                         break;
10850                 path->slots[0]++;
10851         }
10852
10853         btrfs_free_path(path);
10854         free(buf);
10855         return ret;
10856 }
10857
/*
 * Recompute data checksums and insert them into the csum tree.
 *
 * When the extent tree has been re-initialized it no longer holds the extent
 * information, so it cannot be used as the source.  In that case the caller
 * sets @search_fs_tree and the fs/subvolume trees are scanned instead;
 * otherwise the extent tree is scanned.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
10874
10875 static void free_roots_info_cache(void)
10876 {
10877         if (!roots_info_cache)
10878                 return;
10879
10880         while (!cache_tree_empty(roots_info_cache)) {
10881                 struct cache_extent *entry;
10882                 struct root_item_info *rii;
10883
10884                 entry = first_cache_extent(roots_info_cache);
10885                 if (!entry)
10886                         break;
10887                 remove_cache_extent(roots_info_cache, entry);
10888                 rii = container_of(entry, struct root_item_info, cache_extent);
10889                 free(rii);
10890         }
10891
10892         free(roots_info_cache);
10893         roots_info_cache = NULL;
10894 }
10895
10896 static int build_roots_info_cache(struct btrfs_fs_info *info)
10897 {
10898         int ret = 0;
10899         struct btrfs_key key;
10900         struct extent_buffer *leaf;
10901         struct btrfs_path *path;
10902
10903         if (!roots_info_cache) {
10904                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10905                 if (!roots_info_cache)
10906                         return -ENOMEM;
10907                 cache_tree_init(roots_info_cache);
10908         }
10909
10910         path = btrfs_alloc_path();
10911         if (!path)
10912                 return -ENOMEM;
10913
10914         key.objectid = 0;
10915         key.type = BTRFS_EXTENT_ITEM_KEY;
10916         key.offset = 0;
10917
10918         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10919         if (ret < 0)
10920                 goto out;
10921         leaf = path->nodes[0];
10922
10923         while (1) {
10924                 struct btrfs_key found_key;
10925                 struct btrfs_extent_item *ei;
10926                 struct btrfs_extent_inline_ref *iref;
10927                 int slot = path->slots[0];
10928                 int type;
10929                 u64 flags;
10930                 u64 root_id;
10931                 u8 level;
10932                 struct cache_extent *entry;
10933                 struct root_item_info *rii;
10934
10935                 if (slot >= btrfs_header_nritems(leaf)) {
10936                         ret = btrfs_next_leaf(info->extent_root, path);
10937                         if (ret < 0) {
10938                                 break;
10939                         } else if (ret) {
10940                                 ret = 0;
10941                                 break;
10942                         }
10943                         leaf = path->nodes[0];
10944                         slot = path->slots[0];
10945                 }
10946
10947                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10948
10949                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10950                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10951                         goto next;
10952
10953                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10954                 flags = btrfs_extent_flags(leaf, ei);
10955
10956                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10957                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10958                         goto next;
10959
10960                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10961                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10962                         level = found_key.offset;
10963                 } else {
10964                         struct btrfs_tree_block_info *binfo;
10965
10966                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10967                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10968                         level = btrfs_tree_block_level(leaf, binfo);
10969                 }
10970
10971                 /*
10972                  * For a root extent, it must be of the following type and the
10973                  * first (and only one) iref in the item.
10974                  */
10975                 type = btrfs_extent_inline_ref_type(leaf, iref);
10976                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10977                         goto next;
10978
10979                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10980                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10981                 if (!entry) {
10982                         rii = malloc(sizeof(struct root_item_info));
10983                         if (!rii) {
10984                                 ret = -ENOMEM;
10985                                 goto out;
10986                         }
10987                         rii->cache_extent.start = root_id;
10988                         rii->cache_extent.size = 1;
10989                         rii->level = (u8)-1;
10990                         entry = &rii->cache_extent;
10991                         ret = insert_cache_extent(roots_info_cache, entry);
10992                         ASSERT(ret == 0);
10993                 } else {
10994                         rii = container_of(entry, struct root_item_info,
10995                                            cache_extent);
10996                 }
10997
10998                 ASSERT(rii->cache_extent.start == root_id);
10999                 ASSERT(rii->cache_extent.size == 1);
11000
11001                 if (level > rii->level || rii->level == (u8)-1) {
11002                         rii->level = level;
11003                         rii->bytenr = found_key.objectid;
11004                         rii->gen = btrfs_extent_generation(leaf, ei);
11005                         rii->node_count = 1;
11006                 } else if (level == rii->level) {
11007                         rii->node_count++;
11008                 }
11009 next:
11010                 path->slots[0]++;
11011         }
11012
11013 out:
11014         btrfs_free_path(path);
11015
11016         return ret;
11017 }
11018
11019 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11020                                   struct btrfs_path *path,
11021                                   const struct btrfs_key *root_key,
11022                                   const int read_only_mode)
11023 {
11024         const u64 root_id = root_key->objectid;
11025         struct cache_extent *entry;
11026         struct root_item_info *rii;
11027         struct btrfs_root_item ri;
11028         unsigned long offset;
11029
11030         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11031         if (!entry) {
11032                 fprintf(stderr,
11033                         "Error: could not find extent items for root %llu\n",
11034                         root_key->objectid);
11035                 return -ENOENT;
11036         }
11037
11038         rii = container_of(entry, struct root_item_info, cache_extent);
11039         ASSERT(rii->cache_extent.start == root_id);
11040         ASSERT(rii->cache_extent.size == 1);
11041
11042         if (rii->node_count != 1) {
11043                 fprintf(stderr,
11044                         "Error: could not find btree root extent for root %llu\n",
11045                         root_id);
11046                 return -ENOENT;
11047         }
11048
11049         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11050         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11051
11052         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11053             btrfs_root_level(&ri) != rii->level ||
11054             btrfs_root_generation(&ri) != rii->gen) {
11055
11056                 /*
11057                  * If we're in repair mode but our caller told us to not update
11058                  * the root item, i.e. just check if it needs to be updated, don't
11059                  * print this message, since the caller will call us again shortly
11060                  * for the same root item without read only mode (the caller will
11061                  * open a transaction first).
11062                  */
11063                 if (!(read_only_mode && repair))
11064                         fprintf(stderr,
11065                                 "%sroot item for root %llu,"
11066                                 " current bytenr %llu, current gen %llu, current level %u,"
11067                                 " new bytenr %llu, new gen %llu, new level %u\n",
11068                                 (read_only_mode ? "" : "fixing "),
11069                                 root_id,
11070                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11071                                 btrfs_root_level(&ri),
11072                                 rii->bytenr, rii->gen, rii->level);
11073
11074                 if (btrfs_root_generation(&ri) > rii->gen) {
11075                         fprintf(stderr,
11076                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11077                                 root_id, btrfs_root_generation(&ri), rii->gen);
11078                         return -EINVAL;
11079                 }
11080
11081                 if (!read_only_mode) {
11082                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11083                         btrfs_set_root_level(&ri, rii->level);
11084                         btrfs_set_root_generation(&ri, rii->gen);
11085                         write_extent_buffer(path->nodes[0], &ri,
11086                                             offset, sizeof(ri));
11087                 }
11088
11089                 return 1;
11090         }
11091
11092         return 0;
11093 }
11094
11095 /*
11096  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11097  * caused read-only snapshots to be corrupted if they were created at a moment
11098  * when the source subvolume/snapshot had orphan items. The issue was that the
11099  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11100  * node instead of the post orphan cleanup root node.
11101  * So this function, and its callees, just detects and fixes those cases. Even
11102  * though the regression was for read-only snapshots, this function applies to
11103  * any snapshot/subvolume root.
11104  * This must be run before any other repair code - not doing it so, makes other
11105  * repair code delete or modify backrefs in the extent tree for example, which
11106  * will result in an inconsistent fs after repairing the root items.
11107  */
11108 static int repair_root_items(struct btrfs_fs_info *info)
11109 {
11110         struct btrfs_path *path = NULL;
11111         struct btrfs_key key;
11112         struct extent_buffer *leaf;
11113         struct btrfs_trans_handle *trans = NULL;
11114         int ret = 0;
11115         int bad_roots = 0;
11116         int need_trans = 0;
11117
11118         ret = build_roots_info_cache(info);
11119         if (ret)
11120                 goto out;
11121
11122         path = btrfs_alloc_path();
11123         if (!path) {
11124                 ret = -ENOMEM;
11125                 goto out;
11126         }
11127
11128         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11129         key.type = BTRFS_ROOT_ITEM_KEY;
11130         key.offset = 0;
11131
11132 again:
11133         /*
11134          * Avoid opening and committing transactions if a leaf doesn't have
11135          * any root items that need to be fixed, so that we avoid rotating
11136          * backup roots unnecessarily.
11137          */
11138         if (need_trans) {
11139                 trans = btrfs_start_transaction(info->tree_root, 1);
11140                 if (IS_ERR(trans)) {
11141                         ret = PTR_ERR(trans);
11142                         goto out;
11143                 }
11144         }
11145
11146         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11147                                 0, trans ? 1 : 0);
11148         if (ret < 0)
11149                 goto out;
11150         leaf = path->nodes[0];
11151
11152         while (1) {
11153                 struct btrfs_key found_key;
11154
11155                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11156                         int no_more_keys = find_next_key(path, &key);
11157
11158                         btrfs_release_path(path);
11159                         if (trans) {
11160                                 ret = btrfs_commit_transaction(trans,
11161                                                                info->tree_root);
11162                                 trans = NULL;
11163                                 if (ret < 0)
11164                                         goto out;
11165                         }
11166                         need_trans = 0;
11167                         if (no_more_keys)
11168                                 break;
11169                         goto again;
11170                 }
11171
11172                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11173
11174                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11175                         goto next;
11176                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11177                         goto next;
11178
11179                 ret = maybe_repair_root_item(info, path, &found_key,
11180                                              trans ? 0 : 1);
11181                 if (ret < 0)
11182                         goto out;
11183                 if (ret) {
11184                         if (!trans && repair) {
11185                                 need_trans = 1;
11186                                 key = found_key;
11187                                 btrfs_release_path(path);
11188                                 goto again;
11189                         }
11190                         bad_roots++;
11191                 }
11192 next:
11193                 path->slots[0]++;
11194         }
11195         ret = 0;
11196 out:
11197         free_roots_info_cache();
11198         btrfs_free_path(path);
11199         if (trans)
11200                 btrfs_commit_transaction(trans, info->tree_root);
11201         if (ret < 0)
11202                 return ret;
11203
11204         return bad_roots;
11205 }
11206
11207 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11208 {
11209         struct btrfs_trans_handle *trans;
11210         struct btrfs_block_group_cache *bg_cache;
11211         u64 current = 0;
11212         int ret = 0;
11213
11214         /* Clear all free space cache inodes and its extent data */
11215         while (1) {
11216                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11217                 if (!bg_cache)
11218                         break;
11219                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11220                 if (ret < 0)
11221                         return ret;
11222                 current = bg_cache->key.objectid + bg_cache->key.offset;
11223         }
11224
11225         /* Don't forget to set cache_generation to -1 */
11226         trans = btrfs_start_transaction(fs_info->tree_root, 0);
11227         if (IS_ERR(trans)) {
11228                 error("failed to update super block cache generation");
11229                 return PTR_ERR(trans);
11230         }
11231         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11232         btrfs_commit_transaction(trans, fs_info->tree_root);
11233
11234         return ret;
11235 }
11236
/*
 * Help text for "btrfs check", as a NULL-terminated array of lines.
 *
 * Layout as written below: the synopsis, a one-line summary, the longer
 * description, an empty string, then one or more lines per option.
 *
 * NOTE(review): "-Q" and "-E" are listed here as short options, but the
 * short-option string passed to getopt_long() in cmd_check() is "as:br:p",
 * which contains neither letter - confirm whether the short forms are
 * really accepted.
 */
11237 const char * const cmd_check_usage[] = {
11238         "btrfs check [options] <device>",
11239         "Check structural integrity of a filesystem (unmounted).",
11240         "Check structural integrity of an unmounted filesystem. Verify internal",
11241         "trees' consistency and item connectivity. In the repair mode try to",
11242         "fix the problems found. ",
11243         "WARNING: the repair mode is considered dangerous",
11244         "",
11245         "-s|--super <superblock>     use this superblock copy",
11246         "-b|--backup                 use the first valid backup root copy",
11247         "--repair                    try to repair the filesystem",
11248         "--readonly                  run in read-only mode (default)",
11249         "--init-csum-tree            create a new CRC tree",
11250         "--init-extent-tree          create a new extent tree",
11251         "--mode <MODE>               allows choice of memory/IO trade-offs",
11252         "                            where MODE is one of:",
11253         "                            original - read inodes and extents to memory (requires",
11254         "                                       more memory, does less IO)",
11255         "                            lowmem   - try to use less memory but read blocks again",
11256         "                                       when needed",
11257         "--check-data-csum           verify checksums of data blocks",
11258         "-Q|--qgroup-report          print a report on qgroup consistency",
11259         "-E|--subvol-extents <subvolid>",
11260         "                            print subvolume extents and sharing state",
11261         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
11262         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
11263         "-p|--progress               indicate progress",
11264         "--clear-space-cache v1|v2   clear space cache for v1 or v2",
11265         "                            NOTE: v1 support implemented",
11266         /* End-of-array marker. */
11267         NULL
11267 };
11268
11269 int cmd_check(int argc, char **argv)
11270 {
11271         struct cache_tree root_cache;
11272         struct btrfs_root *root;
11273         struct btrfs_fs_info *info;
11274         u64 bytenr = 0;
11275         u64 subvolid = 0;
11276         u64 tree_root_bytenr = 0;
11277         u64 chunk_root_bytenr = 0;
11278         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11279         int ret;
11280         u64 num;
11281         int init_csum_tree = 0;
11282         int readonly = 0;
11283         int clear_space_cache = 0;
11284         int qgroup_report = 0;
11285         int qgroups_repaired = 0;
11286         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11287
11288         while(1) {
11289                 int c;
11290                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11291                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11292                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11293                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11294                 static const struct option long_options[] = {
11295                         { "super", required_argument, NULL, 's' },
11296                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11297                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11298                         { "init-csum-tree", no_argument, NULL,
11299                                 GETOPT_VAL_INIT_CSUM },
11300                         { "init-extent-tree", no_argument, NULL,
11301                                 GETOPT_VAL_INIT_EXTENT },
11302                         { "check-data-csum", no_argument, NULL,
11303                                 GETOPT_VAL_CHECK_CSUM },
11304                         { "backup", no_argument, NULL, 'b' },
11305                         { "subvol-extents", required_argument, NULL, 'E' },
11306                         { "qgroup-report", no_argument, NULL, 'Q' },
11307                         { "tree-root", required_argument, NULL, 'r' },
11308                         { "chunk-root", required_argument, NULL,
11309                                 GETOPT_VAL_CHUNK_TREE },
11310                         { "progress", no_argument, NULL, 'p' },
11311                         { "mode", required_argument, NULL,
11312                                 GETOPT_VAL_MODE },
11313                         { "clear-space-cache", required_argument, NULL,
11314                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
11315                         { NULL, 0, NULL, 0}
11316                 };
11317
11318                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11319                 if (c < 0)
11320                         break;
11321                 switch(c) {
11322                         case 'a': /* ignored */ break;
11323                         case 'b':
11324                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11325                                 break;
11326                         case 's':
11327                                 num = arg_strtou64(optarg);
11328                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11329                                         error(
11330                                         "super mirror should be less than %d",
11331                                                 BTRFS_SUPER_MIRROR_MAX);
11332                                         exit(1);
11333                                 }
11334                                 bytenr = btrfs_sb_offset(((int)num));
11335                                 printf("using SB copy %llu, bytenr %llu\n", num,
11336                                        (unsigned long long)bytenr);
11337                                 break;
11338                         case 'Q':
11339                                 qgroup_report = 1;
11340                                 break;
11341                         case 'E':
11342                                 subvolid = arg_strtou64(optarg);
11343                                 break;
11344                         case 'r':
11345                                 tree_root_bytenr = arg_strtou64(optarg);
11346                                 break;
11347                         case GETOPT_VAL_CHUNK_TREE:
11348                                 chunk_root_bytenr = arg_strtou64(optarg);
11349                                 break;
11350                         case 'p':
11351                                 ctx.progress_enabled = true;
11352                                 break;
11353                         case '?':
11354                         case 'h':
11355                                 usage(cmd_check_usage);
11356                         case GETOPT_VAL_REPAIR:
11357                                 printf("enabling repair mode\n");
11358                                 repair = 1;
11359                                 ctree_flags |= OPEN_CTREE_WRITES;
11360                                 break;
11361                         case GETOPT_VAL_READONLY:
11362                                 readonly = 1;
11363                                 break;
11364                         case GETOPT_VAL_INIT_CSUM:
11365                                 printf("Creating a new CRC tree\n");
11366                                 init_csum_tree = 1;
11367                                 repair = 1;
11368                                 ctree_flags |= OPEN_CTREE_WRITES;
11369                                 break;
11370                         case GETOPT_VAL_INIT_EXTENT:
11371                                 init_extent_tree = 1;
11372                                 ctree_flags |= (OPEN_CTREE_WRITES |
11373                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11374                                 repair = 1;
11375                                 break;
11376                         case GETOPT_VAL_CHECK_CSUM:
11377                                 check_data_csum = 1;
11378                                 break;
11379                         case GETOPT_VAL_MODE:
11380                                 check_mode = parse_check_mode(optarg);
11381                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11382                                         error("unknown mode: %s", optarg);
11383                                         exit(1);
11384                                 }
11385                                 break;
11386                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
11387                                 if (strcmp(optarg, "v1") != 0) {
11388                                         error(
11389                         "only v1 support implmented, unrecognized value %s",
11390                         optarg);
11391                                         exit(1);
11392                                 }
11393                                 clear_space_cache = 1;
11394                                 ctree_flags |= OPEN_CTREE_WRITES;
11395                                 break;
11396                 }
11397         }
11398
11399         if (check_argc_exact(argc - optind, 1))
11400                 usage(cmd_check_usage);
11401
11402         if (ctx.progress_enabled) {
11403                 ctx.tp = TASK_NOTHING;
11404                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11405         }
11406
11407         /* This check is the only reason for --readonly to exist */
11408         if (readonly && repair) {
11409                 error("repair options are not compatible with --readonly");
11410                 exit(1);
11411         }
11412
11413         /*
11414          * Not supported yet
11415          */
11416         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11417                 error("low memory mode doesn't support repair yet");
11418                 exit(1);
11419         }
11420
11421         radix_tree_init();
11422         cache_tree_init(&root_cache);
11423
11424         if((ret = check_mounted(argv[optind])) < 0) {
11425                 error("could not check mount status: %s", strerror(-ret));
11426                 goto err_out;
11427         } else if(ret) {
11428                 error("%s is currently mounted, aborting", argv[optind]);
11429                 ret = -EBUSY;
11430                 goto err_out;
11431         }
11432
11433         /* only allow partial opening under repair mode */
11434         if (repair)
11435                 ctree_flags |= OPEN_CTREE_PARTIAL;
11436
11437         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11438                                   chunk_root_bytenr, ctree_flags);
11439         if (!info) {
11440                 error("cannot open file system");
11441                 ret = -EIO;
11442                 goto err_out;
11443         }
11444
11445         global_info = info;
11446         root = info->fs_root;
11447         if (clear_space_cache) {
11448                 if (btrfs_fs_compat_ro(info,
11449                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11450                         error(
11451                         "free space cache v2 detected, clearing not implemented");
11452                         ret = 1;
11453                         goto close_out;
11454                 }
11455                 printf("Clearing free space cache\n");
11456                 ret = clear_free_space_cache(info);
11457                 if (ret) {
11458                         error("failed to clear free space cache");
11459                         ret = 1;
11460                 } else {
11461                         printf("Free space cache cleared\n");
11462                 }
11463                 goto close_out;
11464         }
11465
11466         /*
11467          * repair mode will force us to commit transaction which
11468          * will make us fail to load log tree when mounting.
11469          */
11470         if (repair && btrfs_super_log_root(info->super_copy)) {
11471                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11472                 if (!ret) {
11473                         ret = 1;
11474                         goto close_out;
11475                 }
11476                 ret = zero_log_tree(root);
11477                 if (ret) {
11478                         error("failed to zero log tree: %d", ret);
11479                         goto close_out;
11480                 }
11481         }
11482
11483         uuid_unparse(info->super_copy->fsid, uuidbuf);
11484         if (qgroup_report) {
11485                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11486                        uuidbuf);
11487                 ret = qgroup_verify_all(info);
11488                 if (ret == 0)
11489                         report_qgroups(1);
11490                 goto close_out;
11491         }
11492         if (subvolid) {
11493                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11494                        subvolid, argv[optind], uuidbuf);
11495                 ret = print_extent_state(info, subvolid);
11496                 goto close_out;
11497         }
11498         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11499
11500         if (!extent_buffer_uptodate(info->tree_root->node) ||
11501             !extent_buffer_uptodate(info->dev_root->node) ||
11502             !extent_buffer_uptodate(info->chunk_root->node)) {
11503                 error("critical roots corrupted, unable to check the filesystem");
11504                 ret = -EIO;
11505                 goto close_out;
11506         }
11507
11508         if (init_extent_tree || init_csum_tree) {
11509                 struct btrfs_trans_handle *trans;
11510
11511                 trans = btrfs_start_transaction(info->extent_root, 0);
11512                 if (IS_ERR(trans)) {
11513                         error("error starting transaction");
11514                         ret = PTR_ERR(trans);
11515                         goto close_out;
11516                 }
11517
11518                 if (init_extent_tree) {
11519                         printf("Creating a new extent tree\n");
11520                         ret = reinit_extent_tree(trans, info);
11521                         if (ret)
11522                                 goto close_out;
11523                 }
11524
11525                 if (init_csum_tree) {
11526                         printf("Reinitialize checksum tree\n");
11527                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11528                         if (ret) {
11529                                 error("checksum tree initialization failed: %d",
11530                                                 ret);
11531                                 ret = -EIO;
11532                                 goto close_out;
11533                         }
11534
11535                         ret = fill_csum_tree(trans, info->csum_root,
11536                                              init_extent_tree);
11537                         if (ret) {
11538                                 error("checksum tree refilling failed: %d", ret);
11539                                 return -EIO;
11540                         }
11541                 }
11542                 /*
11543                  * Ok now we commit and run the normal fsck, which will add
11544                  * extent entries for all of the items it finds.
11545                  */
11546                 ret = btrfs_commit_transaction(trans, info->extent_root);
11547                 if (ret)
11548                         goto close_out;
11549         }
11550         if (!extent_buffer_uptodate(info->extent_root->node)) {
11551                 error("critical: extent_root, unable to check the filesystem");
11552                 ret = -EIO;
11553                 goto close_out;
11554         }
11555         if (!extent_buffer_uptodate(info->csum_root->node)) {
11556                 error("critical: csum_root, unable to check the filesystem");
11557                 ret = -EIO;
11558                 goto close_out;
11559         }
11560
11561         if (!ctx.progress_enabled)
11562                 printf("checking extents");
11563         if (check_mode == CHECK_MODE_LOWMEM)
11564                 ret = check_chunks_and_extents_v2(root);
11565         else
11566                 ret = check_chunks_and_extents(root);
11567         if (ret)
11568                 printf("Errors found in extent allocation tree or chunk allocation");
11569
11570         ret = repair_root_items(info);
11571         if (ret < 0)
11572                 goto close_out;
11573         if (repair) {
11574                 fprintf(stderr, "Fixed %d roots.\n", ret);
11575                 ret = 0;
11576         } else if (ret > 0) {
11577                 fprintf(stderr,
11578                        "Found %d roots with an outdated root item.\n",
11579                        ret);
11580                 fprintf(stderr,
11581                         "Please run a filesystem check with the option --repair to fix them.\n");
11582                 ret = 1;
11583                 goto close_out;
11584         }
11585
11586         if (!ctx.progress_enabled) {
11587                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11588                         fprintf(stderr, "checking free space tree\n");
11589                 else
11590                         fprintf(stderr, "checking free space cache\n");
11591         }
11592         ret = check_space_cache(root);
11593         if (ret)
11594                 goto out;
11595
11596         /*
11597          * We used to have to have these hole extents in between our real
11598          * extents so if we don't have this flag set we need to make sure there
11599          * are no gaps in the file extents for inodes, otherwise we can just
11600          * ignore it when this happens.
11601          */
11602         no_holes = btrfs_fs_incompat(root->fs_info,
11603                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11604         if (!ctx.progress_enabled)
11605                 fprintf(stderr, "checking fs roots\n");
11606         ret = check_fs_roots(root, &root_cache);
11607         if (ret)
11608                 goto out;
11609
11610         fprintf(stderr, "checking csums\n");
11611         ret = check_csums(root);
11612         if (ret)
11613                 goto out;
11614
11615         fprintf(stderr, "checking root refs\n");
11616         ret = check_root_refs(root, &root_cache);
11617         if (ret)
11618                 goto out;
11619
11620         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11621                 struct extent_buffer *eb;
11622
11623                 eb = list_first_entry(&root->fs_info->recow_ebs,
11624                                       struct extent_buffer, recow);
11625                 list_del_init(&eb->recow);
11626                 ret = recow_extent_buffer(root, eb);
11627                 if (ret)
11628                         break;
11629         }
11630
11631         while (!list_empty(&delete_items)) {
11632                 struct bad_item *bad;
11633
11634                 bad = list_first_entry(&delete_items, struct bad_item, list);
11635                 list_del_init(&bad->list);
11636                 if (repair)
11637                         ret = delete_bad_item(root, bad);
11638                 free(bad);
11639         }
11640
11641         if (info->quota_enabled) {
11642                 int err;
11643                 fprintf(stderr, "checking quota groups\n");
11644                 err = qgroup_verify_all(info);
11645                 if (err)
11646                         goto out;
11647                 report_qgroups(0);
11648                 err = repair_qgroups(info, &qgroups_repaired);
11649                 if (err)
11650                         goto out;
11651         }
11652
11653         if (!list_empty(&root->fs_info->recow_ebs)) {
11654                 error("transid errors in file system");
11655                 ret = 1;
11656         }
11657 out:
11658         /* Don't override original ret */
11659         if (!ret && qgroups_repaired)
11660                 ret = qgroups_repaired;
11661
11662         if (found_old_backref) { /*
11663                  * there was a disk format change when mixed
11664                  * backref was in testing tree. The old format
11665                  * existed about one week.
11666                  */
11667                 printf("\n * Found old mixed backref format. "
11668                        "The old format is not supported! *"
11669                        "\n * Please mount the FS in readonly mode, "
11670                        "backup data and re-format the FS. *\n\n");
11671                 ret = 1;
11672         }
11673         printf("found %llu bytes used err is %d\n",
11674                (unsigned long long)bytes_used, ret);
11675         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11676         printf("total tree bytes: %llu\n",
11677                (unsigned long long)total_btree_bytes);
11678         printf("total fs tree bytes: %llu\n",
11679                (unsigned long long)total_fs_tree_bytes);
11680         printf("total extent tree bytes: %llu\n",
11681                (unsigned long long)total_extent_tree_bytes);
11682         printf("btree space waste bytes: %llu\n",
11683                (unsigned long long)btree_space_waste);
11684         printf("file data blocks allocated: %llu\n referenced %llu\n",
11685                 (unsigned long long)data_bytes_allocated,
11686                 (unsigned long long)data_bytes_referenced);
11687
11688         free_qgroup_counts();
11689         free_root_recs_tree(&root_cache);
11690 close_out:
11691         close_ctree(root);
11692 err_out:
11693         if (ctx.progress_enabled)
11694                 task_deinit(ctx.info);
11695
11696         return ret;
11697 }