btrfs-progs: check: use on-stack path buffer in find_normal_file_extent
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phase shown by the progress indicator.  print_status_check() uses the
 * value as an index into its table of phase labels, except for
 * TASK_NOTHING, for which it prints nothing.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
51
/* Context passed (as void *) to print_status_check(). */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output was requested -- confirm at the callers */
        int progress_enabled;
        /* phase whose label print_status_check() prints */
        enum task_position tp;

        /* handle given to task_period_start() and task_period_wait() */
        struct task_info *info;
};
58
/*
 * File-scope byte counters; all start at zero.
 * NOTE(review): presumably accumulated while the trees are scanned and
 * reported at the end of the check -- confirm at the users.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
/* List heads, initialized empty by LIST_HEAD(). */
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
/* NOTE(review): the flags below look like option/feature switches -- confirm where they are set */
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Zero-initialized progress context (see struct task_ctx). */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Check implementations that can be selected by name.  parse_check_mode()
 * maps "lowmem" to CHECK_MODE_LOWMEM, "orig"/"original" to
 * CHECK_MODE_ORIGINAL and anything else to CHECK_MODE_UNKNOWN.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        /* alias, not a separate mode: the default is the original mode */
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
/*
 * Common part of a backref record.  It is embedded as the `node' member of
 * struct data_backref and struct tree_backref, and is chained through
 * `list' (see to_extent_backref()).
 */
struct extent_backref {
        struct list_head list;
        /* NOTE(review): presumably tells data_backref from tree_backref -- confirm */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
/*
 * Backref record for data.  The common struct extent_backref is embedded
 * as `node', which lets to_data_backref() recover this structure from it.
 */
struct data_backref {
        struct extent_backref node;
        /* parent and root share the same storage */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
/*
 * Much like data_backref, but with the undetermined members removed and
 * chained through a plain list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to the orphan_extents list of the
 * matching struct inode_record.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* printed as "inode" by print_orphan_data_extents() */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
134
/*
 * Backref record for a tree block.  The common struct extent_backref is
 * embedded as `node' (see to_tree_backref()).
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share the same storage */
        union {
                u64 parent;
                u64 root;
        };
};
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a 2-bit bitfield, so it can hold this value in addition
 * to 0 and 1.
 */
enum { FLAG_UNSET = 2 };
150
/*
 * Record describing one extent.  to_extent_record() recovers it from its
 * `list' link.
 */
struct extent_record {
        /* NOTE(review): presumably a list of struct extent_backref -- confirm */
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that FLAG_UNSET (2) fits */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
/*
 * One name referring to an inode, chained on inode_record::backrefs.
 * The structure is allocated with the name bytes directly behind it.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        /* compared against imode_to_type(rec->imode) in maybe_free_inode_rec() */
        u8 filetype;
        u8 ref_type;
        /* REF_ERR_* bits */
        int errors;
        u64 dir;
        u64 index;
        /*
         * Length of name[]; clone_inode_rec() copies
         * sizeof(struct inode_backref) + namelen + 1 bytes.
         */
        u16 namelen;
        char name[0];
};
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
/*
 * NOTE(review): looks like an in-memory summary of one root item (the
 * users are not visible here) -- confirm field meanings against them.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
212
/*
 * Error bits kept in the `errors' member of a backref record
 * (e.g. inode_backref::errors); print_ref_error() turns them into text.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
226
/*
 * One hole, covering the byte range [start, start + len).  Holes live in
 * an rb-tree (inode_record::holes) ordered by compare_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
232
/*
 * Everything collected about one inode.  Records are reference counted:
 * get_inode_rec() clones a record before modification when it is shared
 * (refs > 1) and free_inode_rec() releases it when refs drops to zero.
 */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        /* inode number, set by get_inode_rec() */
        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* compared with nlink by can_free_inode_rec() */
        u32 found_link;
        u64 found_size;
        /* initialized to (u64)-1 by get_inode_rec() */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* reference count */
        u32 refs;
};
260
/* Error bits kept in inode_record::errors; print_inode_error() decodes them. */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
276
/*
 * One reference to a root, chained through `list' (see to_root_backref()).
 * Like struct inode_backref it ends in a zero-length name[] array.
 * NOTE(review): presumably allocated with namelen name bytes behind it and
 * chained on root_record::backrefs -- confirm at the allocation site.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        /* NOTE(review): presumably REF_ERR_* bits -- confirm */
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
/*
 * Per-root bookkeeping, embeddable in a cache tree through `cache'.
 * NOTE(review): backrefs presumably holds struct root_backref entries --
 * confirm at the users.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
304
/*
 * Cache-tree entry that carries an opaque pointer.  get_inode_rec() uses
 * it with cache.start = inode number, cache.size = 1 and data pointing to
 * a struct inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
309
/*
 * Cache-tree entry (via `cache') holding its own root and inode caches.
 * NOTE(review): presumably describes a tree block shared between roots
 * (see walk_control::shared) -- confirm at the users.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
317
/* NOTE(review): presumably the start address and size of one block -- confirm at the users */
struct block_info {
        u64 start;
        u32 size;
};
322
/*
 * State of a tree walk, with one shared_node slot per tree level.
 * NOTE(review): active_node presumably indexes nodes[] -- confirm.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
329
/* A (root id, key) pair that can be chained on a list. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
335
/*
 * A (bytenr, bytes) range with counters, chainable on a list.
 * NOTE(review): the meaning of count/broken is defined by users outside
 * this part of the file -- confirm there.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
343
/* Information about one root item, embeddable in a cache tree. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
353
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit so that two different errors can never be
 * mistaken for each other once they are OR-ed into one value.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
/* Used to share bit 4 with REFERENCER_MISMATCH; moved to a free bit */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Terminate the progress line: emit a newline on stdout and flush it.
 * @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         struct rb_node *rb;
663         size_t size;
664         int ret;
665
666         rec = malloc(sizeof(*rec));
667         if (!rec)
668                 return ERR_PTR(-ENOMEM);
669         memcpy(rec, orig_rec, sizeof(*rec));
670         rec->refs = 1;
671         INIT_LIST_HEAD(&rec->backrefs);
672         INIT_LIST_HEAD(&rec->orphan_extents);
673         rec->holes = RB_ROOT;
674
675         list_for_each_entry(orig, &orig_rec->backrefs, list) {
676                 size = sizeof(*orig) + orig->namelen + 1;
677                 backref = malloc(size);
678                 if (!backref) {
679                         ret = -ENOMEM;
680                         goto cleanup;
681                 }
682                 memcpy(backref, orig, size);
683                 list_add_tail(&backref->list, &rec->backrefs);
684         }
685         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686                 dst_orphan = malloc(sizeof(*dst_orphan));
687                 if (!dst_orphan) {
688                         ret = -ENOMEM;
689                         goto cleanup;
690                 }
691                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
693         }
694         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
695         if (ret < 0)
696                 goto cleanup_rb;
697
698         return rec;
699
700 cleanup_rb:
701         rb = rb_first(&rec->holes);
702         while (rb) {
703                 struct file_extent_hole *hole;
704
705                 hole = rb_entry(rb, struct file_extent_hole, node);
706                 rb = rb_next(rb);
707                 free(hole);
708         }
709
710 cleanup:
711         if (!list_empty(&rec->backrefs))
712                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713                         list_del(&orig->list);
714                         free(orig);
715                 }
716
717         if (!list_empty(&rec->orphan_extents))
718                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719                         list_del(&orig->list);
720                         free(orig);
721                 }
722
723         free(rec);
724
725         return ERR_PTR(ret);
726 }
727
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
729                                       u64 objectid)
730 {
731         struct orphan_data_extent *orphan;
732
733         if (list_empty(orphan_extents))
734                 return;
735         printf("The following data extent is lost in tree %llu:\n",
736                objectid);
737         list_for_each_entry(orphan, orphan_extents, list) {
738                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
740                        orphan->disk_len);
741         }
742 }
743
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
745 {
746         u64 root_objectid = root->root_key.objectid;
747         int errors = rec->errors;
748
749         if (!errors)
750                 return;
751         /* reloc root errors, we print its corresponding fs root objectid*/
752         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753                 root_objectid = root->root_key.offset;
754                 fprintf(stderr, "reloc");
755         }
756         fprintf(stderr, "root %llu inode %llu errors %x",
757                 (unsigned long long) root_objectid,
758                 (unsigned long long) rec->ino, rec->errors);
759
760         if (errors & I_ERR_NO_INODE_ITEM)
761                 fprintf(stderr, ", no inode item");
762         if (errors & I_ERR_NO_ORPHAN_ITEM)
763                 fprintf(stderr, ", no orphan item");
764         if (errors & I_ERR_DUP_INODE_ITEM)
765                 fprintf(stderr, ", dup inode item");
766         if (errors & I_ERR_DUP_DIR_INDEX)
767                 fprintf(stderr, ", dup dir index");
768         if (errors & I_ERR_ODD_DIR_ITEM)
769                 fprintf(stderr, ", odd dir item");
770         if (errors & I_ERR_ODD_FILE_EXTENT)
771                 fprintf(stderr, ", odd file extent");
772         if (errors & I_ERR_BAD_FILE_EXTENT)
773                 fprintf(stderr, ", bad file extent");
774         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775                 fprintf(stderr, ", file extent overlap");
776         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777                 fprintf(stderr, ", file extent discount");
778         if (errors & I_ERR_DIR_ISIZE_WRONG)
779                 fprintf(stderr, ", dir isize wrong");
780         if (errors & I_ERR_FILE_NBYTES_WRONG)
781                 fprintf(stderr, ", nbytes wrong");
782         if (errors & I_ERR_ODD_CSUM_ITEM)
783                 fprintf(stderr, ", odd csum item");
784         if (errors & I_ERR_SOME_CSUM_MISSING)
785                 fprintf(stderr, ", some csum missing");
786         if (errors & I_ERR_LINK_COUNT_WRONG)
787                 fprintf(stderr, ", link count wrong");
788         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789                 fprintf(stderr, ", orphan file extent");
790         fprintf(stderr, "\n");
791         /* Print the orphan extents if needed */
792         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
794
795         /* Print the holes if needed */
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797                 struct file_extent_hole *hole;
798                 struct rb_node *node;
799                 int found = 0;
800
801                 node = rb_first(&rec->holes);
802                 fprintf(stderr, "Found file extent holes:\n");
803                 while (node) {
804                         found = 1;
805                         hole = rb_entry(node, struct file_extent_hole, node);
806                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
807                                 hole->start, hole->len);
808                         node = rb_next(node);
809                 }
810                 if (!found)
811                         fprintf(stderr, "\tstart: 0, len: %llu\n",
812                                 round_up(rec->isize, root->sectorsize));
813         }
814 }
815
816 static void print_ref_error(int errors)
817 {
818         if (errors & REF_ERR_NO_DIR_ITEM)
819                 fprintf(stderr, ", no dir item");
820         if (errors & REF_ERR_NO_DIR_INDEX)
821                 fprintf(stderr, ", no dir index");
822         if (errors & REF_ERR_NO_INODE_REF)
823                 fprintf(stderr, ", no inode ref");
824         if (errors & REF_ERR_DUP_DIR_ITEM)
825                 fprintf(stderr, ", dup dir item");
826         if (errors & REF_ERR_DUP_DIR_INDEX)
827                 fprintf(stderr, ", dup dir index");
828         if (errors & REF_ERR_DUP_INODE_REF)
829                 fprintf(stderr, ", dup inode ref");
830         if (errors & REF_ERR_INDEX_UNMATCH)
831                 fprintf(stderr, ", index mismatch");
832         if (errors & REF_ERR_FILETYPE_UNMATCH)
833                 fprintf(stderr, ", filetype mismatch");
834         if (errors & REF_ERR_NAME_TOO_LONG)
835                 fprintf(stderr, ", name too long");
836         if (errors & REF_ERR_NO_ROOT_REF)
837                 fprintf(stderr, ", no root ref");
838         if (errors & REF_ERR_NO_ROOT_BACKREF)
839                 fprintf(stderr, ", no root backref");
840         if (errors & REF_ERR_DUP_ROOT_REF)
841                 fprintf(stderr, ", dup root ref");
842         if (errors & REF_ERR_DUP_ROOT_BACKREF)
843                 fprintf(stderr, ", dup root backref");
844         fprintf(stderr, "\n");
845 }
846
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
848                                           u64 ino, int mod)
849 {
850         struct ptr_node *node;
851         struct cache_extent *cache;
852         struct inode_record *rec = NULL;
853         int ret;
854
855         cache = lookup_cache_extent(inode_cache, ino, 1);
856         if (cache) {
857                 node = container_of(cache, struct ptr_node, cache);
858                 rec = node->data;
859                 if (mod && rec->refs > 1) {
860                         node->data = clone_inode_rec(rec);
861                         if (IS_ERR(node->data))
862                                 return node->data;
863                         rec->refs--;
864                         rec = node->data;
865                 }
866         } else if (mod) {
867                 rec = calloc(1, sizeof(*rec));
868                 if (!rec)
869                         return ERR_PTR(-ENOMEM);
870                 rec->ino = ino;
871                 rec->extent_start = (u64)-1;
872                 rec->refs = 1;
873                 INIT_LIST_HEAD(&rec->backrefs);
874                 INIT_LIST_HEAD(&rec->orphan_extents);
875                 rec->holes = RB_ROOT;
876
877                 node = malloc(sizeof(*node));
878                 if (!node) {
879                         free(rec);
880                         return ERR_PTR(-ENOMEM);
881                 }
882                 node->cache.start = ino;
883                 node->cache.size = 1;
884                 node->data = rec;
885
886                 if (ino == BTRFS_FREE_INO_OBJECTID)
887                         rec->found_link = 1;
888
889                 ret = insert_cache_extent(inode_cache, &node->cache);
890                 if (ret)
891                         return ERR_PTR(-EEXIST);
892         }
893         return rec;
894 }
895
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
897 {
898         struct orphan_data_extent *orphan;
899
900         while (!list_empty(orphan_extents)) {
901                 orphan = list_entry(orphan_extents->next,
902                                     struct orphan_data_extent, list);
903                 list_del(&orphan->list);
904                 free(orphan);
905         }
906 }
907
908 static void free_inode_rec(struct inode_record *rec)
909 {
910         struct inode_backref *backref;
911
912         if (--rec->refs > 0)
913                 return;
914
915         while (!list_empty(&rec->backrefs)) {
916                 backref = to_inode_backref(rec->backrefs.next);
917                 list_del(&backref->list);
918                 free(backref);
919         }
920         free_orphan_data_extents(&rec->orphan_extents);
921         free_file_extent_holes(&rec->holes);
922         free(rec);
923 }
924
925 static int can_free_inode_rec(struct inode_record *rec)
926 {
927         if (!rec->errors && rec->checked && rec->found_inode_item &&
928             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
929                 return 1;
930         return 0;
931 }
932
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934                                  struct inode_record *rec)
935 {
936         struct cache_extent *cache;
937         struct inode_backref *tmp, *backref;
938         struct ptr_node *node;
939         u8 filetype;
940
941         if (!rec->found_inode_item)
942                 return;
943
944         filetype = imode_to_type(rec->imode);
945         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946                 if (backref->found_dir_item && backref->found_dir_index) {
947                         if (backref->filetype != filetype)
948                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949                         if (!backref->errors && backref->found_inode_ref &&
950                             rec->nlink == rec->found_link) {
951                                 list_del(&backref->list);
952                                 free(backref);
953                         }
954                 }
955         }
956
957         if (!rec->checked || rec->merging)
958                 return;
959
960         if (S_ISDIR(rec->imode)) {
961                 if (rec->found_size != rec->isize)
962                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963                 if (rec->found_file_extent)
964                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
965         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966                 if (rec->found_dir_item)
967                         rec->errors |= I_ERR_ODD_DIR_ITEM;
968                 if (rec->found_size != rec->nbytes)
969                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
970                 if (rec->nlink > 0 && !no_holes &&
971                     (rec->extent_end < rec->isize ||
972                      first_extent_gap(&rec->holes) < rec->isize))
973                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
974         }
975
976         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977                 if (rec->found_csum_item && rec->nodatasum)
978                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
979                 if (rec->some_csum_missing && !rec->nodatasum)
980                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
981         }
982
983         BUG_ON(rec->refs != 1);
984         if (can_free_inode_rec(rec)) {
985                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986                 node = container_of(cache, struct ptr_node, cache);
987                 BUG_ON(node->data != rec);
988                 remove_cache_extent(inode_cache, &node->cache);
989                 free(node);
990                 free_inode_rec(rec);
991         }
992 }
993
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
995 {
996         struct btrfs_path path;
997         struct btrfs_key key;
998         int ret;
999
1000         key.objectid = BTRFS_ORPHAN_OBJECTID;
1001         key.type = BTRFS_ORPHAN_ITEM_KEY;
1002         key.offset = ino;
1003
1004         btrfs_init_path(&path);
1005         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006         btrfs_release_path(&path);
1007         if (ret > 0)
1008                 ret = -ENOENT;
1009         return ret;
1010 }
1011
1012 static int process_inode_item(struct extent_buffer *eb,
1013                               int slot, struct btrfs_key *key,
1014                               struct shared_node *active_node)
1015 {
1016         struct inode_record *rec;
1017         struct btrfs_inode_item *item;
1018
1019         rec = active_node->current;
1020         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021         if (rec->found_inode_item) {
1022                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1023                 return 1;
1024         }
1025         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026         rec->nlink = btrfs_inode_nlink(eb, item);
1027         rec->isize = btrfs_inode_size(eb, item);
1028         rec->nbytes = btrfs_inode_nbytes(eb, item);
1029         rec->imode = btrfs_inode_mode(eb, item);
1030         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1031                 rec->nodatasum = 1;
1032         rec->found_inode_item = 1;
1033         if (rec->nlink == 0)
1034                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035         maybe_free_inode_rec(&active_node->inode_cache, rec);
1036         return 0;
1037 }
1038
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1040                                                 const char *name,
1041                                                 int namelen, u64 dir)
1042 {
1043         struct inode_backref *backref;
1044
1045         list_for_each_entry(backref, &rec->backrefs, list) {
1046                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1047                         break;
1048                 if (backref->dir != dir || backref->namelen != namelen)
1049                         continue;
1050                 if (memcmp(name, backref->name, namelen))
1051                         continue;
1052                 return backref;
1053         }
1054
1055         backref = malloc(sizeof(*backref) + namelen + 1);
1056         if (!backref)
1057                 return NULL;
1058         memset(backref, 0, sizeof(*backref));
1059         backref->dir = dir;
1060         backref->namelen = namelen;
1061         memcpy(backref->name, name, namelen);
1062         backref->name[namelen] = '\0';
1063         list_add_tail(&backref->list, &rec->backrefs);
1064         return backref;
1065 }
1066
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068                              u64 ino, u64 dir, u64 index,
1069                              const char *name, int namelen,
1070                              u8 filetype, u8 itemtype, int errors)
1071 {
1072         struct inode_record *rec;
1073         struct inode_backref *backref;
1074
1075         rec = get_inode_rec(inode_cache, ino, 1);
1076         BUG_ON(IS_ERR(rec));
1077         backref = get_inode_backref(rec, name, namelen, dir);
1078         BUG_ON(!backref);
1079         if (errors)
1080                 backref->errors |= errors;
1081         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082                 if (backref->found_dir_index)
1083                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084                 if (backref->found_inode_ref && backref->index != index)
1085                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1086                 if (backref->found_dir_item && backref->filetype != filetype)
1087                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1088
1089                 backref->index = index;
1090                 backref->filetype = filetype;
1091                 backref->found_dir_index = 1;
1092         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1093                 rec->found_link++;
1094                 if (backref->found_dir_item)
1095                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096                 if (backref->found_dir_index && backref->filetype != filetype)
1097                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1098
1099                 backref->filetype = filetype;
1100                 backref->found_dir_item = 1;
1101         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103                 if (backref->found_inode_ref)
1104                         backref->errors |= REF_ERR_DUP_INODE_REF;
1105                 if (backref->found_dir_index && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 else
1108                         backref->index = index;
1109
1110                 backref->ref_type = itemtype;
1111                 backref->found_inode_ref = 1;
1112         } else {
1113                 BUG_ON(1);
1114         }
1115
1116         maybe_free_inode_rec(inode_cache, rec);
1117         return 0;
1118 }
1119
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121                             struct cache_tree *dst_cache)
1122 {
1123         struct inode_backref *backref;
1124         u32 dir_count = 0;
1125         int ret = 0;
1126
1127         dst->merging = 1;
1128         list_for_each_entry(backref, &src->backrefs, list) {
1129                 if (backref->found_dir_index) {
1130                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1131                                         backref->index, backref->name,
1132                                         backref->namelen, backref->filetype,
1133                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1134                 }
1135                 if (backref->found_dir_item) {
1136                         dir_count++;
1137                         add_inode_backref(dst_cache, dst->ino,
1138                                         backref->dir, 0, backref->name,
1139                                         backref->namelen, backref->filetype,
1140                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1141                 }
1142                 if (backref->found_inode_ref) {
1143                         add_inode_backref(dst_cache, dst->ino,
1144                                         backref->dir, backref->index,
1145                                         backref->name, backref->namelen, 0,
1146                                         backref->ref_type, backref->errors);
1147                 }
1148         }
1149
1150         if (src->found_dir_item)
1151                 dst->found_dir_item = 1;
1152         if (src->found_file_extent)
1153                 dst->found_file_extent = 1;
1154         if (src->found_csum_item)
1155                 dst->found_csum_item = 1;
1156         if (src->some_csum_missing)
1157                 dst->some_csum_missing = 1;
1158         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1160                 if (ret < 0)
1161                         return ret;
1162         }
1163
1164         BUG_ON(src->found_link < dir_count);
1165         dst->found_link += src->found_link - dir_count;
1166         dst->found_size += src->found_size;
1167         if (src->extent_start != (u64)-1) {
1168                 if (dst->extent_start == (u64)-1) {
1169                         dst->extent_start = src->extent_start;
1170                         dst->extent_end = src->extent_end;
1171                 } else {
1172                         if (dst->extent_end > src->extent_start)
1173                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174                         else if (dst->extent_end < src->extent_start) {
1175                                 ret = add_file_extent_hole(&dst->holes,
1176                                         dst->extent_end,
1177                                         src->extent_start - dst->extent_end);
1178                         }
1179                         if (dst->extent_end < src->extent_end)
1180                                 dst->extent_end = src->extent_end;
1181                 }
1182         }
1183
1184         dst->errors |= src->errors;
1185         if (src->found_inode_item) {
1186                 if (!dst->found_inode_item) {
1187                         dst->nlink = src->nlink;
1188                         dst->isize = src->isize;
1189                         dst->nbytes = src->nbytes;
1190                         dst->imode = src->imode;
1191                         dst->nodatasum = src->nodatasum;
1192                         dst->found_inode_item = 1;
1193                 } else {
1194                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1195                 }
1196         }
1197         dst->merging = 0;
1198
1199         return 0;
1200 }
1201
1202 static int splice_shared_node(struct shared_node *src_node,
1203                               struct shared_node *dst_node)
1204 {
1205         struct cache_extent *cache;
1206         struct ptr_node *node, *ins;
1207         struct cache_tree *src, *dst;
1208         struct inode_record *rec, *conflict;
1209         u64 current_ino = 0;
1210         int splice = 0;
1211         int ret;
1212
1213         if (--src_node->refs == 0)
1214                 splice = 1;
1215         if (src_node->current)
1216                 current_ino = src_node->current->ino;
1217
1218         src = &src_node->root_cache;
1219         dst = &dst_node->root_cache;
1220 again:
1221         cache = search_cache_extent(src, 0);
1222         while (cache) {
1223                 node = container_of(cache, struct ptr_node, cache);
1224                 rec = node->data;
1225                 cache = next_cache_extent(cache);
1226
1227                 if (splice) {
1228                         remove_cache_extent(src, &node->cache);
1229                         ins = node;
1230                 } else {
1231                         ins = malloc(sizeof(*ins));
1232                         BUG_ON(!ins);
1233                         ins->cache.start = node->cache.start;
1234                         ins->cache.size = node->cache.size;
1235                         ins->data = rec;
1236                         rec->refs++;
1237                 }
1238                 ret = insert_cache_extent(dst, &ins->cache);
1239                 if (ret == -EEXIST) {
1240                         conflict = get_inode_rec(dst, rec->ino, 1);
1241                         BUG_ON(IS_ERR(conflict));
1242                         merge_inode_recs(rec, conflict, dst);
1243                         if (rec->checked) {
1244                                 conflict->checked = 1;
1245                                 if (dst_node->current == conflict)
1246                                         dst_node->current = NULL;
1247                         }
1248                         maybe_free_inode_rec(dst, conflict);
1249                         free_inode_rec(rec);
1250                         free(ins);
1251                 } else {
1252                         BUG_ON(ret);
1253                 }
1254         }
1255
1256         if (src == &src_node->root_cache) {
1257                 src = &src_node->inode_cache;
1258                 dst = &dst_node->inode_cache;
1259                 goto again;
1260         }
1261
1262         if (current_ino > 0 && (!dst_node->current ||
1263             current_ino > dst_node->current->ino)) {
1264                 if (dst_node->current) {
1265                         dst_node->current->checked = 1;
1266                         maybe_free_inode_rec(dst, dst_node->current);
1267                 }
1268                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269                 BUG_ON(IS_ERR(dst_node->current));
1270         }
1271         return 0;
1272 }
1273
1274 static void free_inode_ptr(struct cache_extent *cache)
1275 {
1276         struct ptr_node *node;
1277         struct inode_record *rec;
1278
1279         node = container_of(cache, struct ptr_node, cache);
1280         rec = node->data;
1281         free_inode_rec(rec);
1282         free(node);
1283 }
1284
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1286
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1288                                             u64 bytenr)
1289 {
1290         struct cache_extent *cache;
1291         struct shared_node *node;
1292
1293         cache = lookup_cache_extent(shared, bytenr, 1);
1294         if (cache) {
1295                 node = container_of(cache, struct shared_node, cache);
1296                 return node;
1297         }
1298         return NULL;
1299 }
1300
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1302 {
1303         int ret;
1304         struct shared_node *node;
1305
1306         node = calloc(1, sizeof(*node));
1307         if (!node)
1308                 return -ENOMEM;
1309         node->cache.start = bytenr;
1310         node->cache.size = 1;
1311         cache_tree_init(&node->root_cache);
1312         cache_tree_init(&node->inode_cache);
1313         node->refs = refs;
1314
1315         ret = insert_cache_extent(shared, &node->cache);
1316
1317         return ret;
1318 }
1319
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321                              struct walk_control *wc, int level)
1322 {
1323         struct shared_node *node;
1324         struct shared_node *dest;
1325         int ret;
1326
1327         if (level == wc->active_node)
1328                 return 0;
1329
1330         BUG_ON(wc->active_node <= level);
1331         node = find_shared_node(&wc->shared, bytenr);
1332         if (!node) {
1333                 ret = add_shared_node(&wc->shared, bytenr, refs);
1334                 BUG_ON(ret);
1335                 node = find_shared_node(&wc->shared, bytenr);
1336                 wc->nodes[level] = node;
1337                 wc->active_node = level;
1338                 return 0;
1339         }
1340
1341         if (wc->root_level == wc->active_node &&
1342             btrfs_root_refs(&root->root_item) == 0) {
1343                 if (--node->refs == 0) {
1344                         free_inode_recs_tree(&node->root_cache);
1345                         free_inode_recs_tree(&node->inode_cache);
1346                         remove_cache_extent(&wc->shared, &node->cache);
1347                         free(node);
1348                 }
1349                 return 1;
1350         }
1351
1352         dest = wc->nodes[wc->active_node];
1353         splice_shared_node(node, dest);
1354         if (node->refs == 0) {
1355                 remove_cache_extent(&wc->shared, &node->cache);
1356                 free(node);
1357         }
1358         return 1;
1359 }
1360
1361 static int leave_shared_node(struct btrfs_root *root,
1362                              struct walk_control *wc, int level)
1363 {
1364         struct shared_node *node;
1365         struct shared_node *dest;
1366         int i;
1367
1368         if (level == wc->root_level)
1369                 return 0;
1370
1371         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1372                 if (wc->nodes[i])
1373                         break;
1374         }
1375         BUG_ON(i >= BTRFS_MAX_LEVEL);
1376
1377         node = wc->nodes[wc->active_node];
1378         wc->nodes[wc->active_node] = NULL;
1379         wc->active_node = i;
1380
1381         dest = wc->nodes[wc->active_node];
1382         if (wc->active_node < wc->root_level ||
1383             btrfs_root_refs(&root->root_item) > 0) {
1384                 BUG_ON(node->refs <= 1);
1385                 splice_shared_node(node, dest);
1386         } else {
1387                 BUG_ON(node->refs < 2);
1388                 node->refs--;
1389         }
1390         return 0;
1391 }
1392
1393 /*
1394  * Returns:
1395  * < 0 - on error
1396  * 1   - if the root with id child_root_id is a child of root parent_root_id
1397  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1398  *       has other root(s) as parent(s)
1399  * 2   - if the root child_root_id doesn't have any parent roots
1400  */
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1402                          u64 child_root_id)
1403 {
1404         struct btrfs_path path;
1405         struct btrfs_key key;
1406         struct extent_buffer *leaf;
1407         int has_parent = 0;
1408         int ret;
1409
1410         btrfs_init_path(&path);
1411
1412         key.objectid = parent_root_id;
1413         key.type = BTRFS_ROOT_REF_KEY;
1414         key.offset = child_root_id;
1415         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1416                                 0, 0);
1417         if (ret < 0)
1418                 return ret;
1419         btrfs_release_path(&path);
1420         if (!ret)
1421                 return 1;
1422
1423         key.objectid = child_root_id;
1424         key.type = BTRFS_ROOT_BACKREF_KEY;
1425         key.offset = 0;
1426         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1427                                 0, 0);
1428         if (ret < 0)
1429                 goto out;
1430
1431         while (1) {
1432                 leaf = path.nodes[0];
1433                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1435                         if (ret)
1436                                 break;
1437                         leaf = path.nodes[0];
1438                 }
1439
1440                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1441                 if (key.objectid != child_root_id ||
1442                     key.type != BTRFS_ROOT_BACKREF_KEY)
1443                         break;
1444
1445                 has_parent = 1;
1446
1447                 if (key.offset == parent_root_id) {
1448                         btrfs_release_path(&path);
1449                         return 1;
1450                 }
1451
1452                 path.slots[0]++;
1453         }
1454 out:
1455         btrfs_release_path(&path);
1456         if (ret < 0)
1457                 return ret;
1458         return has_parent ? 0 : 2;
1459 }
1460
1461 static int process_dir_item(struct btrfs_root *root,
1462                             struct extent_buffer *eb,
1463                             int slot, struct btrfs_key *key,
1464                             struct shared_node *active_node)
1465 {
1466         u32 total;
1467         u32 cur = 0;
1468         u32 len;
1469         u32 name_len;
1470         u32 data_len;
1471         int error;
1472         int nritems = 0;
1473         u8 filetype;
1474         struct btrfs_dir_item *di;
1475         struct inode_record *rec;
1476         struct cache_tree *root_cache;
1477         struct cache_tree *inode_cache;
1478         struct btrfs_key location;
1479         char namebuf[BTRFS_NAME_LEN];
1480
1481         root_cache = &active_node->root_cache;
1482         inode_cache = &active_node->inode_cache;
1483         rec = active_node->current;
1484         rec->found_dir_item = 1;
1485
1486         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487         total = btrfs_item_size_nr(eb, slot);
1488         while (cur < total) {
1489                 nritems++;
1490                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491                 name_len = btrfs_dir_name_len(eb, di);
1492                 data_len = btrfs_dir_data_len(eb, di);
1493                 filetype = btrfs_dir_type(eb, di);
1494
1495                 rec->found_size += name_len;
1496                 if (name_len <= BTRFS_NAME_LEN) {
1497                         len = name_len;
1498                         error = 0;
1499                 } else {
1500                         len = BTRFS_NAME_LEN;
1501                         error = REF_ERR_NAME_TOO_LONG;
1502                 }
1503                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1504
1505                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506                         add_inode_backref(inode_cache, location.objectid,
1507                                           key->objectid, key->offset, namebuf,
1508                                           len, filetype, key->type, error);
1509                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510                         add_inode_backref(root_cache, location.objectid,
1511                                           key->objectid, key->offset,
1512                                           namebuf, len, filetype,
1513                                           key->type, error);
1514                 } else {
1515                         fprintf(stderr, "invalid location in dir item %u\n",
1516                                 location.type);
1517                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518                                           key->objectid, key->offset, namebuf,
1519                                           len, filetype, key->type, error);
1520                 }
1521
1522                 len = sizeof(*di) + name_len + data_len;
1523                 di = (struct btrfs_dir_item *)((char *)di + len);
1524                 cur += len;
1525         }
1526         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1528
1529         return 0;
1530 }
1531
1532 static int process_inode_ref(struct extent_buffer *eb,
1533                              int slot, struct btrfs_key *key,
1534                              struct shared_node *active_node)
1535 {
1536         u32 total;
1537         u32 cur = 0;
1538         u32 len;
1539         u32 name_len;
1540         u64 index;
1541         int error;
1542         struct cache_tree *inode_cache;
1543         struct btrfs_inode_ref *ref;
1544         char namebuf[BTRFS_NAME_LEN];
1545
1546         inode_cache = &active_node->inode_cache;
1547
1548         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549         total = btrfs_item_size_nr(eb, slot);
1550         while (cur < total) {
1551                 name_len = btrfs_inode_ref_name_len(eb, ref);
1552                 index = btrfs_inode_ref_index(eb, ref);
1553                 if (name_len <= BTRFS_NAME_LEN) {
1554                         len = name_len;
1555                         error = 0;
1556                 } else {
1557                         len = BTRFS_NAME_LEN;
1558                         error = REF_ERR_NAME_TOO_LONG;
1559                 }
1560                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561                 add_inode_backref(inode_cache, key->objectid, key->offset,
1562                                   index, namebuf, len, 0, key->type, error);
1563
1564                 len = sizeof(*ref) + name_len;
1565                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1566                 cur += len;
1567         }
1568         return 0;
1569 }
1570
1571 static int process_inode_extref(struct extent_buffer *eb,
1572                                 int slot, struct btrfs_key *key,
1573                                 struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u64 index;
1580         u64 parent;
1581         int error;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_inode_extref *extref;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         inode_cache = &active_node->inode_cache;
1587
1588         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589         total = btrfs_item_size_nr(eb, slot);
1590         while (cur < total) {
1591                 name_len = btrfs_inode_extref_name_len(eb, extref);
1592                 index = btrfs_inode_extref_index(eb, extref);
1593                 parent = btrfs_inode_extref_parent(eb, extref);
1594                 if (name_len <= BTRFS_NAME_LEN) {
1595                         len = name_len;
1596                         error = 0;
1597                 } else {
1598                         len = BTRFS_NAME_LEN;
1599                         error = REF_ERR_NAME_TOO_LONG;
1600                 }
1601                 read_extent_buffer(eb, namebuf,
1602                                    (unsigned long)(extref + 1), len);
1603                 add_inode_backref(inode_cache, key->objectid, parent,
1604                                   index, namebuf, len, 0, key->type, error);
1605
1606                 len = sizeof(*extref) + name_len;
1607                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1608                 cur += len;
1609         }
1610         return 0;
1611
1612 }
1613
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615                             u64 len, u64 *found)
1616 {
1617         struct btrfs_key key;
1618         struct btrfs_path path;
1619         struct extent_buffer *leaf;
1620         int ret;
1621         size_t size;
1622         *found = 0;
1623         u64 csum_end;
1624         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1625
1626         btrfs_init_path(&path);
1627
1628         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1629         key.offset = start;
1630         key.type = BTRFS_EXTENT_CSUM_KEY;
1631
1632         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1633                                 &key, &path, 0, 0);
1634         if (ret < 0)
1635                 goto out;
1636         if (ret > 0 && path.slots[0] > 0) {
1637                 leaf = path.nodes[0];
1638                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640                     key.type == BTRFS_EXTENT_CSUM_KEY)
1641                         path.slots[0]--;
1642         }
1643
1644         while (len > 0) {
1645                 leaf = path.nodes[0];
1646                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1648                         if (ret > 0)
1649                                 break;
1650                         else if (ret < 0)
1651                                 goto out;
1652                         leaf = path.nodes[0];
1653                 }
1654
1655                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657                     key.type != BTRFS_EXTENT_CSUM_KEY)
1658                         break;
1659
1660                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661                 if (key.offset >= start + len)
1662                         break;
1663
1664                 if (key.offset > start)
1665                         start = key.offset;
1666
1667                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669                 if (csum_end > start) {
1670                         size = min(csum_end - start, len);
1671                         len -= size;
1672                         start += size;
1673                         *found += size;
1674                 }
1675
1676                 path.slots[0]++;
1677         }
1678 out:
1679         btrfs_release_path(&path);
1680         if (ret < 0)
1681                 return ret;
1682         return 0;
1683 }
1684
1685 static int process_file_extent(struct btrfs_root *root,
1686                                 struct extent_buffer *eb,
1687                                 int slot, struct btrfs_key *key,
1688                                 struct shared_node *active_node)
1689 {
1690         struct inode_record *rec;
1691         struct btrfs_file_extent_item *fi;
1692         u64 num_bytes = 0;
1693         u64 disk_bytenr = 0;
1694         u64 extent_offset = 0;
1695         u64 mask = root->sectorsize - 1;
1696         int extent_type;
1697         int ret;
1698
1699         rec = active_node->current;
1700         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701         rec->found_file_extent = 1;
1702
1703         if (rec->extent_start == (u64)-1) {
1704                 rec->extent_start = key->offset;
1705                 rec->extent_end = key->offset;
1706         }
1707
1708         if (rec->extent_end > key->offset)
1709                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710         else if (rec->extent_end < key->offset) {
1711                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712                                            key->offset - rec->extent_end);
1713                 if (ret < 0)
1714                         return ret;
1715         }
1716
1717         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718         extent_type = btrfs_file_extent_type(eb, fi);
1719
1720         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1722                 if (num_bytes == 0)
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 rec->found_size += num_bytes;
1725                 num_bytes = (num_bytes + mask) & ~mask;
1726         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730                 extent_offset = btrfs_file_extent_offset(eb, fi);
1731                 if (num_bytes == 0 || (num_bytes & mask))
1732                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733                 if (num_bytes + extent_offset >
1734                     btrfs_file_extent_ram_bytes(eb, fi))
1735                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1736                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737                     (btrfs_file_extent_compression(eb, fi) ||
1738                      btrfs_file_extent_encryption(eb, fi) ||
1739                      btrfs_file_extent_other_encoding(eb, fi)))
1740                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1741                 if (disk_bytenr > 0)
1742                         rec->found_size += num_bytes;
1743         } else {
1744                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1745         }
1746         rec->extent_end = key->offset + num_bytes;
1747
1748         /*
1749          * The data reloc tree will copy full extents into its inode and then
1750          * copy the corresponding csums.  Because the extent it copied could be
1751          * a preallocated extent that hasn't been written to yet there may be no
1752          * csums to copy, ergo we won't have csums for our file extent.  This is
1753          * ok so just don't bother checking csums if the inode belongs to the
1754          * data reloc tree.
1755          */
1756         if (disk_bytenr > 0 &&
1757             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1758                 u64 found;
1759                 if (btrfs_file_extent_compression(eb, fi))
1760                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1761                 else
1762                         disk_bytenr += extent_offset;
1763
1764                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1765                 if (ret < 0)
1766                         return ret;
1767                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1768                         if (found > 0)
1769                                 rec->found_csum_item = 1;
1770                         if (found < num_bytes)
1771                                 rec->some_csum_missing = 1;
1772                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1773                         if (found > 0)
1774                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1775                 }
1776         }
1777         return 0;
1778 }
1779
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781                             struct walk_control *wc)
1782 {
1783         struct btrfs_key key;
1784         u32 nritems;
1785         int i;
1786         int ret = 0;
1787         struct cache_tree *inode_cache;
1788         struct shared_node *active_node;
1789
1790         if (wc->root_level == wc->active_node &&
1791             btrfs_root_refs(&root->root_item) == 0)
1792                 return 0;
1793
1794         active_node = wc->nodes[wc->active_node];
1795         inode_cache = &active_node->inode_cache;
1796         nritems = btrfs_header_nritems(eb);
1797         for (i = 0; i < nritems; i++) {
1798                 btrfs_item_key_to_cpu(eb, &key, i);
1799
1800                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1801                         continue;
1802                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1803                         continue;
1804
1805                 if (active_node->current == NULL ||
1806                     active_node->current->ino < key.objectid) {
1807                         if (active_node->current) {
1808                                 active_node->current->checked = 1;
1809                                 maybe_free_inode_rec(inode_cache,
1810                                                      active_node->current);
1811                         }
1812                         active_node->current = get_inode_rec(inode_cache,
1813                                                              key.objectid, 1);
1814                         BUG_ON(IS_ERR(active_node->current));
1815                 }
1816                 switch (key.type) {
1817                 case BTRFS_DIR_ITEM_KEY:
1818                 case BTRFS_DIR_INDEX_KEY:
1819                         ret = process_dir_item(root, eb, i, &key, active_node);
1820                         break;
1821                 case BTRFS_INODE_REF_KEY:
1822                         ret = process_inode_ref(eb, i, &key, active_node);
1823                         break;
1824                 case BTRFS_INODE_EXTREF_KEY:
1825                         ret = process_inode_extref(eb, i, &key, active_node);
1826                         break;
1827                 case BTRFS_INODE_ITEM_KEY:
1828                         ret = process_inode_item(eb, i, &key, active_node);
1829                         break;
1830                 case BTRFS_EXTENT_DATA_KEY:
1831                         ret = process_file_extent(root, eb, i, &key,
1832                                                   active_node);
1833                         break;
1834                 default:
1835                         break;
1836                 };
1837         }
1838         return ret;
1839 }
1840
1841 static void reada_walk_down(struct btrfs_root *root,
1842                             struct extent_buffer *node, int slot)
1843 {
1844         u64 bytenr;
1845         u64 ptr_gen;
1846         u32 nritems;
1847         u32 blocksize;
1848         int i;
1849         int level;
1850
1851         level = btrfs_header_level(node);
1852         if (level != 1)
1853                 return;
1854
1855         nritems = btrfs_header_nritems(node);
1856         blocksize = root->nodesize;
1857         for (i = slot; i < nritems; i++) {
1858                 bytenr = btrfs_node_blockptr(node, i);
1859                 ptr_gen = btrfs_node_ptr_generation(node, i);
1860                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1861         }
1862 }
1863
1864 /*
1865  * Check the child node/leaf by the following condition:
1866  * 1. the first item key of the node/leaf should be the same with the one
1867  *    in parent.
1868  * 2. block in parent node should match the child node/leaf.
1869  * 3. generation of parent node and child's header should be consistent.
1870  *
1871  * Or the child node/leaf pointed by the key in parent is not valid.
1872  *
1873  * We hope to check leaf owner too, but since subvol may share leaves,
1874  * which makes leaf owner check not so strong, key check should be
1875  * sufficient enough for that case.
1876  */
1877 static int check_child_node(struct btrfs_root *root,
1878                             struct extent_buffer *parent, int slot,
1879                             struct extent_buffer *child)
1880 {
1881         struct btrfs_key parent_key;
1882         struct btrfs_key child_key;
1883         int ret = 0;
1884
1885         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1886         if (btrfs_header_level(child) == 0)
1887                 btrfs_item_key_to_cpu(child, &child_key, 0);
1888         else
1889                 btrfs_node_key_to_cpu(child, &child_key, 0);
1890
1891         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1892                 ret = -EINVAL;
1893                 fprintf(stderr,
1894                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895                         parent_key.objectid, parent_key.type, parent_key.offset,
1896                         child_key.objectid, child_key.type, child_key.offset);
1897         }
1898         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1899                 ret = -EINVAL;
1900                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901                         btrfs_node_blockptr(parent, slot),
1902                         btrfs_header_bytenr(child));
1903         }
1904         if (btrfs_node_ptr_generation(parent, slot) !=
1905             btrfs_header_generation(child)) {
1906                 ret = -EINVAL;
1907                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908                         btrfs_header_generation(child),
1909                         btrfs_node_ptr_generation(parent, slot));
1910         }
1911         return ret;
1912 }
1913
1914 struct node_refs {
1915         u64 bytenr[BTRFS_MAX_LEVEL];
1916         u64 refs[BTRFS_MAX_LEVEL];
1917 };
1918
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920                           struct walk_control *wc, int *level,
1921                           struct node_refs *nrefs)
1922 {
1923         enum btrfs_tree_block_status status;
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         struct extent_buffer *next;
1927         struct extent_buffer *cur;
1928         u32 blocksize;
1929         int ret, err = 0;
1930         u64 refs;
1931
1932         WARN_ON(*level < 0);
1933         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1934
1935         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936                 refs = nrefs->refs[*level];
1937                 ret = 0;
1938         } else {
1939                 ret = btrfs_lookup_extent_info(NULL, root,
1940                                        path->nodes[*level]->start,
1941                                        *level, 1, &refs, NULL);
1942                 if (ret < 0) {
1943                         err = ret;
1944                         goto out;
1945                 }
1946                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947                 nrefs->refs[*level] = refs;
1948         }
1949
1950         if (refs > 1) {
1951                 ret = enter_shared_node(root, path->nodes[*level]->start,
1952                                         refs, wc, *level);
1953                 if (ret > 0) {
1954                         err = ret;
1955                         goto out;
1956                 }
1957         }
1958
1959         while (*level >= 0) {
1960                 WARN_ON(*level < 0);
1961                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962                 cur = path->nodes[*level];
1963
1964                 if (btrfs_header_level(cur) != *level)
1965                         WARN_ON(1);
1966
1967                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1968                         break;
1969                 if (*level == 0) {
1970                         ret = process_one_leaf(root, cur, wc);
1971                         if (ret < 0)
1972                                 err = ret;
1973                         break;
1974                 }
1975                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977                 blocksize = root->nodesize;
1978
1979                 if (bytenr == nrefs->bytenr[*level - 1]) {
1980                         refs = nrefs->refs[*level - 1];
1981                 } else {
1982                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983                                         *level - 1, 1, &refs, NULL);
1984                         if (ret < 0) {
1985                                 refs = 0;
1986                         } else {
1987                                 nrefs->bytenr[*level - 1] = bytenr;
1988                                 nrefs->refs[*level - 1] = refs;
1989                         }
1990                 }
1991
1992                 if (refs > 1) {
1993                         ret = enter_shared_node(root, bytenr, refs,
1994                                                 wc, *level - 1);
1995                         if (ret > 0) {
1996                                 path->slots[*level]++;
1997                                 continue;
1998                         }
1999                 }
2000
2001                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003                         free_extent_buffer(next);
2004                         reada_walk_down(root, cur, path->slots[*level]);
2005                         next = read_tree_block(root, bytenr, blocksize,
2006                                                ptr_gen);
2007                         if (!extent_buffer_uptodate(next)) {
2008                                 struct btrfs_key node_key;
2009
2010                                 btrfs_node_key_to_cpu(path->nodes[*level],
2011                                                       &node_key,
2012                                                       path->slots[*level]);
2013                                 btrfs_add_corrupt_extent_record(root->fs_info,
2014                                                 &node_key,
2015                                                 path->nodes[*level]->start,
2016                                                 root->nodesize, *level);
2017                                 err = -EIO;
2018                                 goto out;
2019                         }
2020                 }
2021
2022                 ret = check_child_node(root, cur, path->slots[*level], next);
2023                 if (ret) {
2024                         err = ret;
2025                         goto out;
2026                 }
2027
2028                 if (btrfs_is_leaf(next))
2029                         status = btrfs_check_leaf(root, NULL, next);
2030                 else
2031                         status = btrfs_check_node(root, NULL, next);
2032                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033                         free_extent_buffer(next);
2034                         err = -EIO;
2035                         goto out;
2036                 }
2037
2038                 *level = *level - 1;
2039                 free_extent_buffer(path->nodes[*level]);
2040                 path->nodes[*level] = next;
2041                 path->slots[*level] = 0;
2042         }
2043 out:
2044         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2045         return err;
2046 }
2047
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049                         struct walk_control *wc, int *level)
2050 {
2051         int i;
2052         struct extent_buffer *leaf;
2053
2054         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055                 leaf = path->nodes[i];
2056                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2057                         path->slots[i]++;
2058                         *level = i;
2059                         return 0;
2060                 } else {
2061                         free_extent_buffer(path->nodes[*level]);
2062                         path->nodes[*level] = NULL;
2063                         BUG_ON(*level > wc->active_node);
2064                         if (*level == wc->active_node)
2065                                 leave_shared_node(root, wc, *level);
2066                         *level = i + 1;
2067                 }
2068         }
2069         return 1;
2070 }
2071
2072 static int check_root_dir(struct inode_record *rec)
2073 {
2074         struct inode_backref *backref;
2075         int ret = -1;
2076
2077         if (!rec->found_inode_item || rec->errors)
2078                 goto out;
2079         if (rec->nlink != 1 || rec->found_link != 0)
2080                 goto out;
2081         if (list_empty(&rec->backrefs))
2082                 goto out;
2083         backref = to_inode_backref(rec->backrefs.next);
2084         if (!backref->found_inode_ref)
2085                 goto out;
2086         if (backref->index != 0 || backref->namelen != 2 ||
2087             memcmp(backref->name, "..", 2))
2088                 goto out;
2089         if (backref->found_dir_index || backref->found_dir_item)
2090                 goto out;
2091         ret = 0;
2092 out:
2093         return ret;
2094 }
2095
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097                               struct btrfs_root *root, struct btrfs_path *path,
2098                               struct inode_record *rec)
2099 {
2100         struct btrfs_inode_item *ei;
2101         struct btrfs_key key;
2102         int ret;
2103
2104         key.objectid = rec->ino;
2105         key.type = BTRFS_INODE_ITEM_KEY;
2106         key.offset = (u64)-1;
2107
2108         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2109         if (ret < 0)
2110                 goto out;
2111         if (ret) {
2112                 if (!path->slots[0]) {
2113                         ret = -ENOENT;
2114                         goto out;
2115                 }
2116                 path->slots[0]--;
2117                 ret = 0;
2118         }
2119         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120         if (key.objectid != rec->ino) {
2121                 ret = -ENOENT;
2122                 goto out;
2123         }
2124
2125         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126                             struct btrfs_inode_item);
2127         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128         btrfs_mark_buffer_dirty(path->nodes[0]);
2129         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131                root->root_key.objectid);
2132 out:
2133         btrfs_release_path(path);
2134         return ret;
2135 }
2136
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138                                     struct btrfs_root *root,
2139                                     struct btrfs_path *path,
2140                                     struct inode_record *rec)
2141 {
2142         int ret;
2143
2144         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145         btrfs_release_path(path);
2146         if (!ret)
2147                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2148         return ret;
2149 }
2150
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152                                struct btrfs_root *root,
2153                                struct btrfs_path *path,
2154                                struct inode_record *rec)
2155 {
2156         struct btrfs_inode_item *ei;
2157         struct btrfs_key key;
2158         int ret = 0;
2159
2160         key.objectid = rec->ino;
2161         key.type = BTRFS_INODE_ITEM_KEY;
2162         key.offset = 0;
2163
2164         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2165         if (ret) {
2166                 if (ret > 0)
2167                         ret = -ENOENT;
2168                 goto out;
2169         }
2170
2171         /* Since ret == 0, no need to check anything */
2172         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173                             struct btrfs_inode_item);
2174         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175         btrfs_mark_buffer_dirty(path->nodes[0]);
2176         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177         printf("reset nbytes for ino %llu root %llu\n",
2178                rec->ino, root->root_key.objectid);
2179 out:
2180         btrfs_release_path(path);
2181         return ret;
2182 }
2183
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185                                  struct cache_tree *inode_cache,
2186                                  struct inode_record *rec,
2187                                  struct inode_backref *backref)
2188 {
2189         struct btrfs_path path;
2190         struct btrfs_trans_handle *trans;
2191         struct btrfs_dir_item *dir_item;
2192         struct extent_buffer *leaf;
2193         struct btrfs_key key;
2194         struct btrfs_disk_key disk_key;
2195         struct inode_record *dir_rec;
2196         unsigned long name_ptr;
2197         u32 data_size = sizeof(*dir_item) + backref->namelen;
2198         int ret;
2199
2200         trans = btrfs_start_transaction(root, 1);
2201         if (IS_ERR(trans))
2202                 return PTR_ERR(trans);
2203
2204         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2205                 (unsigned long long)rec->ino);
2206
2207         btrfs_init_path(&path);
2208         key.objectid = backref->dir;
2209         key.type = BTRFS_DIR_INDEX_KEY;
2210         key.offset = backref->index;
2211         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2212         BUG_ON(ret);
2213
2214         leaf = path.nodes[0];
2215         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2216
2217         disk_key.objectid = cpu_to_le64(rec->ino);
2218         disk_key.type = BTRFS_INODE_ITEM_KEY;
2219         disk_key.offset = 0;
2220
2221         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2222         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2223         btrfs_set_dir_data_len(leaf, dir_item, 0);
2224         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2225         name_ptr = (unsigned long)(dir_item + 1);
2226         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2227         btrfs_mark_buffer_dirty(leaf);
2228         btrfs_release_path(&path);
2229         btrfs_commit_transaction(trans, root);
2230
2231         backref->found_dir_index = 1;
2232         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2233         BUG_ON(IS_ERR(dir_rec));
2234         if (!dir_rec)
2235                 return 0;
2236         dir_rec->found_size += backref->namelen;
2237         if (dir_rec->found_size == dir_rec->isize &&
2238             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2239                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2240         if (dir_rec->found_size != dir_rec->isize)
2241                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2242
2243         return 0;
2244 }
2245
2246 static int delete_dir_index(struct btrfs_root *root,
2247                             struct cache_tree *inode_cache,
2248                             struct inode_record *rec,
2249                             struct inode_backref *backref)
2250 {
2251         struct btrfs_trans_handle *trans;
2252         struct btrfs_dir_item *di;
2253         struct btrfs_path path;
2254         int ret = 0;
2255
2256         trans = btrfs_start_transaction(root, 1);
2257         if (IS_ERR(trans))
2258                 return PTR_ERR(trans);
2259
2260         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261                 (unsigned long long)backref->dir,
2262                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263                 (unsigned long long)root->objectid);
2264
2265         btrfs_init_path(&path);
2266         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2267                                     backref->name, backref->namelen,
2268                                     backref->index, -1);
2269         if (IS_ERR(di)) {
2270                 ret = PTR_ERR(di);
2271                 btrfs_release_path(&path);
2272                 btrfs_commit_transaction(trans, root);
2273                 if (ret == -ENOENT)
2274                         return 0;
2275                 return ret;
2276         }
2277
2278         if (!di)
2279                 ret = btrfs_del_item(trans, root, &path);
2280         else
2281                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2282         BUG_ON(ret);
2283         btrfs_release_path(&path);
2284         btrfs_commit_transaction(trans, root);
2285         return ret;
2286 }
2287
2288 static int create_inode_item(struct btrfs_root *root,
2289                              struct inode_record *rec,
2290                              struct inode_backref *backref, int root_dir)
2291 {
2292         struct btrfs_trans_handle *trans;
2293         struct btrfs_inode_item inode_item;
2294         time_t now = time(NULL);
2295         int ret;
2296
2297         trans = btrfs_start_transaction(root, 1);
2298         if (IS_ERR(trans)) {
2299                 ret = PTR_ERR(trans);
2300                 return ret;
2301         }
2302
2303         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2304                 "be incomplete, please check permissions and content after "
2305                 "the fsck completes.\n", (unsigned long long)root->objectid,
2306                 (unsigned long long)rec->ino);
2307
2308         memset(&inode_item, 0, sizeof(inode_item));
2309         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2310         if (root_dir)
2311                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2312         else
2313                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2314         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2315         if (rec->found_dir_item) {
2316                 if (rec->found_file_extent)
2317                         fprintf(stderr, "root %llu inode %llu has both a dir "
2318                                 "item and extents, unsure if it is a dir or a "
2319                                 "regular file so setting it as a directory\n",
2320                                 (unsigned long long)root->objectid,
2321                                 (unsigned long long)rec->ino);
2322                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2323                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2324         } else if (!rec->found_dir_item) {
2325                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2326                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2327         }
2328         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2329         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2330         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2331         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2332         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2333         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2334         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2335         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2336
2337         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2338         BUG_ON(ret);
2339         btrfs_commit_transaction(trans, root);
2340         return 0;
2341 }
2342
2343 static int repair_inode_backrefs(struct btrfs_root *root,
2344                                  struct inode_record *rec,
2345                                  struct cache_tree *inode_cache,
2346                                  int delete)
2347 {
2348         struct inode_backref *tmp, *backref;
2349         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2350         int ret = 0;
2351         int repaired = 0;
2352
2353         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354                 if (!delete && rec->ino == root_dirid) {
2355                         if (!rec->found_inode_item) {
2356                                 ret = create_inode_item(root, rec, backref, 1);
2357                                 if (ret)
2358                                         break;
2359                                 repaired++;
2360                         }
2361                 }
2362
2363                 /* Index 0 for root dir's are special, don't mess with it */
2364                 if (rec->ino == root_dirid && backref->index == 0)
2365                         continue;
2366
2367                 if (delete &&
2368                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2369                      (backref->found_dir_index && backref->found_inode_ref &&
2370                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2371                         ret = delete_dir_index(root, inode_cache, rec, backref);
2372                         if (ret)
2373                                 break;
2374                         repaired++;
2375                         list_del(&backref->list);
2376                         free(backref);
2377                 }
2378
2379                 if (!delete && !backref->found_dir_index &&
2380                     backref->found_dir_item && backref->found_inode_ref) {
2381                         ret = add_missing_dir_index(root, inode_cache, rec,
2382                                                     backref);
2383                         if (ret)
2384                                 break;
2385                         repaired++;
2386                         if (backref->found_dir_item &&
2387                             backref->found_dir_index &&
2388                             backref->found_dir_index) {
2389                                 if (!backref->errors &&
2390                                     backref->found_inode_ref) {
2391                                         list_del(&backref->list);
2392                                         free(backref);
2393                                 }
2394                         }
2395                 }
2396
2397                 if (!delete && (!backref->found_dir_index &&
2398                                 !backref->found_dir_item &&
2399                                 backref->found_inode_ref)) {
2400                         struct btrfs_trans_handle *trans;
2401                         struct btrfs_key location;
2402
2403                         ret = check_dir_conflict(root, backref->name,
2404                                                  backref->namelen,
2405                                                  backref->dir,
2406                                                  backref->index);
2407                         if (ret) {
2408                                 /*
2409                                  * let nlink fixing routine to handle it,
2410                                  * which can do it better.
2411                                  */
2412                                 ret = 0;
2413                                 break;
2414                         }
2415                         location.objectid = rec->ino;
2416                         location.type = BTRFS_INODE_ITEM_KEY;
2417                         location.offset = 0;
2418
2419                         trans = btrfs_start_transaction(root, 1);
2420                         if (IS_ERR(trans)) {
2421                                 ret = PTR_ERR(trans);
2422                                 break;
2423                         }
2424                         fprintf(stderr, "adding missing dir index/item pair "
2425                                 "for inode %llu\n",
2426                                 (unsigned long long)rec->ino);
2427                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2428                                                     backref->namelen,
2429                                                     backref->dir, &location,
2430                                                     imode_to_type(rec->imode),
2431                                                     backref->index);
2432                         BUG_ON(ret);
2433                         btrfs_commit_transaction(trans, root);
2434                         repaired++;
2435                 }
2436
2437                 if (!delete && (backref->found_inode_ref &&
2438                                 backref->found_dir_index &&
2439                                 backref->found_dir_item &&
2440                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2441                                 !rec->found_inode_item)) {
2442                         ret = create_inode_item(root, rec, backref, 0);
2443                         if (ret)
2444                                 break;
2445                         repaired++;
2446                 }
2447
2448         }
2449         return ret ? ret : repaired;
2450 }
2451
2452 /*
2453  * To determine the file type for nlink/inode_item repair
2454  *
2455  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2456  * Return -ENOENT if file type is not found.
2457  */
2458 static int find_file_type(struct inode_record *rec, u8 *type)
2459 {
2460         struct inode_backref *backref;
2461
2462         /* For inode item recovered case */
2463         if (rec->found_inode_item) {
2464                 *type = imode_to_type(rec->imode);
2465                 return 0;
2466         }
2467
2468         list_for_each_entry(backref, &rec->backrefs, list) {
2469                 if (backref->found_dir_index || backref->found_dir_item) {
2470                         *type = backref->filetype;
2471                         return 0;
2472                 }
2473         }
2474         return -ENOENT;
2475 }
2476
2477 /*
2478  * To determine the file name for nlink repair
2479  *
2480  * Return 0 if file name is found, set name and namelen.
2481  * Return -ENOENT if file name is not found.
2482  */
2483 static int find_file_name(struct inode_record *rec,
2484                           char *name, int *namelen)
2485 {
2486         struct inode_backref *backref;
2487
2488         list_for_each_entry(backref, &rec->backrefs, list) {
2489                 if (backref->found_dir_index || backref->found_dir_item ||
2490                     backref->found_inode_ref) {
2491                         memcpy(name, backref->name, backref->namelen);
2492                         *namelen = backref->namelen;
2493                         return 0;
2494                 }
2495         }
2496         return -ENOENT;
2497 }
2498
2499 /* Reset the nlink of the inode to the correct one */
2500 static int reset_nlink(struct btrfs_trans_handle *trans,
2501                        struct btrfs_root *root,
2502                        struct btrfs_path *path,
2503                        struct inode_record *rec)
2504 {
2505         struct inode_backref *backref;
2506         struct inode_backref *tmp;
2507         struct btrfs_key key;
2508         struct btrfs_inode_item *inode_item;
2509         int ret = 0;
2510
2511         /* We don't believe this either, reset it and iterate backref */
2512         rec->found_link = 0;
2513
2514         /* Remove all backref including the valid ones */
2515         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2516                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2517                                    backref->index, backref->name,
2518                                    backref->namelen, 0);
2519                 if (ret < 0)
2520                         goto out;
2521
2522                 /* remove invalid backref, so it won't be added back */
2523                 if (!(backref->found_dir_index &&
2524                       backref->found_dir_item &&
2525                       backref->found_inode_ref)) {
2526                         list_del(&backref->list);
2527                         free(backref);
2528                 } else {
2529                         rec->found_link++;
2530                 }
2531         }
2532
2533         /* Set nlink to 0 */
2534         key.objectid = rec->ino;
2535         key.type = BTRFS_INODE_ITEM_KEY;
2536         key.offset = 0;
2537         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2538         if (ret < 0)
2539                 goto out;
2540         if (ret > 0) {
2541                 ret = -ENOENT;
2542                 goto out;
2543         }
2544         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2545                                     struct btrfs_inode_item);
2546         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2547         btrfs_mark_buffer_dirty(path->nodes[0]);
2548         btrfs_release_path(path);
2549
2550         /*
2551          * Add back valid inode_ref/dir_item/dir_index,
2552          * add_link() will handle the nlink inc, so new nlink must be correct
2553          */
2554         list_for_each_entry(backref, &rec->backrefs, list) {
2555                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2556                                      backref->name, backref->namelen,
2557                                      backref->filetype, &backref->index, 1);
2558                 if (ret < 0)
2559                         goto out;
2560         }
2561 out:
2562         btrfs_release_path(path);
2563         return ret;
2564 }
2565
2566 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2567                                struct btrfs_root *root,
2568                                struct btrfs_path *path,
2569                                struct inode_record *rec)
2570 {
2571         char *dir_name = "lost+found";
2572         char namebuf[BTRFS_NAME_LEN] = {0};
2573         u64 lost_found_ino;
2574         u32 mode = 0700;
2575         u8 type = 0;
2576         int namelen = 0;
2577         int name_recovered = 0;
2578         int type_recovered = 0;
2579         int ret = 0;
2580
2581         /*
2582          * Get file name and type first before these invalid inode ref
2583          * are deleted by remove_all_invalid_backref()
2584          */
2585         name_recovered = !find_file_name(rec, namebuf, &namelen);
2586         type_recovered = !find_file_type(rec, &type);
2587
2588         if (!name_recovered) {
2589                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2590                        rec->ino, rec->ino);
2591                 namelen = count_digits(rec->ino);
2592                 sprintf(namebuf, "%llu", rec->ino);
2593                 name_recovered = 1;
2594         }
2595         if (!type_recovered) {
2596                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2597                        rec->ino);
2598                 type = BTRFS_FT_REG_FILE;
2599                 type_recovered = 1;
2600         }
2601
2602         ret = reset_nlink(trans, root, path, rec);
2603         if (ret < 0) {
2604                 fprintf(stderr,
2605                         "Failed to reset nlink for inode %llu: %s\n",
2606                         rec->ino, strerror(-ret));
2607                 goto out;
2608         }
2609
2610         if (rec->found_link == 0) {
2611                 lost_found_ino = root->highest_inode;
2612                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2613                         ret = -EOVERFLOW;
2614                         goto out;
2615                 }
2616                 lost_found_ino++;
2617                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2618                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2619                                   mode);
2620                 if (ret < 0) {
2621                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2622                                 dir_name, strerror(-ret));
2623                         goto out;
2624                 }
2625                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2626                                      namebuf, namelen, type, NULL, 1);
2627                 /*
2628                  * Add ".INO" suffix several times to handle case where
2629                  * "FILENAME.INO" is already taken by another file.
2630                  */
2631                 while (ret == -EEXIST) {
2632                         /*
2633                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2634                          */
2635                         if (namelen + count_digits(rec->ino) + 1 >
2636                             BTRFS_NAME_LEN) {
2637                                 ret = -EFBIG;
2638                                 goto out;
2639                         }
2640                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2641                                  ".%llu", rec->ino);
2642                         namelen += count_digits(rec->ino) + 1;
2643                         ret = btrfs_add_link(trans, root, rec->ino,
2644                                              lost_found_ino, namebuf,
2645                                              namelen, type, NULL, 1);
2646                 }
2647                 if (ret < 0) {
2648                         fprintf(stderr,
2649                                 "Failed to link the inode %llu to %s dir: %s\n",
2650                                 rec->ino, dir_name, strerror(-ret));
2651                         goto out;
2652                 }
2653                 /*
2654                  * Just increase the found_link, don't actually add the
2655                  * backref. This will make things easier and this inode
2656                  * record will be freed after the repair is done.
2657                  * So fsck will not report problem about this inode.
2658                  */
2659                 rec->found_link++;
2660                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2661                        namelen, namebuf, dir_name);
2662         }
2663         printf("Fixed the nlink of inode %llu\n", rec->ino);
2664 out:
2665         /*
2666          * Clear the flag anyway, or we will loop forever for the same inode
2667          * as it will not be removed from the bad inode list and the dead loop
2668          * happens.
2669          */
2670         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2671         btrfs_release_path(path);
2672         return ret;
2673 }
2674
2675 /*
2676  * Check if there is any normal(reg or prealloc) file extent for given
2677  * ino.
2678  * This is used to determine the file type when neither its dir_index/item or
2679  * inode_item exists.
2680  *
2681  * This will *NOT* report error, if any error happens, just consider it does
2682  * not have any normal file extent.
2683  */
2684 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2685 {
2686         struct btrfs_path path;
2687         struct btrfs_key key;
2688         struct btrfs_key found_key;
2689         struct btrfs_file_extent_item *fi;
2690         u8 type;
2691         int ret = 0;
2692
2693         btrfs_init_path(&path);
2694         key.objectid = ino;
2695         key.type = BTRFS_EXTENT_DATA_KEY;
2696         key.offset = 0;
2697
2698         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2699         if (ret < 0) {
2700                 ret = 0;
2701                 goto out;
2702         }
2703         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2704                 ret = btrfs_next_leaf(root, &path);
2705                 if (ret) {
2706                         ret = 0;
2707                         goto out;
2708                 }
2709         }
2710         while (1) {
2711                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2712                                       path.slots[0]);
2713                 if (found_key.objectid != ino ||
2714                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2715                         break;
2716                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2717                                     struct btrfs_file_extent_item);
2718                 type = btrfs_file_extent_type(path.nodes[0], fi);
2719                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2720                         ret = 1;
2721                         goto out;
2722                 }
2723         }
2724 out:
2725         btrfs_release_path(&path);
2726         return ret;
2727 }
2728
2729 static u32 btrfs_type_to_imode(u8 type)
2730 {
2731         static u32 imode_by_btrfs_type[] = {
2732                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2733                 [BTRFS_FT_DIR]          = S_IFDIR,
2734                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2735                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2736                 [BTRFS_FT_FIFO]         = S_IFIFO,
2737                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2738                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2739         };
2740
2741         return imode_by_btrfs_type[(type)];
2742 }
2743
2744 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2745                                 struct btrfs_root *root,
2746                                 struct btrfs_path *path,
2747                                 struct inode_record *rec)
2748 {
2749         u8 filetype;
2750         u32 mode = 0700;
2751         int type_recovered = 0;
2752         int ret = 0;
2753
2754         printf("Trying to rebuild inode:%llu\n", rec->ino);
2755
2756         type_recovered = !find_file_type(rec, &filetype);
2757
2758         /*
2759          * Try to determine inode type if type not found.
2760          *
2761          * For found regular file extent, it must be FILE.
2762          * For found dir_item/index, it must be DIR.
2763          *
2764          * For undetermined one, use FILE as fallback.
2765          *
2766          * TODO:
2767          * 1. If found backref(inode_index/item is already handled) to it,
2768          *    it must be DIR.
2769          *    Need new inode-inode ref structure to allow search for that.
2770          */
2771         if (!type_recovered) {
2772                 if (rec->found_file_extent &&
2773                     find_normal_file_extent(root, rec->ino)) {
2774                         type_recovered = 1;
2775                         filetype = BTRFS_FT_REG_FILE;
2776                 } else if (rec->found_dir_item) {
2777                         type_recovered = 1;
2778                         filetype = BTRFS_FT_DIR;
2779                 } else if (!list_empty(&rec->orphan_extents)) {
2780                         type_recovered = 1;
2781                         filetype = BTRFS_FT_REG_FILE;
2782                 } else{
2783                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2784                                rec->ino);
2785                         type_recovered = 1;
2786                         filetype = BTRFS_FT_REG_FILE;
2787                 }
2788         }
2789
2790         ret = btrfs_new_inode(trans, root, rec->ino,
2791                               mode | btrfs_type_to_imode(filetype));
2792         if (ret < 0)
2793                 goto out;
2794
2795         /*
2796          * Here inode rebuild is done, we only rebuild the inode item,
2797          * don't repair the nlink(like move to lost+found).
2798          * That is the job of nlink repair.
2799          *
2800          * We just fill the record and return
2801          */
2802         rec->found_dir_item = 1;
2803         rec->imode = mode | btrfs_type_to_imode(filetype);
2804         rec->nlink = 0;
2805         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2806         /* Ensure the inode_nlinks repair function will be called */
2807         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2808 out:
2809         return ret;
2810 }
2811
2812 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2813                                       struct btrfs_root *root,
2814                                       struct btrfs_path *path,
2815                                       struct inode_record *rec)
2816 {
2817         struct orphan_data_extent *orphan;
2818         struct orphan_data_extent *tmp;
2819         int ret = 0;
2820
2821         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2822                 /*
2823                  * Check for conflicting file extents
2824                  *
2825                  * Here we don't know whether the extents is compressed or not,
2826                  * so we can only assume it not compressed nor data offset,
2827                  * and use its disk_len as extent length.
2828                  */
2829                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2830                                        orphan->offset, orphan->disk_len, 0);
2831                 btrfs_release_path(path);
2832                 if (ret < 0)
2833                         goto out;
2834                 if (!ret) {
2835                         fprintf(stderr,
2836                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2837                                 orphan->disk_bytenr, orphan->disk_len);
2838                         ret = btrfs_free_extent(trans,
2839                                         root->fs_info->extent_root,
2840                                         orphan->disk_bytenr, orphan->disk_len,
2841                                         0, root->objectid, orphan->objectid,
2842                                         orphan->offset);
2843                         if (ret < 0)
2844                                 goto out;
2845                 }
2846                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2847                                 orphan->offset, orphan->disk_bytenr,
2848                                 orphan->disk_len, orphan->disk_len);
2849                 if (ret < 0)
2850                         goto out;
2851
2852                 /* Update file size info */
2853                 rec->found_size += orphan->disk_len;
2854                 if (rec->found_size == rec->nbytes)
2855                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2856
2857                 /* Update the file extent hole info too */
2858                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2859                                            orphan->disk_len);
2860                 if (ret < 0)
2861                         goto out;
2862                 if (RB_EMPTY_ROOT(&rec->holes))
2863                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2864
2865                 list_del(&orphan->list);
2866                 free(orphan);
2867         }
2868         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2869 out:
2870         return ret;
2871 }
2872
2873 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2874                                         struct btrfs_root *root,
2875                                         struct btrfs_path *path,
2876                                         struct inode_record *rec)
2877 {
2878         struct rb_node *node;
2879         struct file_extent_hole *hole;
2880         int found = 0;
2881         int ret = 0;
2882
2883         node = rb_first(&rec->holes);
2884
2885         while (node) {
2886                 found = 1;
2887                 hole = rb_entry(node, struct file_extent_hole, node);
2888                 ret = btrfs_punch_hole(trans, root, rec->ino,
2889                                        hole->start, hole->len);
2890                 if (ret < 0)
2891                         goto out;
2892                 ret = del_file_extent_hole(&rec->holes, hole->start,
2893                                            hole->len);
2894                 if (ret < 0)
2895                         goto out;
2896                 if (RB_EMPTY_ROOT(&rec->holes))
2897                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2898                 node = rb_first(&rec->holes);
2899         }
2900         /* special case for a file losing all its file extent */
2901         if (!found) {
2902                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2903                                        round_up(rec->isize, root->sectorsize));
2904                 if (ret < 0)
2905                         goto out;
2906         }
2907         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2908                rec->ino, root->objectid);
2909 out:
2910         return ret;
2911 }
2912
2913 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2914 {
2915         struct btrfs_trans_handle *trans;
2916         struct btrfs_path *path;
2917         int ret = 0;
2918
2919         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2920                              I_ERR_NO_ORPHAN_ITEM |
2921                              I_ERR_LINK_COUNT_WRONG |
2922                              I_ERR_NO_INODE_ITEM |
2923                              I_ERR_FILE_EXTENT_ORPHAN |
2924                              I_ERR_FILE_EXTENT_DISCOUNT|
2925                              I_ERR_FILE_NBYTES_WRONG)))
2926                 return rec->errors;
2927
2928         path = btrfs_alloc_path();
2929         if (!path)
2930                 return -ENOMEM;
2931
2932         /*
2933          * For nlink repair, it may create a dir and add link, so
2934          * 2 for parent(256)'s dir_index and dir_item
2935          * 2 for lost+found dir's inode_item and inode_ref
2936          * 1 for the new inode_ref of the file
2937          * 2 for lost+found dir's dir_index and dir_item for the file
2938          */
2939         trans = btrfs_start_transaction(root, 7);
2940         if (IS_ERR(trans)) {
2941                 btrfs_free_path(path);
2942                 return PTR_ERR(trans);
2943         }
2944
2945         if (rec->errors & I_ERR_NO_INODE_ITEM)
2946                 ret = repair_inode_no_item(trans, root, path, rec);
2947         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2948                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2949         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2950                 ret = repair_inode_discount_extent(trans, root, path, rec);
2951         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2952                 ret = repair_inode_isize(trans, root, path, rec);
2953         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2954                 ret = repair_inode_orphan_item(trans, root, path, rec);
2955         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2956                 ret = repair_inode_nlinks(trans, root, path, rec);
2957         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2958                 ret = repair_inode_nbytes(trans, root, path, rec);
2959         btrfs_commit_transaction(trans, root);
2960         btrfs_free_path(path);
2961         return ret;
2962 }
2963
2964 static int check_inode_recs(struct btrfs_root *root,
2965                             struct cache_tree *inode_cache)
2966 {
2967         struct cache_extent *cache;
2968         struct ptr_node *node;
2969         struct inode_record *rec;
2970         struct inode_backref *backref;
2971         int stage = 0;
2972         int ret = 0;
2973         int err = 0;
2974         u64 error = 0;
2975         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2976
2977         if (btrfs_root_refs(&root->root_item) == 0) {
2978                 if (!cache_tree_empty(inode_cache))
2979                         fprintf(stderr, "warning line %d\n", __LINE__);
2980                 return 0;
2981         }
2982
2983         /*
2984          * We need to record the highest inode number for later 'lost+found'
2985          * dir creation.
2986          * We must select an ino not used/referred by any existing inode, or
2987          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2988          * this may cause 'lost+found' dir has wrong nlinks.
2989          */
2990         cache = last_cache_extent(inode_cache);
2991         if (cache) {
2992                 node = container_of(cache, struct ptr_node, cache);
2993                 rec = node->data;
2994                 if (rec->ino > root->highest_inode)
2995                         root->highest_inode = rec->ino;
2996         }
2997
2998         /*
2999          * We need to repair backrefs first because we could change some of the
3000          * errors in the inode recs.
3001          *
3002          * We also need to go through and delete invalid backrefs first and then
3003          * add the correct ones second.  We do this because we may get EEXIST
3004          * when adding back the correct index because we hadn't yet deleted the
3005          * invalid index.
3006          *
3007          * For example, if we were missing a dir index then the directories
3008          * isize would be wrong, so if we fixed the isize to what we thought it
3009          * would be and then fixed the backref we'd still have a invalid fs, so
3010          * we need to add back the dir index and then check to see if the isize
3011          * is still wrong.
3012          */
3013         while (stage < 3) {
3014                 stage++;
3015                 if (stage == 3 && !err)
3016                         break;
3017
3018                 cache = search_cache_extent(inode_cache, 0);
3019                 while (repair && cache) {
3020                         node = container_of(cache, struct ptr_node, cache);
3021                         rec = node->data;
3022                         cache = next_cache_extent(cache);
3023
3024                         /* Need to free everything up and rescan */
3025                         if (stage == 3) {
3026                                 remove_cache_extent(inode_cache, &node->cache);
3027                                 free(node);
3028                                 free_inode_rec(rec);
3029                                 continue;
3030                         }
3031
3032                         if (list_empty(&rec->backrefs))
3033                                 continue;
3034
3035                         ret = repair_inode_backrefs(root, rec, inode_cache,
3036                                                     stage == 1);
3037                         if (ret < 0) {
3038                                 err = ret;
3039                                 stage = 2;
3040                                 break;
3041                         } if (ret > 0) {
3042                                 err = -EAGAIN;
3043                         }
3044                 }
3045         }
3046         if (err)
3047                 return err;
3048
3049         rec = get_inode_rec(inode_cache, root_dirid, 0);
3050         BUG_ON(IS_ERR(rec));
3051         if (rec) {
3052                 ret = check_root_dir(rec);
3053                 if (ret) {
3054                         fprintf(stderr, "root %llu root dir %llu error\n",
3055                                 (unsigned long long)root->root_key.objectid,
3056                                 (unsigned long long)root_dirid);
3057                         print_inode_error(root, rec);
3058                         error++;
3059                 }
3060         } else {
3061                 if (repair) {
3062                         struct btrfs_trans_handle *trans;
3063
3064                         trans = btrfs_start_transaction(root, 1);
3065                         if (IS_ERR(trans)) {
3066                                 err = PTR_ERR(trans);
3067                                 return err;
3068                         }
3069
3070                         fprintf(stderr,
3071                                 "root %llu missing its root dir, recreating\n",
3072                                 (unsigned long long)root->objectid);
3073
3074                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3075                         BUG_ON(ret);
3076
3077                         btrfs_commit_transaction(trans, root);
3078                         return -EAGAIN;
3079                 }
3080
3081                 fprintf(stderr, "root %llu root dir %llu not found\n",
3082                         (unsigned long long)root->root_key.objectid,
3083                         (unsigned long long)root_dirid);
3084         }
3085
3086         while (1) {
3087                 cache = search_cache_extent(inode_cache, 0);
3088                 if (!cache)
3089                         break;
3090                 node = container_of(cache, struct ptr_node, cache);
3091                 rec = node->data;
3092                 remove_cache_extent(inode_cache, &node->cache);
3093                 free(node);
3094                 if (rec->ino == root_dirid ||
3095                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3096                         free_inode_rec(rec);
3097                         continue;
3098                 }
3099
3100                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3101                         ret = check_orphan_item(root, rec->ino);
3102                         if (ret == 0)
3103                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3104                         if (can_free_inode_rec(rec)) {
3105                                 free_inode_rec(rec);
3106                                 continue;
3107                         }
3108                 }
3109
3110                 if (!rec->found_inode_item)
3111                         rec->errors |= I_ERR_NO_INODE_ITEM;
3112                 if (rec->found_link != rec->nlink)
3113                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3114                 if (repair) {
3115                         ret = try_repair_inode(root, rec);
3116                         if (ret == 0 && can_free_inode_rec(rec)) {
3117                                 free_inode_rec(rec);
3118                                 continue;
3119                         }
3120                         ret = 0;
3121                 }
3122
3123                 if (!(repair && ret == 0))
3124                         error++;
3125                 print_inode_error(root, rec);
3126                 list_for_each_entry(backref, &rec->backrefs, list) {
3127                         if (!backref->found_dir_item)
3128                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3129                         if (!backref->found_dir_index)
3130                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3131                         if (!backref->found_inode_ref)
3132                                 backref->errors |= REF_ERR_NO_INODE_REF;
3133                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3134                                 " namelen %u name %s filetype %d errors %x",
3135                                 (unsigned long long)backref->dir,
3136                                 (unsigned long long)backref->index,
3137                                 backref->namelen, backref->name,
3138                                 backref->filetype, backref->errors);
3139                         print_ref_error(backref->errors);
3140                 }
3141                 free_inode_rec(rec);
3142         }
3143         return (error > 0) ? -1 : 0;
3144 }
3145
3146 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3147                                         u64 objectid)
3148 {
3149         struct cache_extent *cache;
3150         struct root_record *rec = NULL;
3151         int ret;
3152
3153         cache = lookup_cache_extent(root_cache, objectid, 1);
3154         if (cache) {
3155                 rec = container_of(cache, struct root_record, cache);
3156         } else {
3157                 rec = calloc(1, sizeof(*rec));
3158                 if (!rec)
3159                         return ERR_PTR(-ENOMEM);
3160                 rec->objectid = objectid;
3161                 INIT_LIST_HEAD(&rec->backrefs);
3162                 rec->cache.start = objectid;
3163                 rec->cache.size = 1;
3164
3165                 ret = insert_cache_extent(root_cache, &rec->cache);
3166                 if (ret)
3167                         return ERR_PTR(-EEXIST);
3168         }
3169         return rec;
3170 }
3171
3172 static struct root_backref *get_root_backref(struct root_record *rec,
3173                                              u64 ref_root, u64 dir, u64 index,
3174                                              const char *name, int namelen)
3175 {
3176         struct root_backref *backref;
3177
3178         list_for_each_entry(backref, &rec->backrefs, list) {
3179                 if (backref->ref_root != ref_root || backref->dir != dir ||
3180                     backref->namelen != namelen)
3181                         continue;
3182                 if (memcmp(name, backref->name, namelen))
3183                         continue;
3184                 return backref;
3185         }
3186
3187         backref = calloc(1, sizeof(*backref) + namelen + 1);
3188         if (!backref)
3189                 return NULL;
3190         backref->ref_root = ref_root;
3191         backref->dir = dir;
3192         backref->index = index;
3193         backref->namelen = namelen;
3194         memcpy(backref->name, name, namelen);
3195         backref->name[namelen] = '\0';
3196         list_add_tail(&backref->list, &rec->backrefs);
3197         return backref;
3198 }
3199
3200 static void free_root_record(struct cache_extent *cache)
3201 {
3202         struct root_record *rec;
3203         struct root_backref *backref;
3204
3205         rec = container_of(cache, struct root_record, cache);
3206         while (!list_empty(&rec->backrefs)) {
3207                 backref = to_root_backref(rec->backrefs.next);
3208                 list_del(&backref->list);
3209                 free(backref);
3210         }
3211
3212         free(rec);
3213 }
3214
3215 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3216
3217 static int add_root_backref(struct cache_tree *root_cache,
3218                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3219                             const char *name, int namelen,
3220                             int item_type, int errors)
3221 {
3222         struct root_record *rec;
3223         struct root_backref *backref;
3224
3225         rec = get_root_rec(root_cache, root_id);
3226         BUG_ON(IS_ERR(rec));
3227         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3228         BUG_ON(!backref);
3229
3230         backref->errors |= errors;
3231
3232         if (item_type != BTRFS_DIR_ITEM_KEY) {
3233                 if (backref->found_dir_index || backref->found_back_ref ||
3234                     backref->found_forward_ref) {
3235                         if (backref->index != index)
3236                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3237                 } else {
3238                         backref->index = index;
3239                 }
3240         }
3241
3242         if (item_type == BTRFS_DIR_ITEM_KEY) {
3243                 if (backref->found_forward_ref)
3244                         rec->found_ref++;
3245                 backref->found_dir_item = 1;
3246         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3247                 backref->found_dir_index = 1;
3248         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3249                 if (backref->found_forward_ref)
3250                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3251                 else if (backref->found_dir_item)
3252                         rec->found_ref++;
3253                 backref->found_forward_ref = 1;
3254         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3255                 if (backref->found_back_ref)
3256                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3257                 backref->found_back_ref = 1;
3258         } else {
3259                 BUG_ON(1);
3260         }
3261
3262         if (backref->found_forward_ref && backref->found_dir_item)
3263                 backref->reachable = 1;
3264         return 0;
3265 }
3266
3267 static int merge_root_recs(struct btrfs_root *root,
3268                            struct cache_tree *src_cache,
3269                            struct cache_tree *dst_cache)
3270 {
3271         struct cache_extent *cache;
3272         struct ptr_node *node;
3273         struct inode_record *rec;
3274         struct inode_backref *backref;
3275         int ret = 0;
3276
3277         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3278                 free_inode_recs_tree(src_cache);
3279                 return 0;
3280         }
3281
3282         while (1) {
3283                 cache = search_cache_extent(src_cache, 0);
3284                 if (!cache)
3285                         break;
3286                 node = container_of(cache, struct ptr_node, cache);
3287                 rec = node->data;
3288                 remove_cache_extent(src_cache, &node->cache);
3289                 free(node);
3290
3291                 ret = is_child_root(root, root->objectid, rec->ino);
3292                 if (ret < 0)
3293                         break;
3294                 else if (ret == 0)
3295                         goto skip;
3296
3297                 list_for_each_entry(backref, &rec->backrefs, list) {
3298                         BUG_ON(backref->found_inode_ref);
3299                         if (backref->found_dir_item)
3300                                 add_root_backref(dst_cache, rec->ino,
3301                                         root->root_key.objectid, backref->dir,
3302                                         backref->index, backref->name,
3303                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3304                                         backref->errors);
3305                         if (backref->found_dir_index)
3306                                 add_root_backref(dst_cache, rec->ino,
3307                                         root->root_key.objectid, backref->dir,
3308                                         backref->index, backref->name,
3309                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3310                                         backref->errors);
3311                 }
3312 skip:
3313                 free_inode_rec(rec);
3314         }
3315         if (ret < 0)
3316                 return ret;
3317         return 0;
3318 }
3319
3320 static int check_root_refs(struct btrfs_root *root,
3321                            struct cache_tree *root_cache)
3322 {
3323         struct root_record *rec;
3324         struct root_record *ref_root;
3325         struct root_backref *backref;
3326         struct cache_extent *cache;
3327         int loop = 1;
3328         int ret;
3329         int error;
3330         int errors = 0;
3331
3332         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3333         BUG_ON(IS_ERR(rec));
3334         rec->found_ref = 1;
3335
3336         /* fixme: this can not detect circular references */
3337         while (loop) {
3338                 loop = 0;
3339                 cache = search_cache_extent(root_cache, 0);
3340                 while (1) {
3341                         if (!cache)
3342                                 break;
3343                         rec = container_of(cache, struct root_record, cache);
3344                         cache = next_cache_extent(cache);
3345
3346                         if (rec->found_ref == 0)
3347                                 continue;
3348
3349                         list_for_each_entry(backref, &rec->backrefs, list) {
3350                                 if (!backref->reachable)
3351                                         continue;
3352
3353                                 ref_root = get_root_rec(root_cache,
3354                                                         backref->ref_root);
3355                                 BUG_ON(IS_ERR(ref_root));
3356                                 if (ref_root->found_ref > 0)
3357                                         continue;
3358
3359                                 backref->reachable = 0;
3360                                 rec->found_ref--;
3361                                 if (rec->found_ref == 0)
3362                                         loop = 1;
3363                         }
3364                 }
3365         }
3366
3367         cache = search_cache_extent(root_cache, 0);
3368         while (1) {
3369                 if (!cache)
3370                         break;
3371                 rec = container_of(cache, struct root_record, cache);
3372                 cache = next_cache_extent(cache);
3373
3374                 if (rec->found_ref == 0 &&
3375                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3376                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3377                         ret = check_orphan_item(root->fs_info->tree_root,
3378                                                 rec->objectid);
3379                         if (ret == 0)
3380                                 continue;
3381
3382                         /*
3383                          * If we don't have a root item then we likely just have
3384                          * a dir item in a snapshot for this root but no actual
3385                          * ref key or anything so it's meaningless.
3386                          */
3387                         if (!rec->found_root_item)
3388                                 continue;
3389                         errors++;
3390                         fprintf(stderr, "fs tree %llu not referenced\n",
3391                                 (unsigned long long)rec->objectid);
3392                 }
3393
3394                 error = 0;
3395                 if (rec->found_ref > 0 && !rec->found_root_item)
3396                         error = 1;
3397                 list_for_each_entry(backref, &rec->backrefs, list) {
3398                         if (!backref->found_dir_item)
3399                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3400                         if (!backref->found_dir_index)
3401                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3402                         if (!backref->found_back_ref)
3403                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3404                         if (!backref->found_forward_ref)
3405                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3406                         if (backref->reachable && backref->errors)
3407                                 error = 1;
3408                 }
3409                 if (!error)
3410                         continue;
3411
3412                 errors++;
3413                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3414                         (unsigned long long)rec->objectid, rec->found_ref,
3415                          rec->found_root_item ? "" : "not found");
3416
3417                 list_for_each_entry(backref, &rec->backrefs, list) {
3418                         if (!backref->reachable)
3419                                 continue;
3420                         if (!backref->errors && rec->found_root_item)
3421                                 continue;
3422                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3423                                 " index %llu namelen %u name %s errors %x\n",
3424                                 (unsigned long long)backref->ref_root,
3425                                 (unsigned long long)backref->dir,
3426                                 (unsigned long long)backref->index,
3427                                 backref->namelen, backref->name,
3428                                 backref->errors);
3429                         print_ref_error(backref->errors);
3430                 }
3431         }
3432         return errors > 0 ? 1 : 0;
3433 }
3434
3435 static int process_root_ref(struct extent_buffer *eb, int slot,
3436                             struct btrfs_key *key,
3437                             struct cache_tree *root_cache)
3438 {
3439         u64 dirid;
3440         u64 index;
3441         u32 len;
3442         u32 name_len;
3443         struct btrfs_root_ref *ref;
3444         char namebuf[BTRFS_NAME_LEN];
3445         int error;
3446
3447         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3448
3449         dirid = btrfs_root_ref_dirid(eb, ref);
3450         index = btrfs_root_ref_sequence(eb, ref);
3451         name_len = btrfs_root_ref_name_len(eb, ref);
3452
3453         if (name_len <= BTRFS_NAME_LEN) {
3454                 len = name_len;
3455                 error = 0;
3456         } else {
3457                 len = BTRFS_NAME_LEN;
3458                 error = REF_ERR_NAME_TOO_LONG;
3459         }
3460         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3461
3462         if (key->type == BTRFS_ROOT_REF_KEY) {
3463                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3464                                  index, namebuf, len, key->type, error);
3465         } else {
3466                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3467                                  index, namebuf, len, key->type, error);
3468         }
3469         return 0;
3470 }
3471
3472 static void free_corrupt_block(struct cache_extent *cache)
3473 {
3474         struct btrfs_corrupt_block *corrupt;
3475
3476         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3477         free(corrupt);
3478 }
3479
3480 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3481
3482 /*
3483  * Repair the btree of the given root.
3484  *
3485  * The fix is to remove the node key in corrupt_blocks cache_tree.
3486  * and rebalance the tree.
3487  * After the fix, the btree should be writeable.
3488  */
3489 static int repair_btree(struct btrfs_root *root,
3490                         struct cache_tree *corrupt_blocks)
3491 {
3492         struct btrfs_trans_handle *trans;
3493         struct btrfs_path *path;
3494         struct btrfs_corrupt_block *corrupt;
3495         struct cache_extent *cache;
3496         struct btrfs_key key;
3497         u64 offset;
3498         int level;
3499         int ret = 0;
3500
3501         if (cache_tree_empty(corrupt_blocks))
3502                 return 0;
3503
3504         path = btrfs_alloc_path();
3505         if (!path)
3506                 return -ENOMEM;
3507
3508         trans = btrfs_start_transaction(root, 1);
3509         if (IS_ERR(trans)) {
3510                 ret = PTR_ERR(trans);
3511                 fprintf(stderr, "Error starting transaction: %s\n",
3512                         strerror(-ret));
3513                 goto out_free_path;
3514         }
3515         cache = first_cache_extent(corrupt_blocks);
3516         while (cache) {
3517                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3518                                        cache);
3519                 level = corrupt->level;
3520                 path->lowest_level = level;
3521                 key.objectid = corrupt->key.objectid;
3522                 key.type = corrupt->key.type;
3523                 key.offset = corrupt->key.offset;
3524
3525                 /*
3526                  * Here we don't want to do any tree balance, since it may
3527                  * cause a balance with corrupted brother leaf/node,
3528                  * so ins_len set to 0 here.
3529                  * Balance will be done after all corrupt node/leaf is deleted.
3530                  */
3531                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3532                 if (ret < 0)
3533                         goto out;
3534                 offset = btrfs_node_blockptr(path->nodes[level],
3535                                              path->slots[level]);
3536
3537                 /* Remove the ptr */
3538                 ret = btrfs_del_ptr(trans, root, path, level,
3539                                     path->slots[level]);
3540                 if (ret < 0)
3541                         goto out;
3542                 /*
3543                  * Remove the corresponding extent
3544                  * return value is not concerned.
3545                  */
3546                 btrfs_release_path(path);
3547                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3548                                         0, root->root_key.objectid,
3549                                         level - 1, 0);
3550                 cache = next_cache_extent(cache);
3551         }
3552
3553         /* Balance the btree using btrfs_search_slot() */
3554         cache = first_cache_extent(corrupt_blocks);
3555         while (cache) {
3556                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3557                                        cache);
3558                 memcpy(&key, &corrupt->key, sizeof(key));
3559                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3560                 if (ret < 0)
3561                         goto out;
3562                 /* return will always >0 since it won't find the item */
3563                 ret = 0;
3564                 btrfs_release_path(path);
3565                 cache = next_cache_extent(cache);
3566         }
3567 out:
3568         btrfs_commit_transaction(trans, root);
3569 out_free_path:
3570         btrfs_free_path(path);
3571         return ret;
3572 }
3573
3574 static int check_fs_root(struct btrfs_root *root,
3575                          struct cache_tree *root_cache,
3576                          struct walk_control *wc)
3577 {
3578         int ret = 0;
3579         int err = 0;
3580         int wret;
3581         int level;
3582         struct btrfs_path path;
3583         struct shared_node root_node;
3584         struct root_record *rec;
3585         struct btrfs_root_item *root_item = &root->root_item;
3586         struct cache_tree corrupt_blocks;
3587         struct orphan_data_extent *orphan;
3588         struct orphan_data_extent *tmp;
3589         enum btrfs_tree_block_status status;
3590         struct node_refs nrefs;
3591
3592         /*
3593          * Reuse the corrupt_block cache tree to record corrupted tree block
3594          *
3595          * Unlike the usage in extent tree check, here we do it in a per
3596          * fs/subvol tree base.
3597          */
3598         cache_tree_init(&corrupt_blocks);
3599         root->fs_info->corrupt_blocks = &corrupt_blocks;
3600
3601         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3602                 rec = get_root_rec(root_cache, root->root_key.objectid);
3603                 BUG_ON(IS_ERR(rec));
3604                 if (btrfs_root_refs(root_item) > 0)
3605                         rec->found_root_item = 1;
3606         }
3607
3608         btrfs_init_path(&path);
3609         memset(&root_node, 0, sizeof(root_node));
3610         cache_tree_init(&root_node.root_cache);
3611         cache_tree_init(&root_node.inode_cache);
3612         memset(&nrefs, 0, sizeof(nrefs));
3613
3614         /* Move the orphan extent record to corresponding inode_record */
3615         list_for_each_entry_safe(orphan, tmp,
3616                                  &root->orphan_data_extents, list) {
3617                 struct inode_record *inode;
3618
3619                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3620                                       1);
3621                 BUG_ON(IS_ERR(inode));
3622                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3623                 list_move(&orphan->list, &inode->orphan_extents);
3624         }
3625
3626         level = btrfs_header_level(root->node);
3627         memset(wc->nodes, 0, sizeof(wc->nodes));
3628         wc->nodes[level] = &root_node;
3629         wc->active_node = level;
3630         wc->root_level = level;
3631
3632         /* We may not have checked the root block, lets do that now */
3633         if (btrfs_is_leaf(root->node))
3634                 status = btrfs_check_leaf(root, NULL, root->node);
3635         else
3636                 status = btrfs_check_node(root, NULL, root->node);
3637         if (status != BTRFS_TREE_BLOCK_CLEAN)
3638                 return -EIO;
3639
3640         if (btrfs_root_refs(root_item) > 0 ||
3641             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3642                 path.nodes[level] = root->node;
3643                 extent_buffer_get(root->node);
3644                 path.slots[level] = 0;
3645         } else {
3646                 struct btrfs_key key;
3647                 struct btrfs_disk_key found_key;
3648
3649                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3650                 level = root_item->drop_level;
3651                 path.lowest_level = level;
3652                 if (level > btrfs_header_level(root->node) ||
3653                     level >= BTRFS_MAX_LEVEL) {
3654                         error("ignoring invalid drop level: %u", level);
3655                         goto skip_walking;
3656                 }
3657                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3658                 if (wret < 0)
3659                         goto skip_walking;
3660                 btrfs_node_key(path.nodes[level], &found_key,
3661                                 path.slots[level]);
3662                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3663                                         sizeof(found_key)));
3664         }
3665
3666         while (1) {
3667                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3668                 if (wret < 0)
3669                         ret = wret;
3670                 if (wret != 0)
3671                         break;
3672
3673                 wret = walk_up_tree(root, &path, wc, &level);
3674                 if (wret < 0)
3675                         ret = wret;
3676                 if (wret != 0)
3677                         break;
3678         }
3679 skip_walking:
3680         btrfs_release_path(&path);
3681
3682         if (!cache_tree_empty(&corrupt_blocks)) {
3683                 struct cache_extent *cache;
3684                 struct btrfs_corrupt_block *corrupt;
3685
3686                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3687                        root->root_key.objectid);
3688                 cache = first_cache_extent(&corrupt_blocks);
3689                 while (cache) {
3690                         corrupt = container_of(cache,
3691                                                struct btrfs_corrupt_block,
3692                                                cache);
3693                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3694                                cache->start, corrupt->level,
3695                                corrupt->key.objectid, corrupt->key.type,
3696                                corrupt->key.offset);
3697                         cache = next_cache_extent(cache);
3698                 }
3699                 if (repair) {
3700                         printf("Try to repair the btree for root %llu\n",
3701                                root->root_key.objectid);
3702                         ret = repair_btree(root, &corrupt_blocks);
3703                         if (ret < 0)
3704                                 fprintf(stderr, "Failed to repair btree: %s\n",
3705                                         strerror(-ret));
3706                         if (!ret)
3707                                 printf("Btree for root %llu is fixed\n",
3708                                        root->root_key.objectid);
3709                 }
3710         }
3711
3712         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3713         if (err < 0)
3714                 ret = err;
3715
3716         if (root_node.current) {
3717                 root_node.current->checked = 1;
3718                 maybe_free_inode_rec(&root_node.inode_cache,
3719                                 root_node.current);
3720         }
3721
3722         err = check_inode_recs(root, &root_node.inode_cache);
3723         if (!ret)
3724                 ret = err;
3725
3726         free_corrupt_blocks_tree(&corrupt_blocks);
3727         root->fs_info->corrupt_blocks = NULL;
3728         free_orphan_data_extents(&root->orphan_data_extents);
3729         return ret;
3730 }
3731
3732 static int fs_root_objectid(u64 objectid)
3733 {
3734         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3735             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3736                 return 1;
3737         return is_fstree(objectid);
3738 }
3739
3740 static int check_fs_roots(struct btrfs_root *root,
3741                           struct cache_tree *root_cache)
3742 {
3743         struct btrfs_path path;
3744         struct btrfs_key key;
3745         struct walk_control wc;
3746         struct extent_buffer *leaf, *tree_node;
3747         struct btrfs_root *tmp_root;
3748         struct btrfs_root *tree_root = root->fs_info->tree_root;
3749         int ret;
3750         int err = 0;
3751
3752         if (ctx.progress_enabled) {
3753                 ctx.tp = TASK_FS_ROOTS;
3754                 task_start(ctx.info);
3755         }
3756
3757         /*
3758          * Just in case we made any changes to the extent tree that weren't
3759          * reflected into the free space cache yet.
3760          */
3761         if (repair)
3762                 reset_cached_block_groups(root->fs_info);
3763         memset(&wc, 0, sizeof(wc));
3764         cache_tree_init(&wc.shared);
3765         btrfs_init_path(&path);
3766
3767 again:
3768         key.offset = 0;
3769         key.objectid = 0;
3770         key.type = BTRFS_ROOT_ITEM_KEY;
3771         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3772         if (ret < 0) {
3773                 err = 1;
3774                 goto out;
3775         }
3776         tree_node = tree_root->node;
3777         while (1) {
3778                 if (tree_node != tree_root->node) {
3779                         free_root_recs_tree(root_cache);
3780                         btrfs_release_path(&path);
3781                         goto again;
3782                 }
3783                 leaf = path.nodes[0];
3784                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3785                         ret = btrfs_next_leaf(tree_root, &path);
3786                         if (ret) {
3787                                 if (ret < 0)
3788                                         err = 1;
3789                                 break;
3790                         }
3791                         leaf = path.nodes[0];
3792                 }
3793                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3794                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3795                     fs_root_objectid(key.objectid)) {
3796                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3797                                 tmp_root = btrfs_read_fs_root_no_cache(
3798                                                 root->fs_info, &key);
3799                         } else {
3800                                 key.offset = (u64)-1;
3801                                 tmp_root = btrfs_read_fs_root(
3802                                                 root->fs_info, &key);
3803                         }
3804                         if (IS_ERR(tmp_root)) {
3805                                 err = 1;
3806                                 goto next;
3807                         }
3808                         ret = check_fs_root(tmp_root, root_cache, &wc);
3809                         if (ret == -EAGAIN) {
3810                                 free_root_recs_tree(root_cache);
3811                                 btrfs_release_path(&path);
3812                                 goto again;
3813                         }
3814                         if (ret)
3815                                 err = 1;
3816                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3817                                 btrfs_free_fs_root(tmp_root);
3818                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3819                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3820                         process_root_ref(leaf, path.slots[0], &key,
3821                                          root_cache);
3822                 }
3823 next:
3824                 path.slots[0]++;
3825         }
3826 out:
3827         btrfs_release_path(&path);
3828         if (err)
3829                 free_extent_cache_tree(&wc.shared);
3830         if (!cache_tree_empty(&wc.shared))
3831                 fprintf(stderr, "warning line %d\n", __LINE__);
3832
3833         task_stop(ctx.info);
3834
3835         return err;
3836 }
3837
3838 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3839 {
3840         struct list_head *cur = rec->backrefs.next;
3841         struct extent_backref *back;
3842         struct tree_backref *tback;
3843         struct data_backref *dback;
3844         u64 found = 0;
3845         int err = 0;
3846
3847         while(cur != &rec->backrefs) {
3848                 back = to_extent_backref(cur);
3849                 cur = cur->next;
3850                 if (!back->found_extent_tree) {
3851                         err = 1;
3852                         if (!print_errs)
3853                                 goto out;
3854                         if (back->is_data) {
3855                                 dback = to_data_backref(back);
3856                                 fprintf(stderr, "Backref %llu %s %llu"
3857                                         " owner %llu offset %llu num_refs %lu"
3858                                         " not found in extent tree\n",
3859                                         (unsigned long long)rec->start,
3860                                         back->full_backref ?
3861                                         "parent" : "root",
3862                                         back->full_backref ?
3863                                         (unsigned long long)dback->parent:
3864                                         (unsigned long long)dback->root,
3865                                         (unsigned long long)dback->owner,
3866                                         (unsigned long long)dback->offset,
3867                                         (unsigned long)dback->num_refs);
3868                         } else {
3869                                 tback = to_tree_backref(back);
3870                                 fprintf(stderr, "Backref %llu parent %llu"
3871                                         " root %llu not found in extent tree\n",
3872                                         (unsigned long long)rec->start,
3873                                         (unsigned long long)tback->parent,
3874                                         (unsigned long long)tback->root);
3875                         }
3876                 }
3877                 if (!back->is_data && !back->found_ref) {
3878                         err = 1;
3879                         if (!print_errs)
3880                                 goto out;
3881                         tback = to_tree_backref(back);
3882                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3883                                 (unsigned long long)rec->start,
3884                                 back->full_backref ? "parent" : "root",
3885                                 back->full_backref ?
3886                                 (unsigned long long)tback->parent :
3887                                 (unsigned long long)tback->root, back);
3888                 }
3889                 if (back->is_data) {
3890                         dback = to_data_backref(back);
3891                         if (dback->found_ref != dback->num_refs) {
3892                                 err = 1;
3893                                 if (!print_errs)
3894                                         goto out;
3895                                 fprintf(stderr, "Incorrect local backref count"
3896                                         " on %llu %s %llu owner %llu"
3897                                         " offset %llu found %u wanted %u back %p\n",
3898                                         (unsigned long long)rec->start,
3899                                         back->full_backref ?
3900                                         "parent" : "root",
3901                                         back->full_backref ?
3902                                         (unsigned long long)dback->parent:
3903                                         (unsigned long long)dback->root,
3904                                         (unsigned long long)dback->owner,
3905                                         (unsigned long long)dback->offset,
3906                                         dback->found_ref, dback->num_refs, back);
3907                         }
3908                         if (dback->disk_bytenr != rec->start) {
3909                                 err = 1;
3910                                 if (!print_errs)
3911                                         goto out;
3912                                 fprintf(stderr, "Backref disk bytenr does not"
3913                                         " match extent record, bytenr=%llu, "
3914                                         "ref bytenr=%llu\n",
3915                                         (unsigned long long)rec->start,
3916                                         (unsigned long long)dback->disk_bytenr);
3917                         }
3918
3919                         if (dback->bytes != rec->nr) {
3920                                 err = 1;
3921                                 if (!print_errs)
3922                                         goto out;
3923                                 fprintf(stderr, "Backref bytes do not match "
3924                                         "extent backref, bytenr=%llu, ref "
3925                                         "bytes=%llu, backref bytes=%llu\n",
3926                                         (unsigned long long)rec->start,
3927                                         (unsigned long long)rec->nr,
3928                                         (unsigned long long)dback->bytes);
3929                         }
3930                 }
3931                 if (!back->is_data) {
3932                         found += 1;
3933                 } else {
3934                         dback = to_data_backref(back);
3935                         found += dback->found_ref;
3936                 }
3937         }
3938         if (found != rec->refs) {
3939                 err = 1;
3940                 if (!print_errs)
3941                         goto out;
3942                 fprintf(stderr, "Incorrect global backref count "
3943                         "on %llu found %llu wanted %llu\n",
3944                         (unsigned long long)rec->start,
3945                         (unsigned long long)found,
3946                         (unsigned long long)rec->refs);
3947         }
3948 out:
3949         return err;
3950 }
3951
3952 static int free_all_extent_backrefs(struct extent_record *rec)
3953 {
3954         struct extent_backref *back;
3955         struct list_head *cur;
3956         while (!list_empty(&rec->backrefs)) {
3957                 cur = rec->backrefs.next;
3958                 back = to_extent_backref(cur);
3959                 list_del(cur);
3960                 free(back);
3961         }
3962         return 0;
3963 }
3964
3965 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3966                                      struct cache_tree *extent_cache)
3967 {
3968         struct cache_extent *cache;
3969         struct extent_record *rec;
3970
3971         while (1) {
3972                 cache = first_cache_extent(extent_cache);
3973                 if (!cache)
3974                         break;
3975                 rec = container_of(cache, struct extent_record, cache);
3976                 remove_cache_extent(extent_cache, cache);
3977                 free_all_extent_backrefs(rec);
3978                 free(rec);
3979         }
3980 }
3981
3982 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3983                                  struct extent_record *rec)
3984 {
3985         if (rec->content_checked && rec->owner_ref_checked &&
3986             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3987             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3988             !rec->bad_full_backref && !rec->crossing_stripes &&
3989             !rec->wrong_chunk_type) {
3990                 remove_cache_extent(extent_cache, &rec->cache);
3991                 free_all_extent_backrefs(rec);
3992                 list_del_init(&rec->list);
3993                 free(rec);
3994         }
3995         return 0;
3996 }
3997
3998 static int check_owner_ref(struct btrfs_root *root,
3999                             struct extent_record *rec,
4000                             struct extent_buffer *buf)
4001 {
4002         struct extent_backref *node;
4003         struct tree_backref *back;
4004         struct btrfs_root *ref_root;
4005         struct btrfs_key key;
4006         struct btrfs_path path;
4007         struct extent_buffer *parent;
4008         int level;
4009         int found = 0;
4010         int ret;
4011
4012         list_for_each_entry(node, &rec->backrefs, list) {
4013                 if (node->is_data)
4014                         continue;
4015                 if (!node->found_ref)
4016                         continue;
4017                 if (node->full_backref)
4018                         continue;
4019                 back = to_tree_backref(node);
4020                 if (btrfs_header_owner(buf) == back->root)
4021                         return 0;
4022         }
4023         BUG_ON(rec->is_root);
4024
4025         /* try to find the block by search corresponding fs tree */
4026         key.objectid = btrfs_header_owner(buf);
4027         key.type = BTRFS_ROOT_ITEM_KEY;
4028         key.offset = (u64)-1;
4029
4030         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4031         if (IS_ERR(ref_root))
4032                 return 1;
4033
4034         level = btrfs_header_level(buf);
4035         if (level == 0)
4036                 btrfs_item_key_to_cpu(buf, &key, 0);
4037         else
4038                 btrfs_node_key_to_cpu(buf, &key, 0);
4039
4040         btrfs_init_path(&path);
4041         path.lowest_level = level + 1;
4042         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4043         if (ret < 0)
4044                 return 0;
4045
4046         parent = path.nodes[level + 1];
4047         if (parent && buf->start == btrfs_node_blockptr(parent,
4048                                                         path.slots[level + 1]))
4049                 found = 1;
4050
4051         btrfs_release_path(&path);
4052         return found ? 0 : 1;
4053 }
4054
4055 static int is_extent_tree_record(struct extent_record *rec)
4056 {
4057         struct list_head *cur = rec->backrefs.next;
4058         struct extent_backref *node;
4059         struct tree_backref *back;
4060         int is_extent = 0;
4061
4062         while(cur != &rec->backrefs) {
4063                 node = to_extent_backref(cur);
4064                 cur = cur->next;
4065                 if (node->is_data)
4066                         return 0;
4067                 back = to_tree_backref(node);
4068                 if (node->full_backref)
4069                         return 0;
4070                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4071                         is_extent = 1;
4072         }
4073         return is_extent;
4074 }
4075
4076
4077 static int record_bad_block_io(struct btrfs_fs_info *info,
4078                                struct cache_tree *extent_cache,
4079                                u64 start, u64 len)
4080 {
4081         struct extent_record *rec;
4082         struct cache_extent *cache;
4083         struct btrfs_key key;
4084
4085         cache = lookup_cache_extent(extent_cache, start, len);
4086         if (!cache)
4087                 return 0;
4088
4089         rec = container_of(cache, struct extent_record, cache);
4090         if (!is_extent_tree_record(rec))
4091                 return 0;
4092
4093         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4094         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4095 }
4096
4097 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4098                        struct extent_buffer *buf, int slot)
4099 {
4100         if (btrfs_header_level(buf)) {
4101                 struct btrfs_key_ptr ptr1, ptr2;
4102
4103                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4104                                    sizeof(struct btrfs_key_ptr));
4105                 read_extent_buffer(buf, &ptr2,
4106                                    btrfs_node_key_ptr_offset(slot + 1),
4107                                    sizeof(struct btrfs_key_ptr));
4108                 write_extent_buffer(buf, &ptr1,
4109                                     btrfs_node_key_ptr_offset(slot + 1),
4110                                     sizeof(struct btrfs_key_ptr));
4111                 write_extent_buffer(buf, &ptr2,
4112                                     btrfs_node_key_ptr_offset(slot),
4113                                     sizeof(struct btrfs_key_ptr));
4114                 if (slot == 0) {
4115                         struct btrfs_disk_key key;
4116                         btrfs_node_key(buf, &key, 0);
4117                         btrfs_fixup_low_keys(root, path, &key,
4118                                              btrfs_header_level(buf) + 1);
4119                 }
4120         } else {
4121                 struct btrfs_item *item1, *item2;
4122                 struct btrfs_key k1, k2;
4123                 char *item1_data, *item2_data;
4124                 u32 item1_offset, item2_offset, item1_size, item2_size;
4125
4126                 item1 = btrfs_item_nr(slot);
4127                 item2 = btrfs_item_nr(slot + 1);
4128                 btrfs_item_key_to_cpu(buf, &k1, slot);
4129                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4130                 item1_offset = btrfs_item_offset(buf, item1);
4131                 item2_offset = btrfs_item_offset(buf, item2);
4132                 item1_size = btrfs_item_size(buf, item1);
4133                 item2_size = btrfs_item_size(buf, item2);
4134
4135                 item1_data = malloc(item1_size);
4136                 if (!item1_data)
4137                         return -ENOMEM;
4138                 item2_data = malloc(item2_size);
4139                 if (!item2_data) {
4140                         free(item1_data);
4141                         return -ENOMEM;
4142                 }
4143
4144                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4145                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4146
4147                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4148                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4149                 free(item1_data);
4150                 free(item2_data);
4151
4152                 btrfs_set_item_offset(buf, item1, item2_offset);
4153                 btrfs_set_item_offset(buf, item2, item1_offset);
4154                 btrfs_set_item_size(buf, item1, item2_size);
4155                 btrfs_set_item_size(buf, item2, item1_size);
4156
4157                 path->slots[0] = slot;
4158                 btrfs_set_item_key_unsafe(root, path, &k2);
4159                 path->slots[0] = slot + 1;
4160                 btrfs_set_item_key_unsafe(root, path, &k1);
4161         }
4162         return 0;
4163 }
4164
4165 static int fix_key_order(struct btrfs_trans_handle *trans,
4166                          struct btrfs_root *root,
4167                          struct btrfs_path *path)
4168 {
4169         struct extent_buffer *buf;
4170         struct btrfs_key k1, k2;
4171         int i;
4172         int level = path->lowest_level;
4173         int ret = -EIO;
4174
4175         buf = path->nodes[level];
4176         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4177                 if (level) {
4178                         btrfs_node_key_to_cpu(buf, &k1, i);
4179                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4180                 } else {
4181                         btrfs_item_key_to_cpu(buf, &k1, i);
4182                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4183                 }
4184                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4185                         continue;
4186                 ret = swap_values(root, path, buf, i);
4187                 if (ret)
4188                         break;
4189                 btrfs_mark_buffer_dirty(buf);
4190                 i = 0;
4191         }
4192         return ret;
4193 }
4194
4195 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4196                              struct btrfs_root *root,
4197                              struct btrfs_path *path,
4198                              struct extent_buffer *buf, int slot)
4199 {
4200         struct btrfs_key key;
4201         int nritems = btrfs_header_nritems(buf);
4202
4203         btrfs_item_key_to_cpu(buf, &key, slot);
4204
4205         /* These are all the keys we can deal with missing. */
4206         if (key.type != BTRFS_DIR_INDEX_KEY &&
4207             key.type != BTRFS_EXTENT_ITEM_KEY &&
4208             key.type != BTRFS_METADATA_ITEM_KEY &&
4209             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4210             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4211                 return -1;
4212
4213         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4214                (unsigned long long)key.objectid, key.type,
4215                (unsigned long long)key.offset, slot, buf->start);
4216         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4217                               btrfs_item_nr_offset(slot + 1),
4218                               sizeof(struct btrfs_item) *
4219                               (nritems - slot - 1));
4220         btrfs_set_header_nritems(buf, nritems - 1);
4221         if (slot == 0) {
4222                 struct btrfs_disk_key disk_key;
4223
4224                 btrfs_item_key(buf, &disk_key, 0);
4225                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4226         }
4227         btrfs_mark_buffer_dirty(buf);
4228         return 0;
4229 }
4230
4231 static int fix_item_offset(struct btrfs_trans_handle *trans,
4232                            struct btrfs_root *root,
4233                            struct btrfs_path *path)
4234 {
4235         struct extent_buffer *buf;
4236         int i;
4237         int ret = 0;
4238
4239         /* We should only get this for leaves */
4240         BUG_ON(path->lowest_level);
4241         buf = path->nodes[0];
4242 again:
4243         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4244                 unsigned int shift = 0, offset;
4245
4246                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4247                     BTRFS_LEAF_DATA_SIZE(root)) {
4248                         if (btrfs_item_end_nr(buf, i) >
4249                             BTRFS_LEAF_DATA_SIZE(root)) {
4250                                 ret = delete_bogus_item(trans, root, path,
4251                                                         buf, i);
4252                                 if (!ret)
4253                                         goto again;
4254                                 fprintf(stderr, "item is off the end of the "
4255                                         "leaf, can't fix\n");
4256                                 ret = -EIO;
4257                                 break;
4258                         }
4259                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4260                                 btrfs_item_end_nr(buf, i);
4261                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4262                            btrfs_item_offset_nr(buf, i - 1)) {
4263                         if (btrfs_item_end_nr(buf, i) >
4264                             btrfs_item_offset_nr(buf, i - 1)) {
4265                                 ret = delete_bogus_item(trans, root, path,
4266                                                         buf, i);
4267                                 if (!ret)
4268                                         goto again;
4269                                 fprintf(stderr, "items overlap, can't fix\n");
4270                                 ret = -EIO;
4271                                 break;
4272                         }
4273                         shift = btrfs_item_offset_nr(buf, i - 1) -
4274                                 btrfs_item_end_nr(buf, i);
4275                 }
4276                 if (!shift)
4277                         continue;
4278
4279                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4280                        i, shift, (unsigned long long)buf->start);
4281                 offset = btrfs_item_offset_nr(buf, i);
4282                 memmove_extent_buffer(buf,
4283                                       btrfs_leaf_data(buf) + offset + shift,
4284                                       btrfs_leaf_data(buf) + offset,
4285                                       btrfs_item_size_nr(buf, i));
4286                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4287                                       offset + shift);
4288                 btrfs_mark_buffer_dirty(buf);
4289         }
4290
4291         /*
4292          * We may have moved things, in which case we want to exit so we don't
4293          * write those changes out.  Once we have proper abort functionality in
4294          * progs this can be changed to something nicer.
4295          */
4296         BUG_ON(ret);
4297         return ret;
4298 }
4299
4300 /*
4301  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4302  * then just return -EIO.
4303  */
4304 static int try_to_fix_bad_block(struct btrfs_root *root,
4305                                 struct extent_buffer *buf,
4306                                 enum btrfs_tree_block_status status)
4307 {
4308         struct btrfs_trans_handle *trans;
4309         struct ulist *roots;
4310         struct ulist_node *node;
4311         struct btrfs_root *search_root;
4312         struct btrfs_path *path;
4313         struct ulist_iterator iter;
4314         struct btrfs_key root_key, key;
4315         int ret;
4316
4317         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4318             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4319                 return -EIO;
4320
4321         path = btrfs_alloc_path();
4322         if (!path)
4323                 return -EIO;
4324
4325         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4326                                    0, &roots);
4327         if (ret) {
4328                 btrfs_free_path(path);
4329                 return -EIO;
4330         }
4331
4332         ULIST_ITER_INIT(&iter);
4333         while ((node = ulist_next(roots, &iter))) {
4334                 root_key.objectid = node->val;
4335                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4336                 root_key.offset = (u64)-1;
4337
4338                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4339                 if (IS_ERR(root)) {
4340                         ret = -EIO;
4341                         break;
4342                 }
4343
4344
4345                 trans = btrfs_start_transaction(search_root, 0);
4346                 if (IS_ERR(trans)) {
4347                         ret = PTR_ERR(trans);
4348                         break;
4349                 }
4350
4351                 path->lowest_level = btrfs_header_level(buf);
4352                 path->skip_check_block = 1;
4353                 if (path->lowest_level)
4354                         btrfs_node_key_to_cpu(buf, &key, 0);
4355                 else
4356                         btrfs_item_key_to_cpu(buf, &key, 0);
4357                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4358                 if (ret) {
4359                         ret = -EIO;
4360                         btrfs_commit_transaction(trans, search_root);
4361                         break;
4362                 }
4363                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4364                         ret = fix_key_order(trans, search_root, path);
4365                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4366                         ret = fix_item_offset(trans, search_root, path);
4367                 if (ret) {
4368                         btrfs_commit_transaction(trans, search_root);
4369                         break;
4370                 }
4371                 btrfs_release_path(path);
4372                 btrfs_commit_transaction(trans, search_root);
4373         }
4374         ulist_free(roots);
4375         btrfs_free_path(path);
4376         return ret;
4377 }
4378
4379 static int check_block(struct btrfs_root *root,
4380                        struct cache_tree *extent_cache,
4381                        struct extent_buffer *buf, u64 flags)
4382 {
4383         struct extent_record *rec;
4384         struct cache_extent *cache;
4385         struct btrfs_key key;
4386         enum btrfs_tree_block_status status;
4387         int ret = 0;
4388         int level;
4389
4390         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4391         if (!cache)
4392                 return 1;
4393         rec = container_of(cache, struct extent_record, cache);
4394         rec->generation = btrfs_header_generation(buf);
4395
4396         level = btrfs_header_level(buf);
4397         if (btrfs_header_nritems(buf) > 0) {
4398
4399                 if (level == 0)
4400                         btrfs_item_key_to_cpu(buf, &key, 0);
4401                 else
4402                         btrfs_node_key_to_cpu(buf, &key, 0);
4403
4404                 rec->info_objectid = key.objectid;
4405         }
4406         rec->info_level = level;
4407
4408         if (btrfs_is_leaf(buf))
4409                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4410         else
4411                 status = btrfs_check_node(root, &rec->parent_key, buf);
4412
4413         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4414                 if (repair)
4415                         status = try_to_fix_bad_block(root, buf, status);
4416                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4417                         ret = -EIO;
4418                         fprintf(stderr, "bad block %llu\n",
4419                                 (unsigned long long)buf->start);
4420                 } else {
4421                         /*
4422                          * Signal to callers we need to start the scan over
4423                          * again since we'll have cowed blocks.
4424                          */
4425                         ret = -EAGAIN;
4426                 }
4427         } else {
4428                 rec->content_checked = 1;
4429                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4430                         rec->owner_ref_checked = 1;
4431                 else {
4432                         ret = check_owner_ref(root, rec, buf);
4433                         if (!ret)
4434                                 rec->owner_ref_checked = 1;
4435                 }
4436         }
4437         if (!ret)
4438                 maybe_free_extent_rec(extent_cache, rec);
4439         return ret;
4440 }
4441
4442 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4443                                                 u64 parent, u64 root)
4444 {
4445         struct list_head *cur = rec->backrefs.next;
4446         struct extent_backref *node;
4447         struct tree_backref *back;
4448
4449         while(cur != &rec->backrefs) {
4450                 node = to_extent_backref(cur);
4451                 cur = cur->next;
4452                 if (node->is_data)
4453                         continue;
4454                 back = to_tree_backref(node);
4455                 if (parent > 0) {
4456                         if (!node->full_backref)
4457                                 continue;
4458                         if (parent == back->parent)
4459                                 return back;
4460                 } else {
4461                         if (node->full_backref)
4462                                 continue;
4463                         if (back->root == root)
4464                                 return back;
4465                 }
4466         }
4467         return NULL;
4468 }
4469
4470 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4471                                                 u64 parent, u64 root)
4472 {
4473         struct tree_backref *ref = malloc(sizeof(*ref));
4474
4475         if (!ref)
4476                 return NULL;
4477         memset(&ref->node, 0, sizeof(ref->node));
4478         if (parent > 0) {
4479                 ref->parent = parent;
4480                 ref->node.full_backref = 1;
4481         } else {
4482                 ref->root = root;
4483                 ref->node.full_backref = 0;
4484         }
4485         list_add_tail(&ref->node.list, &rec->backrefs);
4486
4487         return ref;
4488 }
4489
4490 static struct data_backref *find_data_backref(struct extent_record *rec,
4491                                                 u64 parent, u64 root,
4492                                                 u64 owner, u64 offset,
4493                                                 int found_ref,
4494                                                 u64 disk_bytenr, u64 bytes)
4495 {
4496         struct list_head *cur = rec->backrefs.next;
4497         struct extent_backref *node;
4498         struct data_backref *back;
4499
4500         while(cur != &rec->backrefs) {
4501                 node = to_extent_backref(cur);
4502                 cur = cur->next;
4503                 if (!node->is_data)
4504                         continue;
4505                 back = to_data_backref(node);
4506                 if (parent > 0) {
4507                         if (!node->full_backref)
4508                                 continue;
4509                         if (parent == back->parent)
4510                                 return back;
4511                 } else {
4512                         if (node->full_backref)
4513                                 continue;
4514                         if (back->root == root && back->owner == owner &&
4515                             back->offset == offset) {
4516                                 if (found_ref && node->found_ref &&
4517                                     (back->bytes != bytes ||
4518                                     back->disk_bytenr != disk_bytenr))
4519                                         continue;
4520                                 return back;
4521                         }
4522                 }
4523         }
4524         return NULL;
4525 }
4526
4527 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4528                                                 u64 parent, u64 root,
4529                                                 u64 owner, u64 offset,
4530                                                 u64 max_size)
4531 {
4532         struct data_backref *ref = malloc(sizeof(*ref));
4533
4534         if (!ref)
4535                 return NULL;
4536         memset(&ref->node, 0, sizeof(ref->node));
4537         ref->node.is_data = 1;
4538
4539         if (parent > 0) {
4540                 ref->parent = parent;
4541                 ref->owner = 0;
4542                 ref->offset = 0;
4543                 ref->node.full_backref = 1;
4544         } else {
4545                 ref->root = root;
4546                 ref->owner = owner;
4547                 ref->offset = offset;
4548                 ref->node.full_backref = 0;
4549         }
4550         ref->bytes = max_size;
4551         ref->found_ref = 0;
4552         ref->num_refs = 0;
4553         list_add_tail(&ref->node.list, &rec->backrefs);
4554         if (max_size > rec->max_size)
4555                 rec->max_size = max_size;
4556         return ref;
4557 }
4558
4559 /* Check if the type of extent matches with its chunk */
4560 static void check_extent_type(struct extent_record *rec)
4561 {
4562         struct btrfs_block_group_cache *bg_cache;
4563
4564         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4565         if (!bg_cache)
4566                 return;
4567
4568         /* data extent, check chunk directly*/
4569         if (!rec->metadata) {
4570                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4571                         rec->wrong_chunk_type = 1;
4572                 return;
4573         }
4574
4575         /* metadata extent, check the obvious case first */
4576         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4577                                  BTRFS_BLOCK_GROUP_METADATA))) {
4578                 rec->wrong_chunk_type = 1;
4579                 return;
4580         }
4581
4582         /*
4583          * Check SYSTEM extent, as it's also marked as metadata, we can only
4584          * make sure it's a SYSTEM extent by its backref
4585          */
4586         if (!list_empty(&rec->backrefs)) {
4587                 struct extent_backref *node;
4588                 struct tree_backref *tback;
4589                 u64 bg_type;
4590
4591                 node = to_extent_backref(rec->backrefs.next);
4592                 if (node->is_data) {
4593                         /* tree block shouldn't have data backref */
4594                         rec->wrong_chunk_type = 1;
4595                         return;
4596                 }
4597                 tback = container_of(node, struct tree_backref, node);
4598
4599                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4600                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4601                 else
4602                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4603                 if (!(bg_cache->flags & bg_type))
4604                         rec->wrong_chunk_type = 1;
4605         }
4606 }
4607
4608 /*
4609  * Allocate a new extent record, fill default values from @tmpl and insert int
4610  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4611  * the cache, otherwise it fails.
4612  */
4613 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4614                 struct extent_record *tmpl)
4615 {
4616         struct extent_record *rec;
4617         int ret = 0;
4618
4619         rec = malloc(sizeof(*rec));
4620         if (!rec)
4621                 return -ENOMEM;
4622         rec->start = tmpl->start;
4623         rec->max_size = tmpl->max_size;
4624         rec->nr = max(tmpl->nr, tmpl->max_size);
4625         rec->found_rec = tmpl->found_rec;
4626         rec->content_checked = tmpl->content_checked;
4627         rec->owner_ref_checked = tmpl->owner_ref_checked;
4628         rec->num_duplicates = 0;
4629         rec->metadata = tmpl->metadata;
4630         rec->flag_block_full_backref = FLAG_UNSET;
4631         rec->bad_full_backref = 0;
4632         rec->crossing_stripes = 0;
4633         rec->wrong_chunk_type = 0;
4634         rec->is_root = tmpl->is_root;
4635         rec->refs = tmpl->refs;
4636         rec->extent_item_refs = tmpl->extent_item_refs;
4637         rec->parent_generation = tmpl->parent_generation;
4638         INIT_LIST_HEAD(&rec->backrefs);
4639         INIT_LIST_HEAD(&rec->dups);
4640         INIT_LIST_HEAD(&rec->list);
4641         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4642         rec->cache.start = tmpl->start;
4643         rec->cache.size = tmpl->nr;
4644         ret = insert_cache_extent(extent_cache, &rec->cache);
4645         if (ret) {
4646                 free(rec);
4647                 return ret;
4648         }
4649         bytes_used += rec->nr;
4650
4651         if (tmpl->metadata)
4652                 rec->crossing_stripes = check_crossing_stripes(global_info,
4653                                 rec->start, global_info->tree_root->nodesize);
4654         check_extent_type(rec);
4655         return ret;
4656 }
4657
4658 /*
4659  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4660  * some are hints:
4661  * - refs              - if found, increase refs
4662  * - is_root           - if found, set
4663  * - content_checked   - if found, set
4664  * - owner_ref_checked - if found, set
4665  *
4666  * If not found, create a new one, initialize and insert.
4667  */
4668 static int add_extent_rec(struct cache_tree *extent_cache,
4669                 struct extent_record *tmpl)
4670 {
4671         struct extent_record *rec;
4672         struct cache_extent *cache;
4673         int ret = 0;
4674         int dup = 0;
4675
4676         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4677         if (cache) {
4678                 rec = container_of(cache, struct extent_record, cache);
4679                 if (tmpl->refs)
4680                         rec->refs++;
4681                 if (rec->nr == 1)
4682                         rec->nr = max(tmpl->nr, tmpl->max_size);
4683
4684                 /*
4685                  * We need to make sure to reset nr to whatever the extent
4686                  * record says was the real size, this way we can compare it to
4687                  * the backrefs.
4688                  */
4689                 if (tmpl->found_rec) {
4690                         if (tmpl->start != rec->start || rec->found_rec) {
4691                                 struct extent_record *tmp;
4692
4693                                 dup = 1;
4694                                 if (list_empty(&rec->list))
4695                                         list_add_tail(&rec->list,
4696                                                       &duplicate_extents);
4697
4698                                 /*
4699                                  * We have to do this song and dance in case we
4700                                  * find an extent record that falls inside of
4701                                  * our current extent record but does not have
4702                                  * the same objectid.
4703                                  */
4704                                 tmp = malloc(sizeof(*tmp));
4705                                 if (!tmp)
4706                                         return -ENOMEM;
4707                                 tmp->start = tmpl->start;
4708                                 tmp->max_size = tmpl->max_size;
4709                                 tmp->nr = tmpl->nr;
4710                                 tmp->found_rec = 1;
4711                                 tmp->metadata = tmpl->metadata;
4712                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4713                                 INIT_LIST_HEAD(&tmp->list);
4714                                 list_add_tail(&tmp->list, &rec->dups);
4715                                 rec->num_duplicates++;
4716                         } else {
4717                                 rec->nr = tmpl->nr;
4718                                 rec->found_rec = 1;
4719                         }
4720                 }
4721
4722                 if (tmpl->extent_item_refs && !dup) {
4723                         if (rec->extent_item_refs) {
4724                                 fprintf(stderr, "block %llu rec "
4725                                         "extent_item_refs %llu, passed %llu\n",
4726                                         (unsigned long long)tmpl->start,
4727                                         (unsigned long long)
4728                                                         rec->extent_item_refs,
4729                                         (unsigned long long)tmpl->extent_item_refs);
4730                         }
4731                         rec->extent_item_refs = tmpl->extent_item_refs;
4732                 }
4733                 if (tmpl->is_root)
4734                         rec->is_root = 1;
4735                 if (tmpl->content_checked)
4736                         rec->content_checked = 1;
4737                 if (tmpl->owner_ref_checked)
4738                         rec->owner_ref_checked = 1;
4739                 memcpy(&rec->parent_key, &tmpl->parent_key,
4740                                 sizeof(tmpl->parent_key));
4741                 if (tmpl->parent_generation)
4742                         rec->parent_generation = tmpl->parent_generation;
4743                 if (rec->max_size < tmpl->max_size)
4744                         rec->max_size = tmpl->max_size;
4745
4746                 /*
4747                  * A metadata extent can't cross stripe_len boundary, otherwise
4748                  * kernel scrub won't be able to handle it.
4749                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4750                  * it.
4751                  */
4752                 if (tmpl->metadata)
4753                         rec->crossing_stripes = check_crossing_stripes(
4754                                         global_info, rec->start,
4755                                         global_info->tree_root->nodesize);
4756                 check_extent_type(rec);
4757                 maybe_free_extent_rec(extent_cache, rec);
4758                 return ret;
4759         }
4760
4761         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4762
4763         return ret;
4764 }
4765
4766 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4767                             u64 parent, u64 root, int found_ref)
4768 {
4769         struct extent_record *rec;
4770         struct tree_backref *back;
4771         struct cache_extent *cache;
4772         int ret;
4773
4774         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4775         if (!cache) {
4776                 struct extent_record tmpl;
4777
4778                 memset(&tmpl, 0, sizeof(tmpl));
4779                 tmpl.start = bytenr;
4780                 tmpl.nr = 1;
4781                 tmpl.metadata = 1;
4782
4783                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4784                 if (ret)
4785                         return ret;
4786
4787                 /* really a bug in cache_extent implement now */
4788                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4789                 if (!cache)
4790                         return -ENOENT;
4791         }
4792
4793         rec = container_of(cache, struct extent_record, cache);
4794         if (rec->start != bytenr) {
4795                 /*
4796                  * Several cause, from unaligned bytenr to over lapping extents
4797                  */
4798                 return -EEXIST;
4799         }
4800
4801         back = find_tree_backref(rec, parent, root);
4802         if (!back) {
4803                 back = alloc_tree_backref(rec, parent, root);
4804                 if (!back)
4805                         return -ENOMEM;
4806         }
4807
4808         if (found_ref) {
4809                 if (back->node.found_ref) {
4810                         fprintf(stderr, "Extent back ref already exists "
4811                                 "for %llu parent %llu root %llu \n",
4812                                 (unsigned long long)bytenr,
4813                                 (unsigned long long)parent,
4814                                 (unsigned long long)root);
4815                 }
4816                 back->node.found_ref = 1;
4817         } else {
4818                 if (back->node.found_extent_tree) {
4819                         fprintf(stderr, "Extent back ref already exists "
4820                                 "for %llu parent %llu root %llu \n",
4821                                 (unsigned long long)bytenr,
4822                                 (unsigned long long)parent,
4823                                 (unsigned long long)root);
4824                 }
4825                 back->node.found_extent_tree = 1;
4826         }
4827         check_extent_type(rec);
4828         maybe_free_extent_rec(extent_cache, rec);
4829         return 0;
4830 }
4831
4832 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4833                             u64 parent, u64 root, u64 owner, u64 offset,
4834                             u32 num_refs, int found_ref, u64 max_size)
4835 {
4836         struct extent_record *rec;
4837         struct data_backref *back;
4838         struct cache_extent *cache;
4839         int ret;
4840
4841         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4842         if (!cache) {
4843                 struct extent_record tmpl;
4844
4845                 memset(&tmpl, 0, sizeof(tmpl));
4846                 tmpl.start = bytenr;
4847                 tmpl.nr = 1;
4848                 tmpl.max_size = max_size;
4849
4850                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4851                 if (ret)
4852                         return ret;
4853
4854                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4855                 if (!cache)
4856                         abort();
4857         }
4858
4859         rec = container_of(cache, struct extent_record, cache);
4860         if (rec->max_size < max_size)
4861                 rec->max_size = max_size;
4862
4863         /*
4864          * If found_ref is set then max_size is the real size and must match the
4865          * existing refs.  So if we have already found a ref then we need to
4866          * make sure that this ref matches the existing one, otherwise we need
4867          * to add a new backref so we can notice that the backrefs don't match
4868          * and we need to figure out who is telling the truth.  This is to
4869          * account for that awful fsync bug I introduced where we'd end up with
4870          * a btrfs_file_extent_item that would have its length include multiple
4871          * prealloc extents or point inside of a prealloc extent.
4872          */
4873         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4874                                  bytenr, max_size);
4875         if (!back) {
4876                 back = alloc_data_backref(rec, parent, root, owner, offset,
4877                                           max_size);
4878                 BUG_ON(!back);
4879         }
4880
4881         if (found_ref) {
4882                 BUG_ON(num_refs != 1);
4883                 if (back->node.found_ref)
4884                         BUG_ON(back->bytes != max_size);
4885                 back->node.found_ref = 1;
4886                 back->found_ref += 1;
4887                 back->bytes = max_size;
4888                 back->disk_bytenr = bytenr;
4889                 rec->refs += 1;
4890                 rec->content_checked = 1;
4891                 rec->owner_ref_checked = 1;
4892         } else {
4893                 if (back->node.found_extent_tree) {
4894                         fprintf(stderr, "Extent back ref already exists "
4895                                 "for %llu parent %llu root %llu "
4896                                 "owner %llu offset %llu num_refs %lu\n",
4897                                 (unsigned long long)bytenr,
4898                                 (unsigned long long)parent,
4899                                 (unsigned long long)root,
4900                                 (unsigned long long)owner,
4901                                 (unsigned long long)offset,
4902                                 (unsigned long)num_refs);
4903                 }
4904                 back->num_refs = num_refs;
4905                 back->node.found_extent_tree = 1;
4906         }
4907         maybe_free_extent_rec(extent_cache, rec);
4908         return 0;
4909 }
4910
4911 static int add_pending(struct cache_tree *pending,
4912                        struct cache_tree *seen, u64 bytenr, u32 size)
4913 {
4914         int ret;
4915         ret = add_cache_extent(seen, bytenr, size);
4916         if (ret)
4917                 return ret;
4918         add_cache_extent(pending, bytenr, size);
4919         return 0;
4920 }
4921
4922 static int pick_next_pending(struct cache_tree *pending,
4923                         struct cache_tree *reada,
4924                         struct cache_tree *nodes,
4925                         u64 last, struct block_info *bits, int bits_nr,
4926                         int *reada_bits)
4927 {
4928         unsigned long node_start = last;
4929         struct cache_extent *cache;
4930         int ret;
4931
4932         cache = search_cache_extent(reada, 0);
4933         if (cache) {
4934                 bits[0].start = cache->start;
4935                 bits[0].size = cache->size;
4936                 *reada_bits = 1;
4937                 return 1;
4938         }
4939         *reada_bits = 0;
4940         if (node_start > 32768)
4941                 node_start -= 32768;
4942
4943         cache = search_cache_extent(nodes, node_start);
4944         if (!cache)
4945                 cache = search_cache_extent(nodes, 0);
4946
4947         if (!cache) {
4948                  cache = search_cache_extent(pending, 0);
4949                  if (!cache)
4950                          return 0;
4951                  ret = 0;
4952                  do {
4953                          bits[ret].start = cache->start;
4954                          bits[ret].size = cache->size;
4955                          cache = next_cache_extent(cache);
4956                          ret++;
4957                  } while (cache && ret < bits_nr);
4958                  return ret;
4959         }
4960
4961         ret = 0;
4962         do {
4963                 bits[ret].start = cache->start;
4964                 bits[ret].size = cache->size;
4965                 cache = next_cache_extent(cache);
4966                 ret++;
4967         } while (cache && ret < bits_nr);
4968
4969         if (bits_nr - ret > 8) {
4970                 u64 lookup = bits[0].start + bits[0].size;
4971                 struct cache_extent *next;
4972                 next = search_cache_extent(pending, lookup);
4973                 while(next) {
4974                         if (next->start - lookup > 32768)
4975                                 break;
4976                         bits[ret].start = next->start;
4977                         bits[ret].size = next->size;
4978                         lookup = next->start + next->size;
4979                         ret++;
4980                         if (ret == bits_nr)
4981                                 break;
4982                         next = next_cache_extent(next);
4983                         if (!next)
4984                                 break;
4985                 }
4986         }
4987         return ret;
4988 }
4989
4990 static void free_chunk_record(struct cache_extent *cache)
4991 {
4992         struct chunk_record *rec;
4993
4994         rec = container_of(cache, struct chunk_record, cache);
4995         list_del_init(&rec->list);
4996         list_del_init(&rec->dextents);
4997         free(rec);
4998 }
4999
5000 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5001 {
5002         cache_tree_free_extents(chunk_cache, free_chunk_record);
5003 }
5004
5005 static void free_device_record(struct rb_node *node)
5006 {
5007         struct device_record *rec;
5008
5009         rec = container_of(node, struct device_record, node);
5010         free(rec);
5011 }
5012
5013 FREE_RB_BASED_TREE(device_cache, free_device_record);
5014
5015 int insert_block_group_record(struct block_group_tree *tree,
5016                               struct block_group_record *bg_rec)
5017 {
5018         int ret;
5019
5020         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5021         if (ret)
5022                 return ret;
5023
5024         list_add_tail(&bg_rec->list, &tree->block_groups);
5025         return 0;
5026 }
5027
5028 static void free_block_group_record(struct cache_extent *cache)
5029 {
5030         struct block_group_record *rec;
5031
5032         rec = container_of(cache, struct block_group_record, cache);
5033         list_del_init(&rec->list);
5034         free(rec);
5035 }
5036
5037 void free_block_group_tree(struct block_group_tree *tree)
5038 {
5039         cache_tree_free_extents(&tree->tree, free_block_group_record);
5040 }
5041
5042 int insert_device_extent_record(struct device_extent_tree *tree,
5043                                 struct device_extent_record *de_rec)
5044 {
5045         int ret;
5046
5047         /*
5048          * Device extent is a bit different from the other extents, because
5049          * the extents which belong to the different devices may have the
5050          * same start and size, so we need use the special extent cache
5051          * search/insert functions.
5052          */
5053         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5054         if (ret)
5055                 return ret;
5056
5057         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5058         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5059         return 0;
5060 }
5061
5062 static void free_device_extent_record(struct cache_extent *cache)
5063 {
5064         struct device_extent_record *rec;
5065
5066         rec = container_of(cache, struct device_extent_record, cache);
5067         if (!list_empty(&rec->chunk_list))
5068                 list_del_init(&rec->chunk_list);
5069         if (!list_empty(&rec->device_list))
5070                 list_del_init(&rec->device_list);
5071         free(rec);
5072 }
5073
5074 void free_device_extent_tree(struct device_extent_tree *tree)
5075 {
5076         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5077 }
5078
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, any other one as a data backref carrying the ref count stored
 * in the item. The key's objectid is used as bytenr and its offset as parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
5099
5100 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5101                                             struct btrfs_key *key,
5102                                             int slot)
5103 {
5104         struct btrfs_chunk *ptr;
5105         struct chunk_record *rec;
5106         int num_stripes, i;
5107
5108         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5109         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5110
5111         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5112         if (!rec) {
5113                 fprintf(stderr, "memory allocation failed\n");
5114                 exit(-1);
5115         }
5116
5117         INIT_LIST_HEAD(&rec->list);
5118         INIT_LIST_HEAD(&rec->dextents);
5119         rec->bg_rec = NULL;
5120
5121         rec->cache.start = key->offset;
5122         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5123
5124         rec->generation = btrfs_header_generation(leaf);
5125
5126         rec->objectid = key->objectid;
5127         rec->type = key->type;
5128         rec->offset = key->offset;
5129
5130         rec->length = rec->cache.size;
5131         rec->owner = btrfs_chunk_owner(leaf, ptr);
5132         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5133         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5134         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5135         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5136         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5137         rec->num_stripes = num_stripes;
5138         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5139
5140         for (i = 0; i < rec->num_stripes; ++i) {
5141                 rec->stripes[i].devid =
5142                         btrfs_stripe_devid_nr(leaf, ptr, i);
5143                 rec->stripes[i].offset =
5144                         btrfs_stripe_offset_nr(leaf, ptr, i);
5145                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5146                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5147                                 BTRFS_UUID_SIZE);
5148         }
5149
5150         return rec;
5151 }
5152
5153 static int process_chunk_item(struct cache_tree *chunk_cache,
5154                               struct btrfs_key *key, struct extent_buffer *eb,
5155                               int slot)
5156 {
5157         struct chunk_record *rec;
5158         struct btrfs_chunk *chunk;
5159         int ret = 0;
5160
5161         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5162         /*
5163          * Do extra check for this chunk item,
5164          *
5165          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5166          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5167          * and owner<->key_type check.
5168          */
5169         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5170                                       key->offset);
5171         if (ret < 0) {
5172                 error("chunk(%llu, %llu) is not valid, ignore it",
5173                       key->offset, btrfs_chunk_length(eb, chunk));
5174                 return 0;
5175         }
5176         rec = btrfs_new_chunk_record(eb, key, slot);
5177         ret = insert_cache_extent(chunk_cache, &rec->cache);
5178         if (ret) {
5179                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5180                         rec->offset, rec->length);
5181                 free(rec);
5182         }
5183
5184         return ret;
5185 }
5186
5187 static int process_device_item(struct rb_root *dev_cache,
5188                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5189 {
5190         struct btrfs_dev_item *ptr;
5191         struct device_record *rec;
5192         int ret = 0;
5193
5194         ptr = btrfs_item_ptr(eb,
5195                 slot, struct btrfs_dev_item);
5196
5197         rec = malloc(sizeof(*rec));
5198         if (!rec) {
5199                 fprintf(stderr, "memory allocation failed\n");
5200                 return -ENOMEM;
5201         }
5202
5203         rec->devid = key->offset;
5204         rec->generation = btrfs_header_generation(eb);
5205
5206         rec->objectid = key->objectid;
5207         rec->type = key->type;
5208         rec->offset = key->offset;
5209
5210         rec->devid = btrfs_device_id(eb, ptr);
5211         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5212         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5213
5214         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5215         if (ret) {
5216                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5217                 free(rec);
5218         }
5219
5220         return ret;
5221 }
5222
5223 struct block_group_record *
5224 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5225                              int slot)
5226 {
5227         struct btrfs_block_group_item *ptr;
5228         struct block_group_record *rec;
5229
5230         rec = calloc(1, sizeof(*rec));
5231         if (!rec) {
5232                 fprintf(stderr, "memory allocation failed\n");
5233                 exit(-1);
5234         }
5235
5236         rec->cache.start = key->objectid;
5237         rec->cache.size = key->offset;
5238
5239         rec->generation = btrfs_header_generation(leaf);
5240
5241         rec->objectid = key->objectid;
5242         rec->type = key->type;
5243         rec->offset = key->offset;
5244
5245         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5246         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5247
5248         INIT_LIST_HEAD(&rec->list);
5249
5250         return rec;
5251 }
5252
5253 static int process_block_group_item(struct block_group_tree *block_group_cache,
5254                                     struct btrfs_key *key,
5255                                     struct extent_buffer *eb, int slot)
5256 {
5257         struct block_group_record *rec;
5258         int ret = 0;
5259
5260         rec = btrfs_new_block_group_record(eb, key, slot);
5261         ret = insert_block_group_record(block_group_cache, rec);
5262         if (ret) {
5263                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5264                         rec->objectid, rec->offset);
5265                 free(rec);
5266         }
5267
5268         return ret;
5269 }
5270
5271 struct device_extent_record *
5272 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5273                                struct btrfs_key *key, int slot)
5274 {
5275         struct device_extent_record *rec;
5276         struct btrfs_dev_extent *ptr;
5277
5278         rec = calloc(1, sizeof(*rec));
5279         if (!rec) {
5280                 fprintf(stderr, "memory allocation failed\n");
5281                 exit(-1);
5282         }
5283
5284         rec->cache.objectid = key->objectid;
5285         rec->cache.start = key->offset;
5286
5287         rec->generation = btrfs_header_generation(leaf);
5288
5289         rec->objectid = key->objectid;
5290         rec->type = key->type;
5291         rec->offset = key->offset;
5292
5293         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5294         rec->chunk_objecteid =
5295                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5296         rec->chunk_offset =
5297                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5298         rec->length = btrfs_dev_extent_length(leaf, ptr);
5299         rec->cache.size = rec->length;
5300
5301         INIT_LIST_HEAD(&rec->chunk_list);
5302         INIT_LIST_HEAD(&rec->device_list);
5303
5304         return rec;
5305 }
5306
5307 static int
5308 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5309                            struct btrfs_key *key, struct extent_buffer *eb,
5310                            int slot)
5311 {
5312         struct device_extent_record *rec;
5313         int ret;
5314
5315         rec = btrfs_new_device_extent_record(eb, key, slot);
5316         ret = insert_device_extent_record(dev_extent_cache, rec);
5317         if (ret) {
5318                 fprintf(stderr,
5319                         "Device extent[%llu, %llu, %llu] existed.\n",
5320                         rec->objectid, rec->offset, rec->length);
5321                 free(rec);
5322         }
5323
5324         return ret;
5325 }
5326
5327 static int process_extent_item(struct btrfs_root *root,
5328                                struct cache_tree *extent_cache,
5329                                struct extent_buffer *eb, int slot)
5330 {
5331         struct btrfs_extent_item *ei;
5332         struct btrfs_extent_inline_ref *iref;
5333         struct btrfs_extent_data_ref *dref;
5334         struct btrfs_shared_data_ref *sref;
5335         struct btrfs_key key;
5336         struct extent_record tmpl;
5337         unsigned long end;
5338         unsigned long ptr;
5339         int ret;
5340         int type;
5341         u32 item_size = btrfs_item_size_nr(eb, slot);
5342         u64 refs = 0;
5343         u64 offset;
5344         u64 num_bytes;
5345         int metadata = 0;
5346
5347         btrfs_item_key_to_cpu(eb, &key, slot);
5348
5349         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5350                 metadata = 1;
5351                 num_bytes = root->nodesize;
5352         } else {
5353                 num_bytes = key.offset;
5354         }
5355
5356         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5357                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5358                       key.objectid, root->sectorsize);
5359                 return -EIO;
5360         }
5361         if (item_size < sizeof(*ei)) {
5362 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5363                 struct btrfs_extent_item_v0 *ei0;
5364                 BUG_ON(item_size != sizeof(*ei0));
5365                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5366                 refs = btrfs_extent_refs_v0(eb, ei0);
5367 #else
5368                 BUG();
5369 #endif
5370                 memset(&tmpl, 0, sizeof(tmpl));
5371                 tmpl.start = key.objectid;
5372                 tmpl.nr = num_bytes;
5373                 tmpl.extent_item_refs = refs;
5374                 tmpl.metadata = metadata;
5375                 tmpl.found_rec = 1;
5376                 tmpl.max_size = num_bytes;
5377
5378                 return add_extent_rec(extent_cache, &tmpl);
5379         }
5380
5381         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5382         refs = btrfs_extent_refs(eb, ei);
5383         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5384                 metadata = 1;
5385         else
5386                 metadata = 0;
5387         if (metadata && num_bytes != root->nodesize) {
5388                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5389                       num_bytes, root->nodesize);
5390                 return -EIO;
5391         }
5392         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5393                 error("ignore invalid data extent, length %llu is not aligned to %u",
5394                       num_bytes, root->sectorsize);
5395                 return -EIO;
5396         }
5397
5398         memset(&tmpl, 0, sizeof(tmpl));
5399         tmpl.start = key.objectid;
5400         tmpl.nr = num_bytes;
5401         tmpl.extent_item_refs = refs;
5402         tmpl.metadata = metadata;
5403         tmpl.found_rec = 1;
5404         tmpl.max_size = num_bytes;
5405         add_extent_rec(extent_cache, &tmpl);
5406
5407         ptr = (unsigned long)(ei + 1);
5408         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5409             key.type == BTRFS_EXTENT_ITEM_KEY)
5410                 ptr += sizeof(struct btrfs_tree_block_info);
5411
5412         end = (unsigned long)ei + item_size;
5413         while (ptr < end) {
5414                 iref = (struct btrfs_extent_inline_ref *)ptr;
5415                 type = btrfs_extent_inline_ref_type(eb, iref);
5416                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5417                 switch (type) {
5418                 case BTRFS_TREE_BLOCK_REF_KEY:
5419                         ret = add_tree_backref(extent_cache, key.objectid,
5420                                         0, offset, 0);
5421                         if (ret < 0)
5422                                 error("add_tree_backref failed: %s",
5423                                       strerror(-ret));
5424                         break;
5425                 case BTRFS_SHARED_BLOCK_REF_KEY:
5426                         ret = add_tree_backref(extent_cache, key.objectid,
5427                                         offset, 0, 0);
5428                         if (ret < 0)
5429                                 error("add_tree_backref failed: %s",
5430                                       strerror(-ret));
5431                         break;
5432                 case BTRFS_EXTENT_DATA_REF_KEY:
5433                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5434                         add_data_backref(extent_cache, key.objectid, 0,
5435                                         btrfs_extent_data_ref_root(eb, dref),
5436                                         btrfs_extent_data_ref_objectid(eb,
5437                                                                        dref),
5438                                         btrfs_extent_data_ref_offset(eb, dref),
5439                                         btrfs_extent_data_ref_count(eb, dref),
5440                                         0, num_bytes);
5441                         break;
5442                 case BTRFS_SHARED_DATA_REF_KEY:
5443                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5444                         add_data_backref(extent_cache, key.objectid, offset,
5445                                         0, 0, 0,
5446                                         btrfs_shared_data_ref_count(eb, sref),
5447                                         0, num_bytes);
5448                         break;
5449                 default:
5450                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5451                                 key.objectid, key.type, num_bytes);
5452                         goto out;
5453                 }
5454                 ptr += btrfs_extent_inline_ref_size(type);
5455         }
5456         WARN_ON(ptr > end);
5457 out:
5458         return 0;
5459 }
5460
5461 static int check_cache_range(struct btrfs_root *root,
5462                              struct btrfs_block_group_cache *cache,
5463                              u64 offset, u64 bytes)
5464 {
5465         struct btrfs_free_space *entry;
5466         u64 *logical;
5467         u64 bytenr;
5468         int stripe_len;
5469         int i, nr, ret;
5470
5471         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5472                 bytenr = btrfs_sb_offset(i);
5473                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5474                                        cache->key.objectid, bytenr, 0,
5475                                        &logical, &nr, &stripe_len);
5476                 if (ret)
5477                         return ret;
5478
5479                 while (nr--) {
5480                         if (logical[nr] + stripe_len <= offset)
5481                                 continue;
5482                         if (offset + bytes <= logical[nr])
5483                                 continue;
5484                         if (logical[nr] == offset) {
5485                                 if (stripe_len >= bytes) {
5486                                         free(logical);
5487                                         return 0;
5488                                 }
5489                                 bytes -= stripe_len;
5490                                 offset += stripe_len;
5491                         } else if (logical[nr] < offset) {
5492                                 if (logical[nr] + stripe_len >=
5493                                     offset + bytes) {
5494                                         free(logical);
5495                                         return 0;
5496                                 }
5497                                 bytes = (offset + bytes) -
5498                                         (logical[nr] + stripe_len);
5499                                 offset = logical[nr] + stripe_len;
5500                         } else {
5501                                 /*
5502                                  * Could be tricky, the super may land in the
5503                                  * middle of the area we're checking.  First
5504                                  * check the easiest case, it's at the end.
5505                                  */
5506                                 if (logical[nr] + stripe_len >=
5507                                     bytes + offset) {
5508                                         bytes = logical[nr] - offset;
5509                                         continue;
5510                                 }
5511
5512                                 /* Check the left side */
5513                                 ret = check_cache_range(root, cache,
5514                                                         offset,
5515                                                         logical[nr] - offset);
5516                                 if (ret) {
5517                                         free(logical);
5518                                         return ret;
5519                                 }
5520
5521                                 /* Now we continue with the right side */
5522                                 bytes = (offset + bytes) -
5523                                         (logical[nr] + stripe_len);
5524                                 offset = logical[nr] + stripe_len;
5525                         }
5526                 }
5527
5528                 free(logical);
5529         }
5530
5531         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5532         if (!entry) {
5533                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5534                         offset, offset+bytes);
5535                 return -EINVAL;
5536         }
5537
5538         if (entry->offset != offset) {
5539                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5540                         entry->offset);
5541                 return -EINVAL;
5542         }
5543
5544         if (entry->bytes != bytes) {
5545                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5546                         bytes, entry->bytes, offset);
5547                 return -EINVAL;
5548         }
5549
5550         unlink_free_space(cache->free_space_ctl, entry);
5551         free(entry);
5552         return 0;
5553 }
5554
5555 static int verify_space_cache(struct btrfs_root *root,
5556                               struct btrfs_block_group_cache *cache)
5557 {
5558         struct btrfs_path *path;
5559         struct extent_buffer *leaf;
5560         struct btrfs_key key;
5561         u64 last;
5562         int ret = 0;
5563
5564         path = btrfs_alloc_path();
5565         if (!path)
5566                 return -ENOMEM;
5567
5568         root = root->fs_info->extent_root;
5569
5570         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5571
5572         key.objectid = last;
5573         key.offset = 0;
5574         key.type = BTRFS_EXTENT_ITEM_KEY;
5575
5576         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5577         if (ret < 0)
5578                 goto out;
5579         ret = 0;
5580         while (1) {
5581                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5582                         ret = btrfs_next_leaf(root, path);
5583                         if (ret < 0)
5584                                 goto out;
5585                         if (ret > 0) {
5586                                 ret = 0;
5587                                 break;
5588                         }
5589                 }
5590                 leaf = path->nodes[0];
5591                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5592                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5593                         break;
5594                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5595                     key.type != BTRFS_METADATA_ITEM_KEY) {
5596                         path->slots[0]++;
5597                         continue;
5598                 }
5599
5600                 if (last == key.objectid) {
5601                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5602                                 last = key.objectid + key.offset;
5603                         else
5604                                 last = key.objectid + root->nodesize;
5605                         path->slots[0]++;
5606                         continue;
5607                 }
5608
5609                 ret = check_cache_range(root, cache, last,
5610                                         key.objectid - last);
5611                 if (ret)
5612                         break;
5613                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5614                         last = key.objectid + key.offset;
5615                 else
5616                         last = key.objectid + root->nodesize;
5617                 path->slots[0]++;
5618         }
5619
5620         if (last < cache->key.objectid + cache->key.offset)
5621                 ret = check_cache_range(root, cache, last,
5622                                         cache->key.objectid +
5623                                         cache->key.offset - last);
5624
5625 out:
5626         btrfs_free_path(path);
5627
5628         if (!ret &&
5629             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5630                 fprintf(stderr, "There are still entries left in the space "
5631                         "cache\n");
5632                 ret = -EINVAL;
5633         }
5634
5635         return ret;
5636 }
5637
5638 static int check_space_cache(struct btrfs_root *root)
5639 {
5640         struct btrfs_block_group_cache *cache;
5641         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5642         int ret;
5643         int error = 0;
5644
5645         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5646             btrfs_super_generation(root->fs_info->super_copy) !=
5647             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5648                 printf("cache and super generation don't match, space cache "
5649                        "will be invalidated\n");
5650                 return 0;
5651         }
5652
5653         if (ctx.progress_enabled) {
5654                 ctx.tp = TASK_FREE_SPACE;
5655                 task_start(ctx.info);
5656         }
5657
5658         while (1) {
5659                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5660                 if (!cache)
5661                         break;
5662
5663                 start = cache->key.objectid + cache->key.offset;
5664                 if (!cache->free_space_ctl) {
5665                         if (btrfs_init_free_space_ctl(cache,
5666                                                       root->sectorsize)) {
5667                                 ret = -ENOMEM;
5668                                 break;
5669                         }
5670                 } else {
5671                         btrfs_remove_free_space_cache(cache);
5672                 }
5673
5674                 if (btrfs_fs_compat_ro(root->fs_info,
5675                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5676                         ret = exclude_super_stripes(root, cache);
5677                         if (ret) {
5678                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5679                                         strerror(-ret));
5680                                 error++;
5681                                 continue;
5682                         }
5683                         ret = load_free_space_tree(root->fs_info, cache);
5684                         free_excluded_extents(root, cache);
5685                         if (ret < 0) {
5686                                 fprintf(stderr, "could not load free space tree: %s\n",
5687                                         strerror(-ret));
5688                                 error++;
5689                                 continue;
5690                         }
5691                         error += ret;
5692                 } else {
5693                         ret = load_free_space_cache(root->fs_info, cache);
5694                         if (!ret)
5695                                 continue;
5696                 }
5697
5698                 ret = verify_space_cache(root, cache);
5699                 if (ret) {
5700                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5701                                 cache->key.objectid);
5702                         error++;
5703                 }
5704         }
5705
5706         task_stop(ctx.info);
5707
5708         return error ? -EINVAL : 0;
5709 }
5710
5711 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5712                         u64 num_bytes, unsigned long leaf_offset,
5713                         struct extent_buffer *eb) {
5714
5715         u64 offset = 0;
5716         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5717         char *data;
5718         unsigned long csum_offset;
5719         u32 csum;
5720         u32 csum_expected;
5721         u64 read_len;
5722         u64 data_checked = 0;
5723         u64 tmp;
5724         int ret = 0;
5725         int mirror;
5726         int num_copies;
5727
5728         if (num_bytes % root->sectorsize)
5729                 return -EINVAL;
5730
5731         data = malloc(num_bytes);
5732         if (!data)
5733                 return -ENOMEM;
5734
5735         while (offset < num_bytes) {
5736                 mirror = 0;
5737 again:
5738                 read_len = num_bytes - offset;
5739                 /* read as much space once a time */
5740                 ret = read_extent_data(root, data + offset,
5741                                 bytenr + offset, &read_len, mirror);
5742                 if (ret)
5743                         goto out;
5744                 data_checked = 0;
5745                 /* verify every 4k data's checksum */
5746                 while (data_checked < read_len) {
5747                         csum = ~(u32)0;
5748                         tmp = offset + data_checked;
5749
5750                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5751                                                csum, root->sectorsize);
5752                         btrfs_csum_final(csum, (u8 *)&csum);
5753
5754                         csum_offset = leaf_offset +
5755                                  tmp / root->sectorsize * csum_size;
5756                         read_extent_buffer(eb, (char *)&csum_expected,
5757                                            csum_offset, csum_size);
5758                         /* try another mirror */
5759                         if (csum != csum_expected) {
5760                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5761                                                 mirror, bytenr + tmp,
5762                                                 csum, csum_expected);
5763                                 num_copies = btrfs_num_copies(
5764                                                 &root->fs_info->mapping_tree,
5765                                                 bytenr, num_bytes);
5766                                 if (mirror < num_copies - 1) {
5767                                         mirror += 1;
5768                                         goto again;
5769                                 }
5770                         }
5771                         data_checked += root->sectorsize;
5772                 }
5773                 offset += read_len;
5774         }
5775 out:
5776         free(data);
5777         return ret;
5778 }
5779
5780 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5781                                u64 num_bytes)
5782 {
5783         struct btrfs_path *path;
5784         struct extent_buffer *leaf;
5785         struct btrfs_key key;
5786         int ret;
5787
5788         path = btrfs_alloc_path();
5789         if (!path) {
5790                 fprintf(stderr, "Error allocating path\n");
5791                 return -ENOMEM;
5792         }
5793
5794         key.objectid = bytenr;
5795         key.type = BTRFS_EXTENT_ITEM_KEY;
5796         key.offset = (u64)-1;
5797
5798 again:
5799         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5800                                 0, 0);
5801         if (ret < 0) {
5802                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5803                 btrfs_free_path(path);
5804                 return ret;
5805         } else if (ret) {
5806                 if (path->slots[0] > 0) {
5807                         path->slots[0]--;
5808                 } else {
5809                         ret = btrfs_prev_leaf(root, path);
5810                         if (ret < 0) {
5811                                 goto out;
5812                         } else if (ret > 0) {
5813                                 ret = 0;
5814                                 goto out;
5815                         }
5816                 }
5817         }
5818
5819         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5820
5821         /*
5822          * Block group items come before extent items if they have the same
5823          * bytenr, so walk back one more just in case.  Dear future traveller,
5824          * first congrats on mastering time travel.  Now if it's not too much
5825          * trouble could you go back to 2006 and tell Chris to make the
5826          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5827          * EXTENT_ITEM_KEY please?
5828          */
5829         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5830                 if (path->slots[0] > 0) {
5831                         path->slots[0]--;
5832                 } else {
5833                         ret = btrfs_prev_leaf(root, path);
5834                         if (ret < 0) {
5835                                 goto out;
5836                         } else if (ret > 0) {
5837                                 ret = 0;
5838                                 goto out;
5839                         }
5840                 }
5841                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5842         }
5843
5844         while (num_bytes) {
5845                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5846                         ret = btrfs_next_leaf(root, path);
5847                         if (ret < 0) {
5848                                 fprintf(stderr, "Error going to next leaf "
5849                                         "%d\n", ret);
5850                                 btrfs_free_path(path);
5851                                 return ret;
5852                         } else if (ret) {
5853                                 break;
5854                         }
5855                 }
5856                 leaf = path->nodes[0];
5857                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5858                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5859                         path->slots[0]++;
5860                         continue;
5861                 }
5862                 if (key.objectid + key.offset < bytenr) {
5863                         path->slots[0]++;
5864                         continue;
5865                 }
5866                 if (key.objectid > bytenr + num_bytes)
5867                         break;
5868
5869                 if (key.objectid == bytenr) {
5870                         if (key.offset >= num_bytes) {
5871                                 num_bytes = 0;
5872                                 break;
5873                         }
5874                         num_bytes -= key.offset;
5875                         bytenr += key.offset;
5876                 } else if (key.objectid < bytenr) {
5877                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5878                                 num_bytes = 0;
5879                                 break;
5880                         }
5881                         num_bytes = (bytenr + num_bytes) -
5882                                 (key.objectid + key.offset);
5883                         bytenr = key.objectid + key.offset;
5884                 } else {
5885                         if (key.objectid + key.offset < bytenr + num_bytes) {
5886                                 u64 new_start = key.objectid + key.offset;
5887                                 u64 new_bytes = bytenr + num_bytes - new_start;
5888
5889                                 /*
5890                                  * Weird case, the extent is in the middle of
5891                                  * our range, we'll have to search one side
5892                                  * and then the other.  Not sure if this happens
5893                                  * in real life, but no harm in coding it up
5894                                  * anyway just in case.
5895                                  */
5896                                 btrfs_release_path(path);
5897                                 ret = check_extent_exists(root, new_start,
5898                                                           new_bytes);
5899                                 if (ret) {
5900                                         fprintf(stderr, "Right section didn't "
5901                                                 "have a record\n");
5902                                         break;
5903                                 }
5904                                 num_bytes = key.objectid - bytenr;
5905                                 goto again;
5906                         }
5907                         num_bytes = key.objectid - bytenr;
5908                 }
5909                 path->slots[0]++;
5910         }
5911         ret = 0;
5912
5913 out:
5914         if (num_bytes && !ret) {
5915                 fprintf(stderr, "There are no extents for csum range "
5916                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5917                 ret = 1;
5918         }
5919
5920         btrfs_free_path(path);
5921         return ret;
5922 }
5923
5924 static int check_csums(struct btrfs_root *root)
5925 {
5926         struct btrfs_path *path;
5927         struct extent_buffer *leaf;
5928         struct btrfs_key key;
5929         u64 offset = 0, num_bytes = 0;
5930         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5931         int errors = 0;
5932         int ret;
5933         u64 data_len;
5934         unsigned long leaf_offset;
5935
5936         root = root->fs_info->csum_root;
5937         if (!extent_buffer_uptodate(root->node)) {
5938                 fprintf(stderr, "No valid csum tree found\n");
5939                 return -ENOENT;
5940         }
5941
5942         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5943         key.type = BTRFS_EXTENT_CSUM_KEY;
5944         key.offset = 0;
5945
5946         path = btrfs_alloc_path();
5947         if (!path)
5948                 return -ENOMEM;
5949
5950         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5951         if (ret < 0) {
5952                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5953                 btrfs_free_path(path);
5954                 return ret;
5955         }
5956
5957         if (ret > 0 && path->slots[0])
5958                 path->slots[0]--;
5959         ret = 0;
5960
5961         while (1) {
5962                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5963                         ret = btrfs_next_leaf(root, path);
5964                         if (ret < 0) {
5965                                 fprintf(stderr, "Error going to next leaf "
5966                                         "%d\n", ret);
5967                                 break;
5968                         }
5969                         if (ret)
5970                                 break;
5971                 }
5972                 leaf = path->nodes[0];
5973
5974                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5975                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5976                         path->slots[0]++;
5977                         continue;
5978                 }
5979
5980                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5981                               csum_size) * root->sectorsize;
5982                 if (!check_data_csum)
5983                         goto skip_csum_check;
5984                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5985                 ret = check_extent_csums(root, key.offset, data_len,
5986                                          leaf_offset, leaf);
5987                 if (ret)
5988                         break;
5989 skip_csum_check:
5990                 if (!num_bytes) {
5991                         offset = key.offset;
5992                 } else if (key.offset != offset + num_bytes) {
5993                         ret = check_extent_exists(root, offset, num_bytes);
5994                         if (ret) {
5995                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5996                                         "there is no extent record\n",
5997                                         offset, offset+num_bytes);
5998                                 errors++;
5999                         }
6000                         offset = key.offset;
6001                         num_bytes = 0;
6002                 }
6003                 num_bytes += data_len;
6004                 path->slots[0]++;
6005         }
6006
6007         btrfs_free_path(path);
6008         return errors;
6009 }
6010
6011 static int is_dropped_key(struct btrfs_key *key,
6012                           struct btrfs_key *drop_key) {
6013         if (key->objectid < drop_key->objectid)
6014                 return 1;
6015         else if (key->objectid == drop_key->objectid) {
6016                 if (key->type < drop_key->type)
6017                         return 1;
6018                 else if (key->type == drop_key->type) {
6019                         if (key->offset < drop_key->offset)
6020                                 return 1;
6021                 }
6022         }
6023         return 0;
6024 }
6025
6026 /*
6027  * Here are the rules for FULL_BACKREF.
6028  *
6029  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6030  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6031  *      FULL_BACKREF set.
6032  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6033  *    if it happened after the relocation occurred since we'll have dropped the
6034  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6035  *    have no real way to know for sure.
6036  *
6037  * We process the blocks one root at a time, and we start from the lowest root
6038  * objectid and go to the highest.  So we can just lookup the owner backref for
6039  * the record and if we don't find it then we know it doesn't exist and we have
6040  * a FULL BACKREF.
6041  *
6042  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6043  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6044  * be set or not and then we can check later once we've gathered all the refs.
6045  */
6046 static int calc_extent_flag(struct btrfs_root *root,
6047                            struct cache_tree *extent_cache,
6048                            struct extent_buffer *buf,
6049                            struct root_item_record *ri,
6050                            u64 *flags)
6051 {
6052         struct extent_record *rec;
6053         struct cache_extent *cache;
6054         struct tree_backref *tback;
6055         u64 owner = 0;
6056
6057         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6058         /* we have added this extent before */
6059         if (!cache)
6060                 return -ENOENT;
6061
6062         rec = container_of(cache, struct extent_record, cache);
6063
6064         /*
6065          * Except file/reloc tree, we can not have
6066          * FULL BACKREF MODE
6067          */
6068         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6069                 goto normal;
6070         /*
6071          * root node
6072          */
6073         if (buf->start == ri->bytenr)
6074                 goto normal;
6075
6076         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6077                 goto full_backref;
6078
6079         owner = btrfs_header_owner(buf);
6080         if (owner == ri->objectid)
6081                 goto normal;
6082
6083         tback = find_tree_backref(rec, 0, owner);
6084         if (!tback)
6085                 goto full_backref;
6086 normal:
6087         *flags = 0;
6088         if (rec->flag_block_full_backref != FLAG_UNSET &&
6089             rec->flag_block_full_backref != 0)
6090                 rec->bad_full_backref = 1;
6091         return 0;
6092 full_backref:
6093         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6094         if (rec->flag_block_full_backref != FLAG_UNSET &&
6095             rec->flag_block_full_backref != 1)
6096                 rec->bad_full_backref = 1;
6097         return 0;
6098 }
6099
6100 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6101 {
6102         fprintf(stderr, "Invalid key type(");
6103         print_key_type(stderr, 0, key_type);
6104         fprintf(stderr, ") found in root(");
6105         print_objectid(stderr, rootid, 0);
6106         fprintf(stderr, ")\n");
6107 }
6108
6109 /*
6110  * Check if the key is valid with its extent buffer.
6111  *
6112  * This is a early check in case invalid key exists in a extent buffer
6113  * This is not comprehensive yet, but should prevent wrong key/item passed
6114  * further
6115  */
6116 static int check_type_with_root(u64 rootid, u8 key_type)
6117 {
6118         switch (key_type) {
6119         /* Only valid in chunk tree */
6120         case BTRFS_DEV_ITEM_KEY:
6121         case BTRFS_CHUNK_ITEM_KEY:
6122                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6123                         goto err;
6124                 break;
6125         /* valid in csum and log tree */
6126         case BTRFS_CSUM_TREE_OBJECTID:
6127                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6128                       is_fstree(rootid)))
6129                         goto err;
6130                 break;
6131         case BTRFS_EXTENT_ITEM_KEY:
6132         case BTRFS_METADATA_ITEM_KEY:
6133         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6134                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6135                         goto err;
6136                 break;
6137         case BTRFS_ROOT_ITEM_KEY:
6138                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6139                         goto err;
6140                 break;
6141         case BTRFS_DEV_EXTENT_KEY:
6142                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6143                         goto err;
6144                 break;
6145         }
6146         return 0;
6147 err:
6148         report_mismatch_key_root(key_type, rootid);
6149         return -EINVAL;
6150 }
6151
6152 static int run_next_block(struct btrfs_root *root,
6153                           struct block_info *bits,
6154                           int bits_nr,
6155                           u64 *last,
6156                           struct cache_tree *pending,
6157                           struct cache_tree *seen,
6158                           struct cache_tree *reada,
6159                           struct cache_tree *nodes,
6160                           struct cache_tree *extent_cache,
6161                           struct cache_tree *chunk_cache,
6162                           struct rb_root *dev_cache,
6163                           struct block_group_tree *block_group_cache,
6164                           struct device_extent_tree *dev_extent_cache,
6165                           struct root_item_record *ri)
6166 {
6167         struct extent_buffer *buf;
6168         struct extent_record *rec = NULL;
6169         u64 bytenr;
6170         u32 size;
6171         u64 parent;
6172         u64 owner;
6173         u64 flags;
6174         u64 ptr;
6175         u64 gen = 0;
6176         int ret = 0;
6177         int i;
6178         int nritems;
6179         struct btrfs_key key;
6180         struct cache_extent *cache;
6181         int reada_bits;
6182
6183         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6184                                     bits_nr, &reada_bits);
6185         if (nritems == 0)
6186                 return 1;
6187
6188         if (!reada_bits) {
6189                 for(i = 0; i < nritems; i++) {
6190                         ret = add_cache_extent(reada, bits[i].start,
6191                                                bits[i].size);
6192                         if (ret == -EEXIST)
6193                                 continue;
6194
6195                         /* fixme, get the parent transid */
6196                         readahead_tree_block(root, bits[i].start,
6197                                              bits[i].size, 0);
6198                 }
6199         }
6200         *last = bits[0].start;
6201         bytenr = bits[0].start;
6202         size = bits[0].size;
6203
6204         cache = lookup_cache_extent(pending, bytenr, size);
6205         if (cache) {
6206                 remove_cache_extent(pending, cache);
6207                 free(cache);
6208         }
6209         cache = lookup_cache_extent(reada, bytenr, size);
6210         if (cache) {
6211                 remove_cache_extent(reada, cache);
6212                 free(cache);
6213         }
6214         cache = lookup_cache_extent(nodes, bytenr, size);
6215         if (cache) {
6216                 remove_cache_extent(nodes, cache);
6217                 free(cache);
6218         }
6219         cache = lookup_cache_extent(extent_cache, bytenr, size);
6220         if (cache) {
6221                 rec = container_of(cache, struct extent_record, cache);
6222                 gen = rec->parent_generation;
6223         }
6224
6225         /* fixme, get the real parent transid */
6226         buf = read_tree_block(root, bytenr, size, gen);
6227         if (!extent_buffer_uptodate(buf)) {
6228                 record_bad_block_io(root->fs_info,
6229                                     extent_cache, bytenr, size);
6230                 goto out;
6231         }
6232
6233         nritems = btrfs_header_nritems(buf);
6234
6235         flags = 0;
6236         if (!init_extent_tree) {
6237                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6238                                        btrfs_header_level(buf), 1, NULL,
6239                                        &flags);
6240                 if (ret < 0) {
6241                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6242                         if (ret < 0) {
6243                                 fprintf(stderr, "Couldn't calc extent flags\n");
6244                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6245                         }
6246                 }
6247         } else {
6248                 flags = 0;
6249                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6250                 if (ret < 0) {
6251                         fprintf(stderr, "Couldn't calc extent flags\n");
6252                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6253                 }
6254         }
6255
6256         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6257                 if (ri != NULL &&
6258                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6259                     ri->objectid == btrfs_header_owner(buf)) {
6260                         /*
6261                          * Ok we got to this block from it's original owner and
6262                          * we have FULL_BACKREF set.  Relocation can leave
6263                          * converted blocks over so this is altogether possible,
6264                          * however it's not possible if the generation > the
6265                          * last snapshot, so check for this case.
6266                          */
6267                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6268                             btrfs_header_generation(buf) > ri->last_snapshot) {
6269                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6270                                 rec->bad_full_backref = 1;
6271                         }
6272                 }
6273         } else {
6274                 if (ri != NULL &&
6275                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6276                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6277                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6278                         rec->bad_full_backref = 1;
6279                 }
6280         }
6281
6282         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6283                 rec->flag_block_full_backref = 1;
6284                 parent = bytenr;
6285                 owner = 0;
6286         } else {
6287                 rec->flag_block_full_backref = 0;
6288                 parent = 0;
6289                 owner = btrfs_header_owner(buf);
6290         }
6291
6292         ret = check_block(root, extent_cache, buf, flags);
6293         if (ret)
6294                 goto out;
6295
6296         if (btrfs_is_leaf(buf)) {
6297                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6298                 for (i = 0; i < nritems; i++) {
6299                         struct btrfs_file_extent_item *fi;
6300                         btrfs_item_key_to_cpu(buf, &key, i);
6301                         /*
6302                          * Check key type against the leaf owner.
6303                          * Could filter quite a lot of early error if
6304                          * owner is correct
6305                          */
6306                         if (check_type_with_root(btrfs_header_owner(buf),
6307                                                  key.type)) {
6308                                 fprintf(stderr, "ignoring invalid key\n");
6309                                 continue;
6310                         }
6311                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6312                                 process_extent_item(root, extent_cache, buf,
6313                                                     i);
6314                                 continue;
6315                         }
6316                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6317                                 process_extent_item(root, extent_cache, buf,
6318                                                     i);
6319                                 continue;
6320                         }
6321                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6322                                 total_csum_bytes +=
6323                                         btrfs_item_size_nr(buf, i);
6324                                 continue;
6325                         }
6326                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6327                                 process_chunk_item(chunk_cache, &key, buf, i);
6328                                 continue;
6329                         }
6330                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6331                                 process_device_item(dev_cache, &key, buf, i);
6332                                 continue;
6333                         }
6334                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6335                                 process_block_group_item(block_group_cache,
6336                                         &key, buf, i);
6337                                 continue;
6338                         }
6339                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6340                                 process_device_extent_item(dev_extent_cache,
6341                                         &key, buf, i);
6342                                 continue;
6343
6344                         }
6345                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6346 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6347                                 process_extent_ref_v0(extent_cache, buf, i);
6348 #else
6349                                 BUG();
6350 #endif
6351                                 continue;
6352                         }
6353
6354                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6355                                 ret = add_tree_backref(extent_cache,
6356                                                 key.objectid, 0, key.offset, 0);
6357                                 if (ret < 0)
6358                                         error("add_tree_backref failed: %s",
6359                                               strerror(-ret));
6360                                 continue;
6361                         }
6362                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6363                                 ret = add_tree_backref(extent_cache,
6364                                                 key.objectid, key.offset, 0, 0);
6365                                 if (ret < 0)
6366                                         error("add_tree_backref failed: %s",
6367                                               strerror(-ret));
6368                                 continue;
6369                         }
6370                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6371                                 struct btrfs_extent_data_ref *ref;
6372                                 ref = btrfs_item_ptr(buf, i,
6373                                                 struct btrfs_extent_data_ref);
6374                                 add_data_backref(extent_cache,
6375                                         key.objectid, 0,
6376                                         btrfs_extent_data_ref_root(buf, ref),
6377                                         btrfs_extent_data_ref_objectid(buf,
6378                                                                        ref),
6379                                         btrfs_extent_data_ref_offset(buf, ref),
6380                                         btrfs_extent_data_ref_count(buf, ref),
6381                                         0, root->sectorsize);
6382                                 continue;
6383                         }
6384                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6385                                 struct btrfs_shared_data_ref *ref;
6386                                 ref = btrfs_item_ptr(buf, i,
6387                                                 struct btrfs_shared_data_ref);
6388                                 add_data_backref(extent_cache,
6389                                         key.objectid, key.offset, 0, 0, 0,
6390                                         btrfs_shared_data_ref_count(buf, ref),
6391                                         0, root->sectorsize);
6392                                 continue;
6393                         }
6394                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6395                                 struct bad_item *bad;
6396
6397                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6398                                         continue;
6399                                 if (!owner)
6400                                         continue;
6401                                 bad = malloc(sizeof(struct bad_item));
6402                                 if (!bad)
6403                                         continue;
6404                                 INIT_LIST_HEAD(&bad->list);
6405                                 memcpy(&bad->key, &key,
6406                                        sizeof(struct btrfs_key));
6407                                 bad->root_id = owner;
6408                                 list_add_tail(&bad->list, &delete_items);
6409                                 continue;
6410                         }
6411                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6412                                 continue;
6413                         fi = btrfs_item_ptr(buf, i,
6414                                             struct btrfs_file_extent_item);
6415                         if (btrfs_file_extent_type(buf, fi) ==
6416                             BTRFS_FILE_EXTENT_INLINE)
6417                                 continue;
6418                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6419                                 continue;
6420
6421                         data_bytes_allocated +=
6422                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6423                         if (data_bytes_allocated < root->sectorsize) {
6424                                 abort();
6425                         }
6426                         data_bytes_referenced +=
6427                                 btrfs_file_extent_num_bytes(buf, fi);
6428                         add_data_backref(extent_cache,
6429                                 btrfs_file_extent_disk_bytenr(buf, fi),
6430                                 parent, owner, key.objectid, key.offset -
6431                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6432                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6433                 }
6434         } else {
6435                 int level;
6436                 struct btrfs_key first_key;
6437
6438                 first_key.objectid = 0;
6439
6440                 if (nritems > 0)
6441                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6442                 level = btrfs_header_level(buf);
6443                 for (i = 0; i < nritems; i++) {
6444                         struct extent_record tmpl;
6445
6446                         ptr = btrfs_node_blockptr(buf, i);
6447                         size = root->nodesize;
6448                         btrfs_node_key_to_cpu(buf, &key, i);
6449                         if (ri != NULL) {
6450                                 if ((level == ri->drop_level)
6451                                     && is_dropped_key(&key, &ri->drop_key)) {
6452                                         continue;
6453                                 }
6454                         }
6455
6456                         memset(&tmpl, 0, sizeof(tmpl));
6457                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6458                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6459                         tmpl.start = ptr;
6460                         tmpl.nr = size;
6461                         tmpl.refs = 1;
6462                         tmpl.metadata = 1;
6463                         tmpl.max_size = size;
6464                         ret = add_extent_rec(extent_cache, &tmpl);
6465                         if (ret < 0)
6466                                 goto out;
6467
6468                         ret = add_tree_backref(extent_cache, ptr, parent,
6469                                         owner, 1);
6470                         if (ret < 0) {
6471                                 error("add_tree_backref failed: %s",
6472                                       strerror(-ret));
6473                                 continue;
6474                         }
6475
6476                         if (level > 1) {
6477                                 add_pending(nodes, seen, ptr, size);
6478                         } else {
6479                                 add_pending(pending, seen, ptr, size);
6480                         }
6481                 }
6482                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6483                                       nritems) * sizeof(struct btrfs_key_ptr);
6484         }
6485         total_btree_bytes += buf->len;
6486         if (fs_root_objectid(btrfs_header_owner(buf)))
6487                 total_fs_tree_bytes += buf->len;
6488         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6489                 total_extent_tree_bytes += buf->len;
6490         if (!found_old_backref &&
6491             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6492             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6493             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6494                 found_old_backref = 1;
6495 out:
6496         free_extent_buffer(buf);
6497         return ret;
6498 }
6499
6500 static int add_root_to_pending(struct extent_buffer *buf,
6501                                struct cache_tree *extent_cache,
6502                                struct cache_tree *pending,
6503                                struct cache_tree *seen,
6504                                struct cache_tree *nodes,
6505                                u64 objectid)
6506 {
6507         struct extent_record tmpl;
6508         int ret;
6509
6510         if (btrfs_header_level(buf) > 0)
6511                 add_pending(nodes, seen, buf->start, buf->len);
6512         else
6513                 add_pending(pending, seen, buf->start, buf->len);
6514
6515         memset(&tmpl, 0, sizeof(tmpl));
6516         tmpl.start = buf->start;
6517         tmpl.nr = buf->len;
6518         tmpl.is_root = 1;
6519         tmpl.refs = 1;
6520         tmpl.metadata = 1;
6521         tmpl.max_size = buf->len;
6522         add_extent_rec(extent_cache, &tmpl);
6523
6524         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6525             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6526                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6527                                 0, 1);
6528         else
6529                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6530                                 1);
6531         return ret;
6532 }
6533
6534 /* as we fix the tree, we might be deleting blocks that
6535  * we're tracking for repair.  This hook makes sure we
6536  * remove any backrefs for blocks as we are fixing them.
6537  */
6538 static int free_extent_hook(struct btrfs_trans_handle *trans,
6539                             struct btrfs_root *root,
6540                             u64 bytenr, u64 num_bytes, u64 parent,
6541                             u64 root_objectid, u64 owner, u64 offset,
6542                             int refs_to_drop)
6543 {
6544         struct extent_record *rec;
6545         struct cache_extent *cache;
6546         int is_data;
6547         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6548
6549         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6550         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6551         if (!cache)
6552                 return 0;
6553
6554         rec = container_of(cache, struct extent_record, cache);
6555         if (is_data) {
6556                 struct data_backref *back;
6557                 back = find_data_backref(rec, parent, root_objectid, owner,
6558                                          offset, 1, bytenr, num_bytes);
6559                 if (!back)
6560                         goto out;
6561                 if (back->node.found_ref) {
6562                         back->found_ref -= refs_to_drop;
6563                         if (rec->refs)
6564                                 rec->refs -= refs_to_drop;
6565                 }
6566                 if (back->node.found_extent_tree) {
6567                         back->num_refs -= refs_to_drop;
6568                         if (rec->extent_item_refs)
6569                                 rec->extent_item_refs -= refs_to_drop;
6570                 }
6571                 if (back->found_ref == 0)
6572                         back->node.found_ref = 0;
6573                 if (back->num_refs == 0)
6574                         back->node.found_extent_tree = 0;
6575
6576                 if (!back->node.found_extent_tree && back->node.found_ref) {
6577                         list_del(&back->node.list);
6578                         free(back);
6579                 }
6580         } else {
6581                 struct tree_backref *back;
6582                 back = find_tree_backref(rec, parent, root_objectid);
6583                 if (!back)
6584                         goto out;
6585                 if (back->node.found_ref) {
6586                         if (rec->refs)
6587                                 rec->refs--;
6588                         back->node.found_ref = 0;
6589                 }
6590                 if (back->node.found_extent_tree) {
6591                         if (rec->extent_item_refs)
6592                                 rec->extent_item_refs--;
6593                         back->node.found_extent_tree = 0;
6594                 }
6595                 if (!back->node.found_extent_tree && back->node.found_ref) {
6596                         list_del(&back->node.list);
6597                         free(back);
6598                 }
6599         }
6600         maybe_free_extent_rec(extent_cache, rec);
6601 out:
6602         return 0;
6603 }
6604
6605 static int delete_extent_records(struct btrfs_trans_handle *trans,
6606                                  struct btrfs_root *root,
6607                                  struct btrfs_path *path,
6608                                  u64 bytenr, u64 new_len)
6609 {
6610         struct btrfs_key key;
6611         struct btrfs_key found_key;
6612         struct extent_buffer *leaf;
6613         int ret;
6614         int slot;
6615
6616
6617         key.objectid = bytenr;
6618         key.type = (u8)-1;
6619         key.offset = (u64)-1;
6620
6621         while(1) {
6622                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6623                                         &key, path, 0, 1);
6624                 if (ret < 0)
6625                         break;
6626
6627                 if (ret > 0) {
6628                         ret = 0;
6629                         if (path->slots[0] == 0)
6630                                 break;
6631                         path->slots[0]--;
6632                 }
6633                 ret = 0;
6634
6635                 leaf = path->nodes[0];
6636                 slot = path->slots[0];
6637
6638                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6639                 if (found_key.objectid != bytenr)
6640                         break;
6641
6642                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6643                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6644                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6645                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6646                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6647                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6648                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6649                         btrfs_release_path(path);
6650                         if (found_key.type == 0) {
6651                                 if (found_key.offset == 0)
6652                                         break;
6653                                 key.offset = found_key.offset - 1;
6654                                 key.type = found_key.type;
6655                         }
6656                         key.type = found_key.type - 1;
6657                         key.offset = (u64)-1;
6658                         continue;
6659                 }
6660
6661                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6662                         found_key.objectid, found_key.type, found_key.offset);
6663
6664                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6665                 if (ret)
6666                         break;
6667                 btrfs_release_path(path);
6668
6669                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6670                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6671                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6672                                 found_key.offset : root->nodesize;
6673
6674                         ret = btrfs_update_block_group(trans, root, bytenr,
6675                                                        bytes, 0, 0);
6676                         if (ret)
6677                                 break;
6678                 }
6679         }
6680
6681         btrfs_release_path(path);
6682         return ret;
6683 }
6684
6685 /*
6686  * for a single backref, this will allocate a new extent
6687  * and add the backref to it.
6688  */
6689 static int record_extent(struct btrfs_trans_handle *trans,
6690                          struct btrfs_fs_info *info,
6691                          struct btrfs_path *path,
6692                          struct extent_record *rec,
6693                          struct extent_backref *back,
6694                          int allocated, u64 flags)
6695 {
6696         int ret;
6697         struct btrfs_root *extent_root = info->extent_root;
6698         struct extent_buffer *leaf;
6699         struct btrfs_key ins_key;
6700         struct btrfs_extent_item *ei;
6701         struct tree_backref *tback;
6702         struct data_backref *dback;
6703         struct btrfs_tree_block_info *bi;
6704
6705         if (!back->is_data)
6706                 rec->max_size = max_t(u64, rec->max_size,
6707                                     info->extent_root->nodesize);
6708
6709         if (!allocated) {
6710                 u32 item_size = sizeof(*ei);
6711
6712                 if (!back->is_data)
6713                         item_size += sizeof(*bi);
6714
6715                 ins_key.objectid = rec->start;
6716                 ins_key.offset = rec->max_size;
6717                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6718
6719                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6720                                         &ins_key, item_size);
6721                 if (ret)
6722                         goto fail;
6723
6724                 leaf = path->nodes[0];
6725                 ei = btrfs_item_ptr(leaf, path->slots[0],
6726                                     struct btrfs_extent_item);
6727
6728                 btrfs_set_extent_refs(leaf, ei, 0);
6729                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6730
6731                 if (back->is_data) {
6732                         btrfs_set_extent_flags(leaf, ei,
6733                                                BTRFS_EXTENT_FLAG_DATA);
6734                 } else {
6735                         struct btrfs_disk_key copy_key;;
6736
6737                         tback = to_tree_backref(back);
6738                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6739                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6740                                              sizeof(*bi));
6741
6742                         btrfs_set_disk_key_objectid(&copy_key,
6743                                                     rec->info_objectid);
6744                         btrfs_set_disk_key_type(&copy_key, 0);
6745                         btrfs_set_disk_key_offset(&copy_key, 0);
6746
6747                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6748                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6749
6750                         btrfs_set_extent_flags(leaf, ei,
6751                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6752                 }
6753
6754                 btrfs_mark_buffer_dirty(leaf);
6755                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6756                                                rec->max_size, 1, 0);
6757                 if (ret)
6758                         goto fail;
6759                 btrfs_release_path(path);
6760         }
6761
6762         if (back->is_data) {
6763                 u64 parent;
6764                 int i;
6765
6766                 dback = to_data_backref(back);
6767                 if (back->full_backref)
6768                         parent = dback->parent;
6769                 else
6770                         parent = 0;
6771
6772                 for (i = 0; i < dback->found_ref; i++) {
6773                         /* if parent != 0, we're doing a full backref
6774                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6775                          * just makes the backref allocator create a data
6776                          * backref
6777                          */
6778                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6779                                                    rec->start, rec->max_size,
6780                                                    parent,
6781                                                    dback->root,
6782                                                    parent ?
6783                                                    BTRFS_FIRST_FREE_OBJECTID :
6784                                                    dback->owner,
6785                                                    dback->offset);
6786                         if (ret)
6787                                 break;
6788                 }
6789                 fprintf(stderr, "adding new data backref"
6790                                 " on %llu %s %llu owner %llu"
6791                                 " offset %llu found %d\n",
6792                                 (unsigned long long)rec->start,
6793                                 back->full_backref ?
6794                                 "parent" : "root",
6795                                 back->full_backref ?
6796                                 (unsigned long long)parent :
6797                                 (unsigned long long)dback->root,
6798                                 (unsigned long long)dback->owner,
6799                                 (unsigned long long)dback->offset,
6800                                 dback->found_ref);
6801         } else {
6802                 u64 parent;
6803
6804                 tback = to_tree_backref(back);
6805                 if (back->full_backref)
6806                         parent = tback->parent;
6807                 else
6808                         parent = 0;
6809
6810                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6811                                            rec->start, rec->max_size,
6812                                            parent, tback->root, 0, 0);
6813                 fprintf(stderr, "adding new tree backref on "
6814                         "start %llu len %llu parent %llu root %llu\n",
6815                         rec->start, rec->max_size, parent, tback->root);
6816         }
6817 fail:
6818         btrfs_release_path(path);
6819         return ret;
6820 }
6821
6822 static struct extent_entry *find_entry(struct list_head *entries,
6823                                        u64 bytenr, u64 bytes)
6824 {
6825         struct extent_entry *entry = NULL;
6826
6827         list_for_each_entry(entry, entries, list) {
6828                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6829                         return entry;
6830         }
6831
6832         return NULL;
6833 }
6834
6835 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6836 {
6837         struct extent_entry *entry, *best = NULL, *prev = NULL;
6838
6839         list_for_each_entry(entry, entries, list) {
6840                 if (!prev) {
6841                         prev = entry;
6842                         continue;
6843                 }
6844
6845                 /*
6846                  * If there are as many broken entries as entries then we know
6847                  * not to trust this particular entry.
6848                  */
6849                 if (entry->broken == entry->count)
6850                         continue;
6851
6852                 /*
6853                  * If our current entry == best then we can't be sure our best
6854                  * is really the best, so we need to keep searching.
6855                  */
6856                 if (best && best->count == entry->count) {
6857                         prev = entry;
6858                         best = NULL;
6859                         continue;
6860                 }
6861
6862                 /* Prev == entry, not good enough, have to keep searching */
6863                 if (!prev->broken && prev->count == entry->count)
6864                         continue;
6865
6866                 if (!best)
6867                         best = (prev->count > entry->count) ? prev : entry;
6868                 else if (best->count < entry->count)
6869                         best = entry;
6870                 prev = entry;
6871         }
6872
6873         return best;
6874 }
6875
6876 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6877                       struct data_backref *dback, struct extent_entry *entry)
6878 {
6879         struct btrfs_trans_handle *trans;
6880         struct btrfs_root *root;
6881         struct btrfs_file_extent_item *fi;
6882         struct extent_buffer *leaf;
6883         struct btrfs_key key;
6884         u64 bytenr, bytes;
6885         int ret, err;
6886
6887         key.objectid = dback->root;
6888         key.type = BTRFS_ROOT_ITEM_KEY;
6889         key.offset = (u64)-1;
6890         root = btrfs_read_fs_root(info, &key);
6891         if (IS_ERR(root)) {
6892                 fprintf(stderr, "Couldn't find root for our ref\n");
6893                 return -EINVAL;
6894         }
6895
6896         /*
6897          * The backref points to the original offset of the extent if it was
6898          * split, so we need to search down to the offset we have and then walk
6899          * forward until we find the backref we're looking for.
6900          */
6901         key.objectid = dback->owner;
6902         key.type = BTRFS_EXTENT_DATA_KEY;
6903         key.offset = dback->offset;
6904         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6905         if (ret < 0) {
6906                 fprintf(stderr, "Error looking up ref %d\n", ret);
6907                 return ret;
6908         }
6909
6910         while (1) {
6911                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6912                         ret = btrfs_next_leaf(root, path);
6913                         if (ret) {
6914                                 fprintf(stderr, "Couldn't find our ref, next\n");
6915                                 return -EINVAL;
6916                         }
6917                 }
6918                 leaf = path->nodes[0];
6919                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6920                 if (key.objectid != dback->owner ||
6921                     key.type != BTRFS_EXTENT_DATA_KEY) {
6922                         fprintf(stderr, "Couldn't find our ref, search\n");
6923                         return -EINVAL;
6924                 }
6925                 fi = btrfs_item_ptr(leaf, path->slots[0],
6926                                     struct btrfs_file_extent_item);
6927                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6928                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6929
6930                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6931                         break;
6932                 path->slots[0]++;
6933         }
6934
6935         btrfs_release_path(path);
6936
6937         trans = btrfs_start_transaction(root, 1);
6938         if (IS_ERR(trans))
6939                 return PTR_ERR(trans);
6940
6941         /*
6942          * Ok we have the key of the file extent we want to fix, now we can cow
6943          * down to the thing and fix it.
6944          */
6945         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6946         if (ret < 0) {
6947                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6948                         key.objectid, key.type, key.offset, ret);
6949                 goto out;
6950         }
6951         if (ret > 0) {
6952                 fprintf(stderr, "Well that's odd, we just found this key "
6953                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6954                         key.offset);
6955                 ret = -EINVAL;
6956                 goto out;
6957         }
6958         leaf = path->nodes[0];
6959         fi = btrfs_item_ptr(leaf, path->slots[0],
6960                             struct btrfs_file_extent_item);
6961
6962         if (btrfs_file_extent_compression(leaf, fi) &&
6963             dback->disk_bytenr != entry->bytenr) {
6964                 fprintf(stderr, "Ref doesn't match the record start and is "
6965                         "compressed, please take a btrfs-image of this file "
6966                         "system and send it to a btrfs developer so they can "
6967                         "complete this functionality for bytenr %Lu\n",
6968                         dback->disk_bytenr);
6969                 ret = -EINVAL;
6970                 goto out;
6971         }
6972
6973         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6974                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6975         } else if (dback->disk_bytenr > entry->bytenr) {
6976                 u64 off_diff, offset;
6977
6978                 off_diff = dback->disk_bytenr - entry->bytenr;
6979                 offset = btrfs_file_extent_offset(leaf, fi);
6980                 if (dback->disk_bytenr + offset +
6981                     btrfs_file_extent_num_bytes(leaf, fi) >
6982                     entry->bytenr + entry->bytes) {
6983                         fprintf(stderr, "Ref is past the entry end, please "
6984                                 "take a btrfs-image of this file system and "
6985                                 "send it to a btrfs developer, ref %Lu\n",
6986                                 dback->disk_bytenr);
6987                         ret = -EINVAL;
6988                         goto out;
6989                 }
6990                 offset += off_diff;
6991                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6992                 btrfs_set_file_extent_offset(leaf, fi, offset);
6993         } else if (dback->disk_bytenr < entry->bytenr) {
6994                 u64 offset;
6995
6996                 offset = btrfs_file_extent_offset(leaf, fi);
6997                 if (dback->disk_bytenr + offset < entry->bytenr) {
6998                         fprintf(stderr, "Ref is before the entry start, please"
6999                                 " take a btrfs-image of this file system and "
7000                                 "send it to a btrfs developer, ref %Lu\n",
7001                                 dback->disk_bytenr);
7002                         ret = -EINVAL;
7003                         goto out;
7004                 }
7005
7006                 offset += dback->disk_bytenr;
7007                 offset -= entry->bytenr;
7008                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7009                 btrfs_set_file_extent_offset(leaf, fi, offset);
7010         }
7011
7012         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7013
7014         /*
7015          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7016          * only do this if we aren't using compression, otherwise it's a
7017          * trickier case.
7018          */
7019         if (!btrfs_file_extent_compression(leaf, fi))
7020                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7021         else
7022                 printf("ram bytes may be wrong?\n");
7023         btrfs_mark_buffer_dirty(leaf);
7024 out:
7025         err = btrfs_commit_transaction(trans, root);
7026         btrfs_release_path(path);
7027         return ret ? ret : err;
7028 }
7029
7030 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7031                            struct extent_record *rec)
7032 {
7033         struct extent_backref *back;
7034         struct data_backref *dback;
7035         struct extent_entry *entry, *best = NULL;
7036         LIST_HEAD(entries);
7037         int nr_entries = 0;
7038         int broken_entries = 0;
7039         int ret = 0;
7040         short mismatch = 0;
7041
7042         /*
7043          * Metadata is easy and the backrefs should always agree on bytenr and
7044          * size, if not we've got bigger issues.
7045          */
7046         if (rec->metadata)
7047                 return 0;
7048
7049         list_for_each_entry(back, &rec->backrefs, list) {
7050                 if (back->full_backref || !back->is_data)
7051                         continue;
7052
7053                 dback = to_data_backref(back);
7054
7055                 /*
7056                  * We only pay attention to backrefs that we found a real
7057                  * backref for.
7058                  */
7059                 if (dback->found_ref == 0)
7060                         continue;
7061
7062                 /*
7063                  * For now we only catch when the bytes don't match, not the
7064                  * bytenr.  We can easily do this at the same time, but I want
7065                  * to have a fs image to test on before we just add repair
7066                  * functionality willy-nilly so we know we won't screw up the
7067                  * repair.
7068                  */
7069
7070                 entry = find_entry(&entries, dback->disk_bytenr,
7071                                    dback->bytes);
7072                 if (!entry) {
7073                         entry = malloc(sizeof(struct extent_entry));
7074                         if (!entry) {
7075                                 ret = -ENOMEM;
7076                                 goto out;
7077                         }
7078                         memset(entry, 0, sizeof(*entry));
7079                         entry->bytenr = dback->disk_bytenr;
7080                         entry->bytes = dback->bytes;
7081                         list_add_tail(&entry->list, &entries);
7082                         nr_entries++;
7083                 }
7084
7085                 /*
7086                  * If we only have on entry we may think the entries agree when
7087                  * in reality they don't so we have to do some extra checking.
7088                  */
7089                 if (dback->disk_bytenr != rec->start ||
7090                     dback->bytes != rec->nr || back->broken)
7091                         mismatch = 1;
7092
7093                 if (back->broken) {
7094                         entry->broken++;
7095                         broken_entries++;
7096                 }
7097
7098                 entry->count++;
7099         }
7100
7101         /* Yay all the backrefs agree, carry on good sir */
7102         if (nr_entries <= 1 && !mismatch)
7103                 goto out;
7104
7105         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7106                 "%Lu\n", rec->start);
7107
7108         /*
7109          * First we want to see if the backrefs can agree amongst themselves who
7110          * is right, so figure out which one of the entries has the highest
7111          * count.
7112          */
7113         best = find_most_right_entry(&entries);
7114
7115         /*
7116          * Ok so we may have an even split between what the backrefs think, so
7117          * this is where we use the extent ref to see what it thinks.
7118          */
7119         if (!best) {
7120                 entry = find_entry(&entries, rec->start, rec->nr);
7121                 if (!entry && (!broken_entries || !rec->found_rec)) {
7122                         fprintf(stderr, "Backrefs don't agree with each other "
7123                                 "and extent record doesn't agree with anybody,"
7124                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7125                                 rec->start, rec->nr);
7126                         ret = -EINVAL;
7127                         goto out;
7128                 } else if (!entry) {
7129                         /*
7130                          * Ok our backrefs were broken, we'll assume this is the
7131                          * correct value and add an entry for this range.
7132                          */
7133                         entry = malloc(sizeof(struct extent_entry));
7134                         if (!entry) {
7135                                 ret = -ENOMEM;
7136                                 goto out;
7137                         }
7138                         memset(entry, 0, sizeof(*entry));
7139                         entry->bytenr = rec->start;
7140                         entry->bytes = rec->nr;
7141                         list_add_tail(&entry->list, &entries);
7142                         nr_entries++;
7143                 }
7144                 entry->count++;
7145                 best = find_most_right_entry(&entries);
7146                 if (!best) {
7147                         fprintf(stderr, "Backrefs and extent record evenly "
7148                                 "split on who is right, this is going to "
7149                                 "require user input to fix bytenr %Lu bytes "
7150                                 "%Lu\n", rec->start, rec->nr);
7151                         ret = -EINVAL;
7152                         goto out;
7153                 }
7154         }
7155
7156         /*
7157          * I don't think this can happen currently as we'll abort() if we catch
7158          * this case higher up, but in case somebody removes that we still can't
7159          * deal with it properly here yet, so just bail out of that's the case.
7160          */
7161         if (best->bytenr != rec->start) {
7162                 fprintf(stderr, "Extent start and backref starts don't match, "
7163                         "please use btrfs-image on this file system and send "
7164                         "it to a btrfs developer so they can make fsck fix "
7165                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7166                         rec->start, rec->nr);
7167                 ret = -EINVAL;
7168                 goto out;
7169         }
7170
7171         /*
7172          * Ok great we all agreed on an extent record, let's go find the real
7173          * references and fix up the ones that don't match.
7174          */
7175         list_for_each_entry(back, &rec->backrefs, list) {
7176                 if (back->full_backref || !back->is_data)
7177                         continue;
7178
7179                 dback = to_data_backref(back);
7180
7181                 /*
7182                  * Still ignoring backrefs that don't have a real ref attached
7183                  * to them.
7184                  */
7185                 if (dback->found_ref == 0)
7186                         continue;
7187
7188                 if (dback->bytes == best->bytes &&
7189                     dback->disk_bytenr == best->bytenr)
7190                         continue;
7191
7192                 ret = repair_ref(info, path, dback, best);
7193                 if (ret)
7194                         goto out;
7195         }
7196
7197         /*
7198          * Ok we messed with the actual refs, which means we need to drop our
7199          * entire cache and go back and rescan.  I know this is a huge pain and
7200          * adds a lot of extra work, but it's the only way to be safe.  Once all
7201          * the backrefs agree we may not need to do anything to the extent
7202          * record itself.
7203          */
7204         ret = -EAGAIN;
7205 out:
7206         while (!list_empty(&entries)) {
7207                 entry = list_entry(entries.next, struct extent_entry, list);
7208                 list_del_init(&entry->list);
7209                 free(entry);
7210         }
7211         return ret;
7212 }
7213
7214 static int process_duplicates(struct btrfs_root *root,
7215                               struct cache_tree *extent_cache,
7216                               struct extent_record *rec)
7217 {
7218         struct extent_record *good, *tmp;
7219         struct cache_extent *cache;
7220         int ret;
7221
7222         /*
7223          * If we found a extent record for this extent then return, or if we
7224          * have more than one duplicate we are likely going to need to delete
7225          * something.
7226          */
7227         if (rec->found_rec || rec->num_duplicates > 1)
7228                 return 0;
7229
7230         /* Shouldn't happen but just in case */
7231         BUG_ON(!rec->num_duplicates);
7232
7233         /*
7234          * So this happens if we end up with a backref that doesn't match the
7235          * actual extent entry.  So either the backref is bad or the extent
7236          * entry is bad.  Either way we want to have the extent_record actually
7237          * reflect what we found in the extent_tree, so we need to take the
7238          * duplicate out and use that as the extent_record since the only way we
7239          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7240          */
7241         remove_cache_extent(extent_cache, &rec->cache);
7242
7243         good = to_extent_record(rec->dups.next);
7244         list_del_init(&good->list);
7245         INIT_LIST_HEAD(&good->backrefs);
7246         INIT_LIST_HEAD(&good->dups);
7247         good->cache.start = good->start;
7248         good->cache.size = good->nr;
7249         good->content_checked = 0;
7250         good->owner_ref_checked = 0;
7251         good->num_duplicates = 0;
7252         good->refs = rec->refs;
7253         list_splice_init(&rec->backrefs, &good->backrefs);
7254         while (1) {
7255                 cache = lookup_cache_extent(extent_cache, good->start,
7256                                             good->nr);
7257                 if (!cache)
7258                         break;
7259                 tmp = container_of(cache, struct extent_record, cache);
7260
7261                 /*
7262                  * If we find another overlapping extent and it's found_rec is
7263                  * set then it's a duplicate and we need to try and delete
7264                  * something.
7265                  */
7266                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7267                         if (list_empty(&good->list))
7268                                 list_add_tail(&good->list,
7269                                               &duplicate_extents);
7270                         good->num_duplicates += tmp->num_duplicates + 1;
7271                         list_splice_init(&tmp->dups, &good->dups);
7272                         list_del_init(&tmp->list);
7273                         list_add_tail(&tmp->list, &good->dups);
7274                         remove_cache_extent(extent_cache, &tmp->cache);
7275                         continue;
7276                 }
7277
7278                 /*
7279                  * Ok we have another non extent item backed extent rec, so lets
7280                  * just add it to this extent and carry on like we did above.
7281                  */
7282                 good->refs += tmp->refs;
7283                 list_splice_init(&tmp->backrefs, &good->backrefs);
7284                 remove_cache_extent(extent_cache, &tmp->cache);
7285                 free(tmp);
7286         }
7287         ret = insert_cache_extent(extent_cache, &good->cache);
7288         BUG_ON(ret);
7289         free(rec);
7290         return good->num_duplicates ? 0 : 1;
7291 }
7292
7293 static int delete_duplicate_records(struct btrfs_root *root,
7294                                     struct extent_record *rec)
7295 {
7296         struct btrfs_trans_handle *trans;
7297         LIST_HEAD(delete_list);
7298         struct btrfs_path *path;
7299         struct extent_record *tmp, *good, *n;
7300         int nr_del = 0;
7301         int ret = 0, err;
7302         struct btrfs_key key;
7303
7304         path = btrfs_alloc_path();
7305         if (!path) {
7306                 ret = -ENOMEM;
7307                 goto out;
7308         }
7309
7310         good = rec;
7311         /* Find the record that covers all of the duplicates. */
7312         list_for_each_entry(tmp, &rec->dups, list) {
7313                 if (good->start < tmp->start)
7314                         continue;
7315                 if (good->nr > tmp->nr)
7316                         continue;
7317
7318                 if (tmp->start + tmp->nr < good->start + good->nr) {
7319                         fprintf(stderr, "Ok we have overlapping extents that "
7320                                 "aren't completely covered by each other, this "
7321                                 "is going to require more careful thought.  "
7322                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7323                                 tmp->start, tmp->nr, good->start, good->nr);
7324                         abort();
7325                 }
7326                 good = tmp;
7327         }
7328
7329         if (good != rec)
7330                 list_add_tail(&rec->list, &delete_list);
7331
7332         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7333                 if (tmp == good)
7334                         continue;
7335                 list_move_tail(&tmp->list, &delete_list);
7336         }
7337
7338         root = root->fs_info->extent_root;
7339         trans = btrfs_start_transaction(root, 1);
7340         if (IS_ERR(trans)) {
7341                 ret = PTR_ERR(trans);
7342                 goto out;
7343         }
7344
7345         list_for_each_entry(tmp, &delete_list, list) {
7346                 if (tmp->found_rec == 0)
7347                         continue;
7348                 key.objectid = tmp->start;
7349                 key.type = BTRFS_EXTENT_ITEM_KEY;
7350                 key.offset = tmp->nr;
7351
7352                 /* Shouldn't happen but just in case */
7353                 if (tmp->metadata) {
7354                         fprintf(stderr, "Well this shouldn't happen, extent "
7355                                 "record overlaps but is metadata? "
7356                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7357                         abort();
7358                 }
7359
7360                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7361                 if (ret) {
7362                         if (ret > 0)
7363                                 ret = -EINVAL;
7364                         break;
7365                 }
7366                 ret = btrfs_del_item(trans, root, path);
7367                 if (ret)
7368                         break;
7369                 btrfs_release_path(path);
7370                 nr_del++;
7371         }
7372         err = btrfs_commit_transaction(trans, root);
7373         if (err && !ret)
7374                 ret = err;
7375 out:
7376         while (!list_empty(&delete_list)) {
7377                 tmp = to_extent_record(delete_list.next);
7378                 list_del_init(&tmp->list);
7379                 if (tmp == rec)
7380                         continue;
7381                 free(tmp);
7382         }
7383
7384         while (!list_empty(&rec->dups)) {
7385                 tmp = to_extent_record(rec->dups.next);
7386                 list_del_init(&tmp->list);
7387                 free(tmp);
7388         }
7389
7390         btrfs_free_path(path);
7391
7392         if (!ret && !nr_del)
7393                 rec->num_duplicates = 0;
7394
7395         return ret ? ret : nr_del;
7396 }
7397
7398 static int find_possible_backrefs(struct btrfs_fs_info *info,
7399                                   struct btrfs_path *path,
7400                                   struct cache_tree *extent_cache,
7401                                   struct extent_record *rec)
7402 {
7403         struct btrfs_root *root;
7404         struct extent_backref *back;
7405         struct data_backref *dback;
7406         struct cache_extent *cache;
7407         struct btrfs_file_extent_item *fi;
7408         struct btrfs_key key;
7409         u64 bytenr, bytes;
7410         int ret;
7411
7412         list_for_each_entry(back, &rec->backrefs, list) {
7413                 /* Don't care about full backrefs (poor unloved backrefs) */
7414                 if (back->full_backref || !back->is_data)
7415                         continue;
7416
7417                 dback = to_data_backref(back);
7418
7419                 /* We found this one, we don't need to do a lookup */
7420                 if (dback->found_ref)
7421                         continue;
7422
7423                 key.objectid = dback->root;
7424                 key.type = BTRFS_ROOT_ITEM_KEY;
7425                 key.offset = (u64)-1;
7426
7427                 root = btrfs_read_fs_root(info, &key);
7428
7429                 /* No root, definitely a bad ref, skip */
7430                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7431                         continue;
7432                 /* Other err, exit */
7433                 if (IS_ERR(root))
7434                         return PTR_ERR(root);
7435
7436                 key.objectid = dback->owner;
7437                 key.type = BTRFS_EXTENT_DATA_KEY;
7438                 key.offset = dback->offset;
7439                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7440                 if (ret) {
7441                         btrfs_release_path(path);
7442                         if (ret < 0)
7443                                 return ret;
7444                         /* Didn't find it, we can carry on */
7445                         ret = 0;
7446                         continue;
7447                 }
7448
7449                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7450                                     struct btrfs_file_extent_item);
7451                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7452                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7453                 btrfs_release_path(path);
7454                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7455                 if (cache) {
7456                         struct extent_record *tmp;
7457                         tmp = container_of(cache, struct extent_record, cache);
7458
7459                         /*
7460                          * If we found an extent record for the bytenr for this
7461                          * particular backref then we can't add it to our
7462                          * current extent record.  We only want to add backrefs
7463                          * that don't have a corresponding extent item in the
7464                          * extent tree since they likely belong to this record
7465                          * and we need to fix it if it doesn't match bytenrs.
7466                          */
7467                         if  (tmp->found_rec)
7468                                 continue;
7469                 }
7470
7471                 dback->found_ref += 1;
7472                 dback->disk_bytenr = bytenr;
7473                 dback->bytes = bytes;
7474
7475                 /*
7476                  * Set this so the verify backref code knows not to trust the
7477                  * values in this backref.
7478                  */
7479                 back->broken = 1;
7480         }
7481
7482         return 0;
7483 }
7484
7485 /*
7486  * Record orphan data ref into corresponding root.
7487  *
7488  * Return 0 if the extent item contains data ref and recorded.
7489  * Return 1 if the extent item contains no useful data ref
7490  *   In that case, it may contain only shared_dataref or metadata backref,
7491  *   or the file extent exists (this should be handled by the extent bytenr
7492  *   recovery routine)
7493  * Return <0 if something goes wrong.
7494  */
7495 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7496                                       struct extent_record *rec)
7497 {
7498         struct btrfs_key key;
7499         struct btrfs_root *dest_root;
7500         struct extent_backref *back;
7501         struct data_backref *dback;
7502         struct orphan_data_extent *orphan;
7503         struct btrfs_path *path;
7504         int recorded_data_ref = 0;
7505         int ret = 0;
7506
7507         if (rec->metadata)
7508                 return 1;
7509         path = btrfs_alloc_path();
7510         if (!path)
7511                 return -ENOMEM;
7512         list_for_each_entry(back, &rec->backrefs, list) {
7513                 if (back->full_backref || !back->is_data ||
7514                     !back->found_extent_tree)
7515                         continue;
7516                 dback = to_data_backref(back);
7517                 if (dback->found_ref)
7518                         continue;
7519                 key.objectid = dback->root;
7520                 key.type = BTRFS_ROOT_ITEM_KEY;
7521                 key.offset = (u64)-1;
7522
7523                 dest_root = btrfs_read_fs_root(fs_info, &key);
7524
7525                 /* For non-exist root we just skip it */
7526                 if (IS_ERR(dest_root) || !dest_root)
7527                         continue;
7528
7529                 key.objectid = dback->owner;
7530                 key.type = BTRFS_EXTENT_DATA_KEY;
7531                 key.offset = dback->offset;
7532
7533                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7534                 btrfs_release_path(path);
7535                 /*
7536                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7537                  * we need to record it for inode/file extent rebuild.
7538                  * For ret > 0, we record it only for file extent rebuild.
7539                  * For ret == 0, the file extent exists but only bytenr
7540                  * mismatch, let the original bytenr fix routine to handle,
7541                  * don't record it.
7542                  */
7543                 if (ret == 0)
7544                         continue;
7545                 ret = 0;
7546                 orphan = malloc(sizeof(*orphan));
7547                 if (!orphan) {
7548                         ret = -ENOMEM;
7549                         goto out;
7550                 }
7551                 INIT_LIST_HEAD(&orphan->list);
7552                 orphan->root = dback->root;
7553                 orphan->objectid = dback->owner;
7554                 orphan->offset = dback->offset;
7555                 orphan->disk_bytenr = rec->cache.start;
7556                 orphan->disk_len = rec->cache.size;
7557                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7558                 recorded_data_ref = 1;
7559         }
7560 out:
7561         btrfs_free_path(path);
7562         if (!ret)
7563                 return !recorded_data_ref;
7564         else
7565                 return ret;
7566 }
7567
7568 /*
7569  * when an incorrect extent item is found, this will delete
7570  * all of the existing entries for it and recreate them
7571  * based on what the tree scan found.
7572  */
7573 static int fixup_extent_refs(struct btrfs_fs_info *info,
7574                              struct cache_tree *extent_cache,
7575                              struct extent_record *rec)
7576 {
7577         struct btrfs_trans_handle *trans = NULL;
7578         int ret;
7579         struct btrfs_path *path;
7580         struct list_head *cur = rec->backrefs.next;
7581         struct cache_extent *cache;
7582         struct extent_backref *back;
7583         int allocated = 0;
7584         u64 flags = 0;
7585
7586         if (rec->flag_block_full_backref)
7587                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7588
7589         path = btrfs_alloc_path();
7590         if (!path)
7591                 return -ENOMEM;
7592
7593         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7594                 /*
7595                  * Sometimes the backrefs themselves are so broken they don't
7596                  * get attached to any meaningful rec, so first go back and
7597                  * check any of our backrefs that we couldn't find and throw
7598                  * them into the list if we find the backref so that
7599                  * verify_backrefs can figure out what to do.
7600                  */
7601                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7602                 if (ret < 0)
7603                         goto out;
7604         }
7605
7606         /* step one, make sure all of the backrefs agree */
7607         ret = verify_backrefs(info, path, rec);
7608         if (ret < 0)
7609                 goto out;
7610
7611         trans = btrfs_start_transaction(info->extent_root, 1);
7612         if (IS_ERR(trans)) {
7613                 ret = PTR_ERR(trans);
7614                 goto out;
7615         }
7616
7617         /* step two, delete all the existing records */
7618         ret = delete_extent_records(trans, info->extent_root, path,
7619                                     rec->start, rec->max_size);
7620
7621         if (ret < 0)
7622                 goto out;
7623
7624         /* was this block corrupt?  If so, don't add references to it */
7625         cache = lookup_cache_extent(info->corrupt_blocks,
7626                                     rec->start, rec->max_size);
7627         if (cache) {
7628                 ret = 0;
7629                 goto out;
7630         }
7631
7632         /* step three, recreate all the refs we did find */
7633         while(cur != &rec->backrefs) {
7634                 back = to_extent_backref(cur);
7635                 cur = cur->next;
7636
7637                 /*
7638                  * if we didn't find any references, don't create a
7639                  * new extent record
7640                  */
7641                 if (!back->found_ref)
7642                         continue;
7643
7644                 rec->bad_full_backref = 0;
7645                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7646                 allocated = 1;
7647
7648                 if (ret)
7649                         goto out;
7650         }
7651 out:
7652         if (trans) {
7653                 int err = btrfs_commit_transaction(trans, info->extent_root);
7654                 if (!ret)
7655                         ret = err;
7656         }
7657
7658         btrfs_free_path(path);
7659         return ret;
7660 }
7661
7662 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7663                               struct extent_record *rec)
7664 {
7665         struct btrfs_trans_handle *trans;
7666         struct btrfs_root *root = fs_info->extent_root;
7667         struct btrfs_path *path;
7668         struct btrfs_extent_item *ei;
7669         struct btrfs_key key;
7670         u64 flags;
7671         int ret = 0;
7672
7673         key.objectid = rec->start;
7674         if (rec->metadata) {
7675                 key.type = BTRFS_METADATA_ITEM_KEY;
7676                 key.offset = rec->info_level;
7677         } else {
7678                 key.type = BTRFS_EXTENT_ITEM_KEY;
7679                 key.offset = rec->max_size;
7680         }
7681
7682         path = btrfs_alloc_path();
7683         if (!path)
7684                 return -ENOMEM;
7685
7686         trans = btrfs_start_transaction(root, 0);
7687         if (IS_ERR(trans)) {
7688                 btrfs_free_path(path);
7689                 return PTR_ERR(trans);
7690         }
7691
7692         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7693         if (ret < 0) {
7694                 btrfs_free_path(path);
7695                 btrfs_commit_transaction(trans, root);
7696                 return ret;
7697         } else if (ret) {
7698                 fprintf(stderr, "Didn't find extent for %llu\n",
7699                         (unsigned long long)rec->start);
7700                 btrfs_free_path(path);
7701                 btrfs_commit_transaction(trans, root);
7702                 return -ENOENT;
7703         }
7704
7705         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7706                             struct btrfs_extent_item);
7707         flags = btrfs_extent_flags(path->nodes[0], ei);
7708         if (rec->flag_block_full_backref) {
7709                 fprintf(stderr, "setting full backref on %llu\n",
7710                         (unsigned long long)key.objectid);
7711                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7712         } else {
7713                 fprintf(stderr, "clearing full backref on %llu\n",
7714                         (unsigned long long)key.objectid);
7715                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7716         }
7717         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7718         btrfs_mark_buffer_dirty(path->nodes[0]);
7719         btrfs_free_path(path);
7720         return btrfs_commit_transaction(trans, root);
7721 }
7722
7723 /* right now we only prune from the extent allocation tree */
7724 static int prune_one_block(struct btrfs_trans_handle *trans,
7725                            struct btrfs_fs_info *info,
7726                            struct btrfs_corrupt_block *corrupt)
7727 {
7728         int ret;
7729         struct btrfs_path path;
7730         struct extent_buffer *eb;
7731         u64 found;
7732         int slot;
7733         int nritems;
7734         int level = corrupt->level + 1;
7735
7736         btrfs_init_path(&path);
7737 again:
7738         /* we want to stop at the parent to our busted block */
7739         path.lowest_level = level;
7740
7741         ret = btrfs_search_slot(trans, info->extent_root,
7742                                 &corrupt->key, &path, -1, 1);
7743
7744         if (ret < 0)
7745                 goto out;
7746
7747         eb = path.nodes[level];
7748         if (!eb) {
7749                 ret = -ENOENT;
7750                 goto out;
7751         }
7752
7753         /*
7754          * hopefully the search gave us the block we want to prune,
7755          * lets try that first
7756          */
7757         slot = path.slots[level];
7758         found =  btrfs_node_blockptr(eb, slot);
7759         if (found == corrupt->cache.start)
7760                 goto del_ptr;
7761
7762         nritems = btrfs_header_nritems(eb);
7763
7764         /* the search failed, lets scan this node and hope we find it */
7765         for (slot = 0; slot < nritems; slot++) {
7766                 found =  btrfs_node_blockptr(eb, slot);
7767                 if (found == corrupt->cache.start)
7768                         goto del_ptr;
7769         }
7770         /*
7771          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7772          * to this block
7773          */
7774         if (eb == info->extent_root->node) {
7775                 ret = -ENOENT;
7776                 goto out;
7777         } else {
7778                 level++;
7779                 btrfs_release_path(&path);
7780                 goto again;
7781         }
7782
7783 del_ptr:
7784         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7785         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7786
7787 out:
7788         btrfs_release_path(&path);
7789         return ret;
7790 }
7791
7792 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7793 {
7794         struct btrfs_trans_handle *trans = NULL;
7795         struct cache_extent *cache;
7796         struct btrfs_corrupt_block *corrupt;
7797
7798         while (1) {
7799                 cache = search_cache_extent(info->corrupt_blocks, 0);
7800                 if (!cache)
7801                         break;
7802                 if (!trans) {
7803                         trans = btrfs_start_transaction(info->extent_root, 1);
7804                         if (IS_ERR(trans))
7805                                 return PTR_ERR(trans);
7806                 }
7807                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7808                 prune_one_block(trans, info, corrupt);
7809                 remove_cache_extent(info->corrupt_blocks, cache);
7810         }
7811         if (trans)
7812                 return btrfs_commit_transaction(trans, info->extent_root);
7813         return 0;
7814 }
7815
7816 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7817 {
7818         struct btrfs_block_group_cache *cache;
7819         u64 start, end;
7820         int ret;
7821
7822         while (1) {
7823                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7824                                             &start, &end, EXTENT_DIRTY);
7825                 if (ret)
7826                         break;
7827                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7828                                    GFP_NOFS);
7829         }
7830
7831         start = 0;
7832         while (1) {
7833                 cache = btrfs_lookup_first_block_group(fs_info, start);
7834                 if (!cache)
7835                         break;
7836                 if (cache->cached)
7837                         cache->cached = 0;
7838                 start = cache->key.objectid + cache->key.offset;
7839         }
7840 }
7841
7842 static int check_extent_refs(struct btrfs_root *root,
7843                              struct cache_tree *extent_cache)
7844 {
7845         struct extent_record *rec;
7846         struct cache_extent *cache;
7847         int err = 0;
7848         int ret = 0;
7849         int fixed = 0;
7850         int had_dups = 0;
7851         int recorded = 0;
7852
7853         if (repair) {
7854                 /*
7855                  * if we're doing a repair, we have to make sure
7856                  * we don't allocate from the problem extents.
7857                  * In the worst case, this will be all the
7858                  * extents in the FS
7859                  */
7860                 cache = search_cache_extent(extent_cache, 0);
7861                 while(cache) {
7862                         rec = container_of(cache, struct extent_record, cache);
7863                         set_extent_dirty(root->fs_info->excluded_extents,
7864                                          rec->start,
7865                                          rec->start + rec->max_size - 1,
7866                                          GFP_NOFS);
7867                         cache = next_cache_extent(cache);
7868                 }
7869
7870                 /* pin down all the corrupted blocks too */
7871                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7872                 while(cache) {
7873                         set_extent_dirty(root->fs_info->excluded_extents,
7874                                          cache->start,
7875                                          cache->start + cache->size - 1,
7876                                          GFP_NOFS);
7877                         cache = next_cache_extent(cache);
7878                 }
7879                 prune_corrupt_blocks(root->fs_info);
7880                 reset_cached_block_groups(root->fs_info);
7881         }
7882
7883         reset_cached_block_groups(root->fs_info);
7884
7885         /*
7886          * We need to delete any duplicate entries we find first otherwise we
7887          * could mess up the extent tree when we have backrefs that actually
7888          * belong to a different extent item and not the weird duplicate one.
7889          */
7890         while (repair && !list_empty(&duplicate_extents)) {
7891                 rec = to_extent_record(duplicate_extents.next);
7892                 list_del_init(&rec->list);
7893
7894                 /* Sometimes we can find a backref before we find an actual
7895                  * extent, so we need to process it a little bit to see if there
7896                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7897                  * if this is a backref screwup.  If we need to delete stuff
7898                  * process_duplicates() will return 0, otherwise it will return
7899                  * 1 and we
7900                  */
7901                 if (process_duplicates(root, extent_cache, rec))
7902                         continue;
7903                 ret = delete_duplicate_records(root, rec);
7904                 if (ret < 0)
7905                         return ret;
7906                 /*
7907                  * delete_duplicate_records will return the number of entries
7908                  * deleted, so if it's greater than 0 then we know we actually
7909                  * did something and we need to remove.
7910                  */
7911                 if (ret)
7912                         had_dups = 1;
7913         }
7914
7915         if (had_dups)
7916                 return -EAGAIN;
7917
7918         while(1) {
7919                 int cur_err = 0;
7920
7921                 fixed = 0;
7922                 recorded = 0;
7923                 cache = search_cache_extent(extent_cache, 0);
7924                 if (!cache)
7925                         break;
7926                 rec = container_of(cache, struct extent_record, cache);
7927                 if (rec->num_duplicates) {
7928                         fprintf(stderr, "extent item %llu has multiple extent "
7929                                 "items\n", (unsigned long long)rec->start);
7930                         err = 1;
7931                         cur_err = 1;
7932                 }
7933
7934                 if (rec->refs != rec->extent_item_refs) {
7935                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7936                                 (unsigned long long)rec->start,
7937                                 (unsigned long long)rec->nr);
7938                         fprintf(stderr, "extent item %llu, found %llu\n",
7939                                 (unsigned long long)rec->extent_item_refs,
7940                                 (unsigned long long)rec->refs);
7941                         ret = record_orphan_data_extents(root->fs_info, rec);
7942                         if (ret < 0)
7943                                 goto repair_abort;
7944                         if (ret == 0) {
7945                                 recorded = 1;
7946                         } else {
7947                                 /*
7948                                  * we can't use the extent to repair file
7949                                  * extent, let the fallback method handle it.
7950                                  */
7951                                 if (!fixed && repair) {
7952                                         ret = fixup_extent_refs(
7953                                                         root->fs_info,
7954                                                         extent_cache, rec);
7955                                         if (ret)
7956                                                 goto repair_abort;
7957                                         fixed = 1;
7958                                 }
7959                         }
7960                         err = 1;
7961                         cur_err = 1;
7962                 }
7963                 if (all_backpointers_checked(rec, 1)) {
7964                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7965                                 (unsigned long long)rec->start,
7966                                 (unsigned long long)rec->nr);
7967
7968                         if (!fixed && !recorded && repair) {
7969                                 ret = fixup_extent_refs(root->fs_info,
7970                                                         extent_cache, rec);
7971                                 if (ret)
7972                                         goto repair_abort;
7973                                 fixed = 1;
7974                         }
7975                         cur_err = 1;
7976                         err = 1;
7977                 }
7978                 if (!rec->owner_ref_checked) {
7979                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7980                                 (unsigned long long)rec->start,
7981                                 (unsigned long long)rec->nr);
7982                         if (!fixed && !recorded && repair) {
7983                                 ret = fixup_extent_refs(root->fs_info,
7984                                                         extent_cache, rec);
7985                                 if (ret)
7986                                         goto repair_abort;
7987                                 fixed = 1;
7988                         }
7989                         err = 1;
7990                         cur_err = 1;
7991                 }
7992                 if (rec->bad_full_backref) {
7993                         fprintf(stderr, "bad full backref, on [%llu]\n",
7994                                 (unsigned long long)rec->start);
7995                         if (repair) {
7996                                 ret = fixup_extent_flags(root->fs_info, rec);
7997                                 if (ret)
7998                                         goto repair_abort;
7999                                 fixed = 1;
8000                         }
8001                         err = 1;
8002                         cur_err = 1;
8003                 }
8004                 /*
8005                  * Although it's not a extent ref's problem, we reuse this
8006                  * routine for error reporting.
8007                  * No repair function yet.
8008                  */
8009                 if (rec->crossing_stripes) {
8010                         fprintf(stderr,
8011                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8012                                 rec->start, rec->start + rec->max_size);
8013                         err = 1;
8014                         cur_err = 1;
8015                 }
8016
8017                 if (rec->wrong_chunk_type) {
8018                         fprintf(stderr,
8019                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8020                                 rec->start, rec->start + rec->max_size);
8021                         err = 1;
8022                         cur_err = 1;
8023                 }
8024
8025                 remove_cache_extent(extent_cache, cache);
8026                 free_all_extent_backrefs(rec);
8027                 if (!init_extent_tree && repair && (!cur_err || fixed))
8028                         clear_extent_dirty(root->fs_info->excluded_extents,
8029                                            rec->start,
8030                                            rec->start + rec->max_size - 1,
8031                                            GFP_NOFS);
8032                 free(rec);
8033         }
8034 repair_abort:
8035         if (repair) {
8036                 if (ret && ret != -EAGAIN) {
8037                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8038                         exit(1);
8039                 } else if (!ret) {
8040                         struct btrfs_trans_handle *trans;
8041
8042                         root = root->fs_info->extent_root;
8043                         trans = btrfs_start_transaction(root, 1);
8044                         if (IS_ERR(trans)) {
8045                                 ret = PTR_ERR(trans);
8046                                 goto repair_abort;
8047                         }
8048
8049                         btrfs_fix_block_accounting(trans, root);
8050                         ret = btrfs_commit_transaction(trans, root);
8051                         if (ret)
8052                                 goto repair_abort;
8053                 }
8054                 if (err)
8055                         fprintf(stderr, "repaired damaged extent references\n");
8056                 return ret;
8057         }
8058         return err;
8059 }
8060
8061 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8062 {
8063         u64 stripe_size;
8064
8065         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8066                 stripe_size = length;
8067                 stripe_size /= num_stripes;
8068         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8069                 stripe_size = length * 2;
8070                 stripe_size /= num_stripes;
8071         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8072                 stripe_size = length;
8073                 stripe_size /= (num_stripes - 1);
8074         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8075                 stripe_size = length;
8076                 stripe_size /= (num_stripes - 2);
8077         } else {
8078                 stripe_size = length;
8079         }
8080         return stripe_size;
8081 }
8082
8083 /*
8084  * Check the chunk with its block group/dev list ref:
8085  * Return 0 if all refs seems valid.
8086  * Return 1 if part of refs seems valid, need later check for rebuild ref
8087  * like missing block group and needs to search extent tree to rebuild them.
8088  * Return -1 if essential refs are missing and unable to rebuild.
8089  */
8090 static int check_chunk_refs(struct chunk_record *chunk_rec,
8091                             struct block_group_tree *block_group_cache,
8092                             struct device_extent_tree *dev_extent_cache,
8093                             int silent)
8094 {
8095         struct cache_extent *block_group_item;
8096         struct block_group_record *block_group_rec;
8097         struct cache_extent *dev_extent_item;
8098         struct device_extent_record *dev_extent_rec;
8099         u64 devid;
8100         u64 offset;
8101         u64 length;
8102         int metadump_v2 = 0;
8103         int i;
8104         int ret = 0;
8105
8106         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8107                                                chunk_rec->offset,
8108                                                chunk_rec->length);
8109         if (block_group_item) {
8110                 block_group_rec = container_of(block_group_item,
8111                                                struct block_group_record,
8112                                                cache);
8113                 if (chunk_rec->length != block_group_rec->offset ||
8114                     chunk_rec->offset != block_group_rec->objectid ||
8115                     (!metadump_v2 &&
8116                      chunk_rec->type_flags != block_group_rec->flags)) {
8117                         if (!silent)
8118                                 fprintf(stderr,
8119                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8120                                         chunk_rec->objectid,
8121                                         chunk_rec->type,
8122                                         chunk_rec->offset,
8123                                         chunk_rec->length,
8124                                         chunk_rec->offset,
8125                                         chunk_rec->type_flags,
8126                                         block_group_rec->objectid,
8127                                         block_group_rec->type,
8128                                         block_group_rec->offset,
8129                                         block_group_rec->offset,
8130                                         block_group_rec->objectid,
8131                                         block_group_rec->flags);
8132                         ret = -1;
8133                 } else {
8134                         list_del_init(&block_group_rec->list);
8135                         chunk_rec->bg_rec = block_group_rec;
8136                 }
8137         } else {
8138                 if (!silent)
8139                         fprintf(stderr,
8140                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8141                                 chunk_rec->objectid,
8142                                 chunk_rec->type,
8143                                 chunk_rec->offset,
8144                                 chunk_rec->length,
8145                                 chunk_rec->offset,
8146                                 chunk_rec->type_flags);
8147                 ret = 1;
8148         }
8149
8150         if (metadump_v2)
8151                 return ret;
8152
8153         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8154                                     chunk_rec->num_stripes);
8155         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8156                 devid = chunk_rec->stripes[i].devid;
8157                 offset = chunk_rec->stripes[i].offset;
8158                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8159                                                        devid, offset, length);
8160                 if (dev_extent_item) {
8161                         dev_extent_rec = container_of(dev_extent_item,
8162                                                 struct device_extent_record,
8163                                                 cache);
8164                         if (dev_extent_rec->objectid != devid ||
8165                             dev_extent_rec->offset != offset ||
8166                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8167                             dev_extent_rec->length != length) {
8168                                 if (!silent)
8169                                         fprintf(stderr,
8170                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8171                                                 chunk_rec->objectid,
8172                                                 chunk_rec->type,
8173                                                 chunk_rec->offset,
8174                                                 chunk_rec->stripes[i].devid,
8175                                                 chunk_rec->stripes[i].offset,
8176                                                 dev_extent_rec->objectid,
8177                                                 dev_extent_rec->offset,
8178                                                 dev_extent_rec->length);
8179                                 ret = -1;
8180                         } else {
8181                                 list_move(&dev_extent_rec->chunk_list,
8182                                           &chunk_rec->dextents);
8183                         }
8184                 } else {
8185                         if (!silent)
8186                                 fprintf(stderr,
8187                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8188                                         chunk_rec->objectid,
8189                                         chunk_rec->type,
8190                                         chunk_rec->offset,
8191                                         chunk_rec->stripes[i].devid,
8192                                         chunk_rec->stripes[i].offset);
8193                         ret = -1;
8194                 }
8195         }
8196         return ret;
8197 }
8198
8199 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8200 int check_chunks(struct cache_tree *chunk_cache,
8201                  struct block_group_tree *block_group_cache,
8202                  struct device_extent_tree *dev_extent_cache,
8203                  struct list_head *good, struct list_head *bad,
8204                  struct list_head *rebuild, int silent)
8205 {
8206         struct cache_extent *chunk_item;
8207         struct chunk_record *chunk_rec;
8208         struct block_group_record *bg_rec;
8209         struct device_extent_record *dext_rec;
8210         int err;
8211         int ret = 0;
8212
8213         chunk_item = first_cache_extent(chunk_cache);
8214         while (chunk_item) {
8215                 chunk_rec = container_of(chunk_item, struct chunk_record,
8216                                          cache);
8217                 err = check_chunk_refs(chunk_rec, block_group_cache,
8218                                        dev_extent_cache, silent);
8219                 if (err < 0)
8220                         ret = err;
8221                 if (err == 0 && good)
8222                         list_add_tail(&chunk_rec->list, good);
8223                 if (err > 0 && rebuild)
8224                         list_add_tail(&chunk_rec->list, rebuild);
8225                 if (err < 0 && bad)
8226                         list_add_tail(&chunk_rec->list, bad);
8227                 chunk_item = next_cache_extent(chunk_item);
8228         }
8229
8230         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8231                 if (!silent)
8232                         fprintf(stderr,
8233                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8234                                 bg_rec->objectid,
8235                                 bg_rec->offset,
8236                                 bg_rec->flags);
8237                 if (!ret)
8238                         ret = 1;
8239         }
8240
8241         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8242                             chunk_list) {
8243                 if (!silent)
8244                         fprintf(stderr,
8245                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8246                                 dext_rec->objectid,
8247                                 dext_rec->offset,
8248                                 dext_rec->length);
8249                 if (!ret)
8250                         ret = 1;
8251         }
8252         return ret;
8253 }
8254
8255
8256 static int check_device_used(struct device_record *dev_rec,
8257                              struct device_extent_tree *dext_cache)
8258 {
8259         struct cache_extent *cache;
8260         struct device_extent_record *dev_extent_rec;
8261         u64 total_byte = 0;
8262
8263         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8264         while (cache) {
8265                 dev_extent_rec = container_of(cache,
8266                                               struct device_extent_record,
8267                                               cache);
8268                 if (dev_extent_rec->objectid != dev_rec->devid)
8269                         break;
8270
8271                 list_del_init(&dev_extent_rec->device_list);
8272                 total_byte += dev_extent_rec->length;
8273                 cache = next_cache_extent(cache);
8274         }
8275
8276         if (total_byte != dev_rec->byte_used) {
8277                 fprintf(stderr,
8278                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8279                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8280                         dev_rec->type, dev_rec->offset);
8281                 return -1;
8282         } else {
8283                 return 0;
8284         }
8285 }
8286
8287 /* check btrfs_dev_item -> btrfs_dev_extent */
8288 static int check_devices(struct rb_root *dev_cache,
8289                          struct device_extent_tree *dev_extent_cache)
8290 {
8291         struct rb_node *dev_node;
8292         struct device_record *dev_rec;
8293         struct device_extent_record *dext_rec;
8294         int err;
8295         int ret = 0;
8296
8297         dev_node = rb_first(dev_cache);
8298         while (dev_node) {
8299                 dev_rec = container_of(dev_node, struct device_record, node);
8300                 err = check_device_used(dev_rec, dev_extent_cache);
8301                 if (err)
8302                         ret = err;
8303
8304                 dev_node = rb_next(dev_node);
8305         }
8306         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8307                             device_list) {
8308                 fprintf(stderr,
8309                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8310                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8311                 if (!ret)
8312                         ret = 1;
8313         }
8314         return ret;
8315 }
8316
8317 static int add_root_item_to_list(struct list_head *head,
8318                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8319                                   u8 level, u8 drop_level,
8320                                   int level_size, struct btrfs_key *drop_key)
8321 {
8322
8323         struct root_item_record *ri_rec;
8324         ri_rec = malloc(sizeof(*ri_rec));
8325         if (!ri_rec)
8326                 return -ENOMEM;
8327         ri_rec->bytenr = bytenr;
8328         ri_rec->objectid = objectid;
8329         ri_rec->level = level;
8330         ri_rec->level_size = level_size;
8331         ri_rec->drop_level = drop_level;
8332         ri_rec->last_snapshot = last_snapshot;
8333         if (drop_key)
8334                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8335         list_add_tail(&ri_rec->list, head);
8336
8337         return 0;
8338 }
8339
8340 static void free_root_item_list(struct list_head *list)
8341 {
8342         struct root_item_record *ri_rec;
8343
8344         while (!list_empty(list)) {
8345                 ri_rec = list_first_entry(list, struct root_item_record,
8346                                           list);
8347                 list_del_init(&ri_rec->list);
8348                 free(ri_rec);
8349         }
8350 }
8351
8352 static int deal_root_from_list(struct list_head *list,
8353                                struct btrfs_root *root,
8354                                struct block_info *bits,
8355                                int bits_nr,
8356                                struct cache_tree *pending,
8357                                struct cache_tree *seen,
8358                                struct cache_tree *reada,
8359                                struct cache_tree *nodes,
8360                                struct cache_tree *extent_cache,
8361                                struct cache_tree *chunk_cache,
8362                                struct rb_root *dev_cache,
8363                                struct block_group_tree *block_group_cache,
8364                                struct device_extent_tree *dev_extent_cache)
8365 {
8366         int ret = 0;
8367         u64 last;
8368
8369         while (!list_empty(list)) {
8370                 struct root_item_record *rec;
8371                 struct extent_buffer *buf;
8372                 rec = list_entry(list->next,
8373                                  struct root_item_record, list);
8374                 last = 0;
8375                 buf = read_tree_block(root->fs_info->tree_root,
8376                                       rec->bytenr, rec->level_size, 0);
8377                 if (!extent_buffer_uptodate(buf)) {
8378                         free_extent_buffer(buf);
8379                         ret = -EIO;
8380                         break;
8381                 }
8382                 ret = add_root_to_pending(buf, extent_cache, pending,
8383                                     seen, nodes, rec->objectid);
8384                 if (ret < 0)
8385                         break;
8386                 /*
8387                  * To rebuild extent tree, we need deal with snapshot
8388                  * one by one, otherwise we deal with node firstly which
8389                  * can maximize readahead.
8390                  */
8391                 while (1) {
8392                         ret = run_next_block(root, bits, bits_nr, &last,
8393                                              pending, seen, reada, nodes,
8394                                              extent_cache, chunk_cache,
8395                                              dev_cache, block_group_cache,
8396                                              dev_extent_cache, rec);
8397                         if (ret != 0)
8398                                 break;
8399                 }
8400                 free_extent_buffer(buf);
8401                 list_del(&rec->list);
8402                 free(rec);
8403                 if (ret < 0)
8404                         break;
8405         }
8406         while (ret >= 0) {
8407                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8408                                      reada, nodes, extent_cache, chunk_cache,
8409                                      dev_cache, block_group_cache,
8410                                      dev_extent_cache, NULL);
8411                 if (ret != 0) {
8412                         if (ret > 0)
8413                                 ret = 0;
8414                         break;
8415                 }
8416         }
8417         return ret;
8418 }
8419
8420 static int check_chunks_and_extents(struct btrfs_root *root)
8421 {
8422         struct rb_root dev_cache;
8423         struct cache_tree chunk_cache;
8424         struct block_group_tree block_group_cache;
8425         struct device_extent_tree dev_extent_cache;
8426         struct cache_tree extent_cache;
8427         struct cache_tree seen;
8428         struct cache_tree pending;
8429         struct cache_tree reada;
8430         struct cache_tree nodes;
8431         struct extent_io_tree excluded_extents;
8432         struct cache_tree corrupt_blocks;
8433         struct btrfs_path path;
8434         struct btrfs_key key;
8435         struct btrfs_key found_key;
8436         int ret, err = 0;
8437         struct block_info *bits;
8438         int bits_nr;
8439         struct extent_buffer *leaf;
8440         int slot;
8441         struct btrfs_root_item ri;
8442         struct list_head dropping_trees;
8443         struct list_head normal_trees;
8444         struct btrfs_root *root1;
8445         u64 objectid;
8446         u32 level_size;
8447         u8 level;
8448
8449         dev_cache = RB_ROOT;
8450         cache_tree_init(&chunk_cache);
8451         block_group_tree_init(&block_group_cache);
8452         device_extent_tree_init(&dev_extent_cache);
8453
8454         cache_tree_init(&extent_cache);
8455         cache_tree_init(&seen);
8456         cache_tree_init(&pending);
8457         cache_tree_init(&nodes);
8458         cache_tree_init(&reada);
8459         cache_tree_init(&corrupt_blocks);
8460         extent_io_tree_init(&excluded_extents);
8461         INIT_LIST_HEAD(&dropping_trees);
8462         INIT_LIST_HEAD(&normal_trees);
8463
8464         if (repair) {
8465                 root->fs_info->excluded_extents = &excluded_extents;
8466                 root->fs_info->fsck_extent_cache = &extent_cache;
8467                 root->fs_info->free_extent_hook = free_extent_hook;
8468                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8469         }
8470
8471         bits_nr = 1024;
8472         bits = malloc(bits_nr * sizeof(struct block_info));
8473         if (!bits) {
8474                 perror("malloc");
8475                 exit(1);
8476         }
8477
8478         if (ctx.progress_enabled) {
8479                 ctx.tp = TASK_EXTENTS;
8480                 task_start(ctx.info);
8481         }
8482
8483 again:
8484         root1 = root->fs_info->tree_root;
8485         level = btrfs_header_level(root1->node);
8486         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8487                                     root1->node->start, 0, level, 0,
8488                                     root1->nodesize, NULL);
8489         if (ret < 0)
8490                 goto out;
8491         root1 = root->fs_info->chunk_root;
8492         level = btrfs_header_level(root1->node);
8493         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8494                                     root1->node->start, 0, level, 0,
8495                                     root1->nodesize, NULL);
8496         if (ret < 0)
8497                 goto out;
8498         btrfs_init_path(&path);
8499         key.offset = 0;
8500         key.objectid = 0;
8501         key.type = BTRFS_ROOT_ITEM_KEY;
8502         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8503                                         &key, &path, 0, 0);
8504         if (ret < 0)
8505                 goto out;
8506         while(1) {
8507                 leaf = path.nodes[0];
8508                 slot = path.slots[0];
8509                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8510                         ret = btrfs_next_leaf(root, &path);
8511                         if (ret != 0)
8512                                 break;
8513                         leaf = path.nodes[0];
8514                         slot = path.slots[0];
8515                 }
8516                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8517                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8518                         unsigned long offset;
8519                         u64 last_snapshot;
8520
8521                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8522                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8523                         last_snapshot = btrfs_root_last_snapshot(&ri);
8524                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8525                                 level = btrfs_root_level(&ri);
8526                                 level_size = root->nodesize;
8527                                 ret = add_root_item_to_list(&normal_trees,
8528                                                 found_key.objectid,
8529                                                 btrfs_root_bytenr(&ri),
8530                                                 last_snapshot, level,
8531                                                 0, level_size, NULL);
8532                                 if (ret < 0)
8533                                         goto out;
8534                         } else {
8535                                 level = btrfs_root_level(&ri);
8536                                 level_size = root->nodesize;
8537                                 objectid = found_key.objectid;
8538                                 btrfs_disk_key_to_cpu(&found_key,
8539                                                       &ri.drop_progress);
8540                                 ret = add_root_item_to_list(&dropping_trees,
8541                                                 objectid,
8542                                                 btrfs_root_bytenr(&ri),
8543                                                 last_snapshot, level,
8544                                                 ri.drop_level,
8545                                                 level_size, &found_key);
8546                                 if (ret < 0)
8547                                         goto out;
8548                         }
8549                 }
8550                 path.slots[0]++;
8551         }
8552         btrfs_release_path(&path);
8553
8554         /*
8555          * check_block can return -EAGAIN if it fixes something, please keep
8556          * this in mind when dealing with return values from these functions, if
8557          * we get -EAGAIN we want to fall through and restart the loop.
8558          */
8559         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8560                                   &seen, &reada, &nodes, &extent_cache,
8561                                   &chunk_cache, &dev_cache, &block_group_cache,
8562                                   &dev_extent_cache);
8563         if (ret < 0) {
8564                 if (ret == -EAGAIN)
8565                         goto loop;
8566                 goto out;
8567         }
8568         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8569                                   &pending, &seen, &reada, &nodes,
8570                                   &extent_cache, &chunk_cache, &dev_cache,
8571                                   &block_group_cache, &dev_extent_cache);
8572         if (ret < 0) {
8573                 if (ret == -EAGAIN)
8574                         goto loop;
8575                 goto out;
8576         }
8577
8578         ret = check_chunks(&chunk_cache, &block_group_cache,
8579                            &dev_extent_cache, NULL, NULL, NULL, 0);
8580         if (ret) {
8581                 if (ret == -EAGAIN)
8582                         goto loop;
8583                 err = ret;
8584         }
8585
8586         ret = check_extent_refs(root, &extent_cache);
8587         if (ret < 0) {
8588                 if (ret == -EAGAIN)
8589                         goto loop;
8590                 goto out;
8591         }
8592
8593         ret = check_devices(&dev_cache, &dev_extent_cache);
8594         if (ret && err)
8595                 ret = err;
8596
8597 out:
8598         task_stop(ctx.info);
8599         if (repair) {
8600                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8601                 extent_io_tree_cleanup(&excluded_extents);
8602                 root->fs_info->fsck_extent_cache = NULL;
8603                 root->fs_info->free_extent_hook = NULL;
8604                 root->fs_info->corrupt_blocks = NULL;
8605                 root->fs_info->excluded_extents = NULL;
8606         }
8607         free(bits);
8608         free_chunk_cache_tree(&chunk_cache);
8609         free_device_cache_tree(&dev_cache);
8610         free_block_group_tree(&block_group_cache);
8611         free_device_extent_tree(&dev_extent_cache);
8612         free_extent_cache_tree(&seen);
8613         free_extent_cache_tree(&pending);
8614         free_extent_cache_tree(&reada);
8615         free_extent_cache_tree(&nodes);
8616         return ret;
8617 loop:
8618         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8619         free_extent_cache_tree(&seen);
8620         free_extent_cache_tree(&pending);
8621         free_extent_cache_tree(&reada);
8622         free_extent_cache_tree(&nodes);
8623         free_chunk_cache_tree(&chunk_cache);
8624         free_block_group_tree(&block_group_cache);
8625         free_device_cache_tree(&dev_cache);
8626         free_device_extent_tree(&dev_extent_cache);
8627         free_extent_record_cache(root->fs_info, &extent_cache);
8628         free_root_item_list(&normal_trees);
8629         free_root_item_list(&dropping_trees);
8630         extent_io_tree_cleanup(&excluded_extents);
8631         goto again;
8632 }
8633
8634 /*
8635  * Check backrefs of a tree block given by @bytenr or @eb.
8636  *
8637  * @root:       the root containing the @bytenr or @eb
8638  * @eb:         tree block extent buffer, can be NULL
8639  * @bytenr:     bytenr of the tree block to search
8640  * @level:      tree level of the tree block
8641  * @owner:      owner of the tree block
8642  *
8643  * Return >0 for any error found and output error message
8644  * Return 0 for no error found
8645  */
8646 static int check_tree_block_ref(struct btrfs_root *root,
8647                                 struct extent_buffer *eb, u64 bytenr,
8648                                 int level, u64 owner)
8649 {
8650         struct btrfs_key key;
8651         struct btrfs_root *extent_root = root->fs_info->extent_root;
8652         struct btrfs_path path;
8653         struct btrfs_extent_item *ei;
8654         struct btrfs_extent_inline_ref *iref;
8655         struct extent_buffer *leaf;
8656         unsigned long end;
8657         unsigned long ptr;
8658         int slot;
8659         int skinny_level;
8660         int type;
8661         u32 nodesize = root->nodesize;
8662         u32 item_size;
8663         u64 offset;
8664         int found_ref = 0;
8665         int err = 0;
8666         int ret;
8667
8668         btrfs_init_path(&path);
8669         key.objectid = bytenr;
8670         if (btrfs_fs_incompat(root->fs_info,
8671                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8672                 key.type = BTRFS_METADATA_ITEM_KEY;
8673         else
8674                 key.type = BTRFS_EXTENT_ITEM_KEY;
8675         key.offset = (u64)-1;
8676
8677         /* Search for the backref in extent tree */
8678         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8679         if (ret < 0) {
8680                 err |= BACKREF_MISSING;
8681                 goto out;
8682         }
8683         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8684         if (ret) {
8685                 err |= BACKREF_MISSING;
8686                 goto out;
8687         }
8688
8689         leaf = path.nodes[0];
8690         slot = path.slots[0];
8691         btrfs_item_key_to_cpu(leaf, &key, slot);
8692
8693         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8694
8695         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8696                 skinny_level = (int)key.offset;
8697                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8698         } else {
8699                 struct btrfs_tree_block_info *info;
8700
8701                 info = (struct btrfs_tree_block_info *)(ei + 1);
8702                 skinny_level = btrfs_tree_block_level(leaf, info);
8703                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8704         }
8705
8706         if (eb) {
8707                 u64 header_gen;
8708                 u64 extent_gen;
8709
8710                 if (!(btrfs_extent_flags(leaf, ei) &
8711                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8712                         error(
8713                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8714                                 key.objectid, nodesize,
8715                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8716                         err = BACKREF_MISMATCH;
8717                 }
8718                 header_gen = btrfs_header_generation(eb);
8719                 extent_gen = btrfs_extent_generation(leaf, ei);
8720                 if (header_gen != extent_gen) {
8721                         error(
8722         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8723                                 key.objectid, nodesize, header_gen,
8724                                 extent_gen);
8725                         err = BACKREF_MISMATCH;
8726                 }
8727                 if (level != skinny_level) {
8728                         error(
8729                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8730                                 key.objectid, nodesize, level, skinny_level);
8731                         err = BACKREF_MISMATCH;
8732                 }
8733                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8734                         error(
8735                         "extent[%llu %u] is referred by other roots than %llu",
8736                                 key.objectid, nodesize, root->objectid);
8737                         err = BACKREF_MISMATCH;
8738                 }
8739         }
8740
8741         /*
8742          * Iterate the extent/metadata item to find the exact backref
8743          */
8744         item_size = btrfs_item_size_nr(leaf, slot);
8745         ptr = (unsigned long)iref;
8746         end = (unsigned long)ei + item_size;
8747         while (ptr < end) {
8748                 iref = (struct btrfs_extent_inline_ref *)ptr;
8749                 type = btrfs_extent_inline_ref_type(leaf, iref);
8750                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8751
8752                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8753                         (offset == root->objectid || offset == owner)) {
8754                         found_ref = 1;
8755                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8756                         /* Check if the backref points to valid referencer */
8757                         found_ref = !check_tree_block_ref(root, NULL, offset,
8758                                                           level + 1, owner);
8759                 }
8760
8761                 if (found_ref)
8762                         break;
8763                 ptr += btrfs_extent_inline_ref_size(type);
8764         }
8765
8766         /*
8767          * Inlined extent item doesn't have what we need, check
8768          * TREE_BLOCK_REF_KEY
8769          */
8770         if (!found_ref) {
8771                 btrfs_release_path(&path);
8772                 key.objectid = bytenr;
8773                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8774                 key.offset = root->objectid;
8775
8776                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8777                 if (!ret)
8778                         found_ref = 1;
8779         }
8780         if (!found_ref)
8781                 err |= BACKREF_MISSING;
8782 out:
8783         btrfs_release_path(&path);
8784         if (eb && (err & BACKREF_MISSING))
8785                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8786                         bytenr, nodesize, owner, level);
8787         return err;
8788 }
8789
8790 /*
8791  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8792  *
8793  * Return >0 any error found and output error message
8794  * Return 0 for no error found
8795  */
8796 static int check_extent_data_item(struct btrfs_root *root,
8797                                   struct extent_buffer *eb, int slot)
8798 {
8799         struct btrfs_file_extent_item *fi;
8800         struct btrfs_path path;
8801         struct btrfs_root *extent_root = root->fs_info->extent_root;
8802         struct btrfs_key fi_key;
8803         struct btrfs_key dbref_key;
8804         struct extent_buffer *leaf;
8805         struct btrfs_extent_item *ei;
8806         struct btrfs_extent_inline_ref *iref;
8807         struct btrfs_extent_data_ref *dref;
8808         u64 owner;
8809         u64 file_extent_gen;
8810         u64 disk_bytenr;
8811         u64 disk_num_bytes;
8812         u64 extent_num_bytes;
8813         u64 extent_flags;
8814         u64 extent_gen;
8815         u32 item_size;
8816         unsigned long end;
8817         unsigned long ptr;
8818         int type;
8819         u64 ref_root;
8820         int found_dbackref = 0;
8821         int err = 0;
8822         int ret;
8823
8824         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8825         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8826         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8827
8828         /* Nothing to check for hole and inline data extents */
8829         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8830             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8831                 return 0;
8832
8833         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8834         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8835         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8836
8837         /* Check unaligned disk_num_bytes and num_bytes */
8838         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8839                 error(
8840 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8841                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8842                         root->sectorsize);
8843                 err |= BYTES_UNALIGNED;
8844         } else {
8845                 data_bytes_allocated += disk_num_bytes;
8846         }
8847         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8848                 error(
8849 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8850                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8851                         root->sectorsize);
8852                 err |= BYTES_UNALIGNED;
8853         } else {
8854                 data_bytes_referenced += extent_num_bytes;
8855         }
8856         owner = btrfs_header_owner(eb);
8857
8858         /* Check the extent item of the file extent in extent tree */
8859         btrfs_init_path(&path);
8860         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8861         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8862         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8863
8864         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8865         if (ret) {
8866                 err |= BACKREF_MISSING;
8867                 goto error;
8868         }
8869
8870         leaf = path.nodes[0];
8871         slot = path.slots[0];
8872         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8873
8874         extent_flags = btrfs_extent_flags(leaf, ei);
8875         extent_gen = btrfs_extent_generation(leaf, ei);
8876
8877         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8878                 error(
8879                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8880                     disk_bytenr, disk_num_bytes,
8881                     BTRFS_EXTENT_FLAG_DATA);
8882                 err |= BACKREF_MISMATCH;
8883         }
8884
8885         if (file_extent_gen < extent_gen) {
8886                 error(
8887 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8888                         disk_bytenr, disk_num_bytes, file_extent_gen,
8889                         extent_gen);
8890                 err |= BACKREF_MISMATCH;
8891         }
8892
8893         /* Check data backref inside that extent item */
8894         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8895         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8896         ptr = (unsigned long)iref;
8897         end = (unsigned long)ei + item_size;
8898         while (ptr < end) {
8899                 iref = (struct btrfs_extent_inline_ref *)ptr;
8900                 type = btrfs_extent_inline_ref_type(leaf, iref);
8901                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8902
8903                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8904                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8905                         if (ref_root == owner || ref_root == root->objectid)
8906                                 found_dbackref = 1;
8907                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8908                         found_dbackref = !check_tree_block_ref(root, NULL,
8909                                 btrfs_extent_inline_ref_offset(leaf, iref),
8910                                 0, owner);
8911                 }
8912
8913                 if (found_dbackref)
8914                         break;
8915                 ptr += btrfs_extent_inline_ref_size(type);
8916         }
8917
8918         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8919         if (!found_dbackref) {
8920                 btrfs_release_path(&path);
8921
8922                 btrfs_init_path(&path);
8923                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8924                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8925                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8926                                 fi_key.objectid, fi_key.offset);
8927
8928                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8929                                         &dbref_key, &path, 0, 0);
8930                 if (!ret)
8931                         found_dbackref = 1;
8932         }
8933
8934         if (!found_dbackref)
8935                 err |= BACKREF_MISSING;
8936 error:
8937         btrfs_release_path(&path);
8938         if (err & BACKREF_MISSING) {
8939                 error("data extent[%llu %llu] backref lost",
8940                       disk_bytenr, disk_num_bytes);
8941         }
8942         return err;
8943 }
8944
8945 /*
8946  * Get real tree block level for the case like shared block
8947  * Return >= 0 as tree level
8948  * Return <0 for error
8949  */
8950 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8951 {
8952         struct extent_buffer *eb;
8953         struct btrfs_path path;
8954         struct btrfs_key key;
8955         struct btrfs_extent_item *ei;
8956         u64 flags;
8957         u64 transid;
8958         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8959         u8 backref_level;
8960         u8 header_level;
8961         int ret;
8962
8963         /* Search extent tree for extent generation and level */
8964         key.objectid = bytenr;
8965         key.type = BTRFS_METADATA_ITEM_KEY;
8966         key.offset = (u64)-1;
8967
8968         btrfs_init_path(&path);
8969         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8970         if (ret < 0)
8971                 goto release_out;
8972         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8973         if (ret < 0)
8974                 goto release_out;
8975         if (ret > 0) {
8976                 ret = -ENOENT;
8977                 goto release_out;
8978         }
8979
8980         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8981         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8982                             struct btrfs_extent_item);
8983         flags = btrfs_extent_flags(path.nodes[0], ei);
8984         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8985                 ret = -ENOENT;
8986                 goto release_out;
8987         }
8988
8989         /* Get transid for later read_tree_block() check */
8990         transid = btrfs_extent_generation(path.nodes[0], ei);
8991
8992         /* Get backref level as one source */
8993         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8994                 backref_level = key.offset;
8995         } else {
8996                 struct btrfs_tree_block_info *info;
8997
8998                 info = (struct btrfs_tree_block_info *)(ei + 1);
8999                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9000         }
9001         btrfs_release_path(&path);
9002
9003         /* Get level from tree block as an alternative source */
9004         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9005         if (!extent_buffer_uptodate(eb)) {
9006                 free_extent_buffer(eb);
9007                 return -EIO;
9008         }
9009         header_level = btrfs_header_level(eb);
9010         free_extent_buffer(eb);
9011
9012         if (header_level != backref_level)
9013                 return -EIO;
9014         return header_level;
9015
9016 release_out:
9017         btrfs_release_path(&path);
9018         return ret;
9019 }
9020
9021 /*
9022  * Check if a tree block backref is valid (points to a valid tree block)
9023  * if level == -1, level will be resolved
9024  * Return >0 for any error found and print error message
9025  */
9026 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9027                                     u64 bytenr, int level)
9028 {
9029         struct btrfs_root *root;
9030         struct btrfs_key key;
9031         struct btrfs_path path;
9032         struct extent_buffer *eb;
9033         struct extent_buffer *node;
9034         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9035         int err = 0;
9036         int ret;
9037
9038         /* Query level for level == -1 special case */
9039         if (level == -1)
9040                 level = query_tree_block_level(fs_info, bytenr);
9041         if (level < 0) {
9042                 err |= REFERENCER_MISSING;
9043                 goto out;
9044         }
9045
9046         key.objectid = root_id;
9047         key.type = BTRFS_ROOT_ITEM_KEY;
9048         key.offset = (u64)-1;
9049
9050         root = btrfs_read_fs_root(fs_info, &key);
9051         if (IS_ERR(root)) {
9052                 err |= REFERENCER_MISSING;
9053                 goto out;
9054         }
9055
9056         /* Read out the tree block to get item/node key */
9057         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9058         if (!extent_buffer_uptodate(eb)) {
9059                 err |= REFERENCER_MISSING;
9060                 free_extent_buffer(eb);
9061                 goto out;
9062         }
9063
9064         /* Empty tree, no need to check key */
9065         if (!btrfs_header_nritems(eb) && !level) {
9066                 free_extent_buffer(eb);
9067                 goto out;
9068         }
9069
9070         if (level)
9071                 btrfs_node_key_to_cpu(eb, &key, 0);
9072         else
9073                 btrfs_item_key_to_cpu(eb, &key, 0);
9074
9075         free_extent_buffer(eb);
9076
9077         btrfs_init_path(&path);
9078         path.lowest_level = level;
9079         /* Search with the first key, to ensure we can reach it */
9080         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9081         if (ret < 0) {
9082                 err |= REFERENCER_MISSING;
9083                 goto release_out;
9084         }
9085
9086         node = path.nodes[level];
9087         if (btrfs_header_bytenr(node) != bytenr) {
9088                 error(
9089         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9090                         bytenr, nodesize, bytenr,
9091                         btrfs_header_bytenr(node));
9092                 err |= REFERENCER_MISMATCH;
9093         }
9094         if (btrfs_header_level(node) != level) {
9095                 error(
9096         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9097                         bytenr, nodesize, level,
9098                         btrfs_header_level(node));
9099                 err |= REFERENCER_MISMATCH;
9100         }
9101
9102 release_out:
9103         btrfs_release_path(&path);
9104 out:
9105         if (err & REFERENCER_MISSING) {
9106                 if (level < 0)
9107                         error("extent [%llu %d] lost referencer (owner: %llu)",
9108                                 bytenr, nodesize, root_id);
9109                 else
9110                         error(
9111                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9112                                 bytenr, nodesize, root_id, level);
9113         }
9114
9115         return err;
9116 }
9117
9118 /*
9119  * Check referencer for shared block backref
9120  * If level == -1, this function will resolve the level.
9121  */
9122 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9123                                      u64 parent, u64 bytenr, int level)
9124 {
9125         struct extent_buffer *eb;
9126         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9127         u32 nr;
9128         int found_parent = 0;
9129         int i;
9130
9131         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9132         if (!extent_buffer_uptodate(eb))
9133                 goto out;
9134
9135         if (level == -1)
9136                 level = query_tree_block_level(fs_info, bytenr);
9137         if (level < 0)
9138                 goto out;
9139
9140         if (level + 1 != btrfs_header_level(eb))
9141                 goto out;
9142
9143         nr = btrfs_header_nritems(eb);
9144         for (i = 0; i < nr; i++) {
9145                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9146                         found_parent = 1;
9147                         break;
9148                 }
9149         }
9150 out:
9151         free_extent_buffer(eb);
9152         if (!found_parent) {
9153                 error(
9154         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9155                         bytenr, nodesize, parent, level);
9156                 return REFERENCER_MISSING;
9157         }
9158         return 0;
9159 }
9160
9161 /*
9162  * Check referencer for normal (inlined) data ref
9163  * If len == 0, it will be resolved by searching in extent tree
9164  */
9165 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9166                                      u64 root_id, u64 objectid, u64 offset,
9167                                      u64 bytenr, u64 len, u32 count)
9168 {
9169         struct btrfs_root *root;
9170         struct btrfs_root *extent_root = fs_info->extent_root;
9171         struct btrfs_key key;
9172         struct btrfs_path path;
9173         struct extent_buffer *leaf;
9174         struct btrfs_file_extent_item *fi;
9175         u32 found_count = 0;
9176         int slot;
9177         int ret = 0;
9178
9179         if (!len) {
9180                 key.objectid = bytenr;
9181                 key.type = BTRFS_EXTENT_ITEM_KEY;
9182                 key.offset = (u64)-1;
9183
9184                 btrfs_init_path(&path);
9185                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9186                 if (ret < 0)
9187                         goto out;
9188                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9189                 if (ret)
9190                         goto out;
9191                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9192                 if (key.objectid != bytenr ||
9193                     key.type != BTRFS_EXTENT_ITEM_KEY)
9194                         goto out;
9195                 len = key.offset;
9196                 btrfs_release_path(&path);
9197         }
9198         key.objectid = root_id;
9199         key.type = BTRFS_ROOT_ITEM_KEY;
9200         key.offset = (u64)-1;
9201         btrfs_init_path(&path);
9202
9203         root = btrfs_read_fs_root(fs_info, &key);
9204         if (IS_ERR(root))
9205                 goto out;
9206
9207         key.objectid = objectid;
9208         key.type = BTRFS_EXTENT_DATA_KEY;
9209         /*
9210          * It can be nasty as data backref offset is
9211          * file offset - file extent offset, which is smaller or
9212          * equal to original backref offset.  The only special case is
9213          * overflow.  So we need to special check and do further search.
9214          */
9215         key.offset = offset & (1ULL << 63) ? 0 : offset;
9216
9217         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9218         if (ret < 0)
9219                 goto out;
9220
9221         /*
9222          * Search afterwards to get correct one
9223          * NOTE: As we must do a comprehensive check on the data backref to
9224          * make sure the dref count also matches, we must iterate all file
9225          * extents for that inode.
9226          */
9227         while (1) {
9228                 leaf = path.nodes[0];
9229                 slot = path.slots[0];
9230
9231                 btrfs_item_key_to_cpu(leaf, &key, slot);
9232                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9233                         break;
9234                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9235                 /*
9236                  * Except normal disk bytenr and disk num bytes, we still
9237                  * need to do extra check on dbackref offset as
9238                  * dbackref offset = file_offset - file_extent_offset
9239                  */
9240                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9241                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9242                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9243                     offset)
9244                         found_count++;
9245
9246                 ret = btrfs_next_item(root, &path);
9247                 if (ret)
9248                         break;
9249         }
9250 out:
9251         btrfs_release_path(&path);
9252         if (found_count != count) {
9253                 error(
9254 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9255                         bytenr, len, root_id, objectid, offset, count, found_count);
9256                 return REFERENCER_MISSING;
9257         }
9258         return 0;
9259 }
9260
9261 /*
9262  * Check if the referencer of a shared data backref exists
9263  */
9264 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9265                                      u64 parent, u64 bytenr)
9266 {
9267         struct extent_buffer *eb;
9268         struct btrfs_key key;
9269         struct btrfs_file_extent_item *fi;
9270         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9271         u32 nr;
9272         int found_parent = 0;
9273         int i;
9274
9275         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9276         if (!extent_buffer_uptodate(eb))
9277                 goto out;
9278
9279         nr = btrfs_header_nritems(eb);
9280         for (i = 0; i < nr; i++) {
9281                 btrfs_item_key_to_cpu(eb, &key, i);
9282                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9283                         continue;
9284
9285                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9286                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9287                         continue;
9288
9289                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9290                         found_parent = 1;
9291                         break;
9292                 }
9293         }
9294
9295 out:
9296         free_extent_buffer(eb);
9297         if (!found_parent) {
9298                 error("shared extent %llu referencer lost (parent: %llu)",
9299                         bytenr, parent);
9300                 return REFERENCER_MISSING;
9301         }
9302         return 0;
9303 }
9304
9305 /*
9306  * This function will check a given extent item, including its backref and
9307  * itself (like crossing stripe boundary and type)
9308  *
9309  * Since we don't use extent_record anymore, introduce new error bit
9310  */
9311 static int check_extent_item(struct btrfs_fs_info *fs_info,
9312                              struct extent_buffer *eb, int slot)
9313 {
9314         struct btrfs_extent_item *ei;
9315         struct btrfs_extent_inline_ref *iref;
9316         struct btrfs_extent_data_ref *dref;
9317         unsigned long end;
9318         unsigned long ptr;
9319         int type;
9320         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9321         u32 item_size = btrfs_item_size_nr(eb, slot);
9322         u64 flags;
9323         u64 offset;
9324         int metadata = 0;
9325         int level;
9326         struct btrfs_key key;
9327         int ret;
9328         int err = 0;
9329
9330         btrfs_item_key_to_cpu(eb, &key, slot);
9331         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9332                 bytes_used += key.offset;
9333         else
9334                 bytes_used += nodesize;
9335
9336         if (item_size < sizeof(*ei)) {
9337                 /*
9338                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9339                  * old thing when on disk format is still un-determined.
9340                  * No need to care about it anymore
9341                  */
9342                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9343                 return -ENOTTY;
9344         }
9345
9346         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9347         flags = btrfs_extent_flags(eb, ei);
9348
9349         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9350                 metadata = 1;
9351         if (metadata && check_crossing_stripes(global_info, key.objectid,
9352                                                eb->len)) {
9353                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9354                       key.objectid, key.objectid + nodesize);
9355                 err |= CROSSING_STRIPE_BOUNDARY;
9356         }
9357
9358         ptr = (unsigned long)(ei + 1);
9359
9360         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9361                 /* Old EXTENT_ITEM metadata */
9362                 struct btrfs_tree_block_info *info;
9363
9364                 info = (struct btrfs_tree_block_info *)ptr;
9365                 level = btrfs_tree_block_level(eb, info);
9366                 ptr += sizeof(struct btrfs_tree_block_info);
9367         } else {
9368                 /* New METADATA_ITEM */
9369                 level = key.offset;
9370         }
9371         end = (unsigned long)ei + item_size;
9372
9373         if (ptr >= end) {
9374                 err |= ITEM_SIZE_MISMATCH;
9375                 goto out;
9376         }
9377
9378         /* Now check every backref in this extent item */
9379 next:
9380         iref = (struct btrfs_extent_inline_ref *)ptr;
9381         type = btrfs_extent_inline_ref_type(eb, iref);
9382         offset = btrfs_extent_inline_ref_offset(eb, iref);
9383         switch (type) {
9384         case BTRFS_TREE_BLOCK_REF_KEY:
9385                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9386                                                level);
9387                 err |= ret;
9388                 break;
9389         case BTRFS_SHARED_BLOCK_REF_KEY:
9390                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9391                                                  level);
9392                 err |= ret;
9393                 break;
9394         case BTRFS_EXTENT_DATA_REF_KEY:
9395                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9396                 ret = check_extent_data_backref(fs_info,
9397                                 btrfs_extent_data_ref_root(eb, dref),
9398                                 btrfs_extent_data_ref_objectid(eb, dref),
9399                                 btrfs_extent_data_ref_offset(eb, dref),
9400                                 key.objectid, key.offset,
9401                                 btrfs_extent_data_ref_count(eb, dref));
9402                 err |= ret;
9403                 break;
9404         case BTRFS_SHARED_DATA_REF_KEY:
9405                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9406                 err |= ret;
9407                 break;
9408         default:
9409                 error("extent[%llu %d %llu] has unknown ref type: %d",
9410                         key.objectid, key.type, key.offset, type);
9411                 err |= UNKNOWN_TYPE;
9412                 goto out;
9413         }
9414
9415         ptr += btrfs_extent_inline_ref_size(type);
9416         if (ptr < end)
9417                 goto next;
9418
9419 out:
9420         return err;
9421 }
9422
9423 /*
9424  * Check if a dev extent item is referred correctly by its chunk
9425  */
9426 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9427                                  struct extent_buffer *eb, int slot)
9428 {
9429         struct btrfs_root *chunk_root = fs_info->chunk_root;
9430         struct btrfs_dev_extent *ptr;
9431         struct btrfs_path path;
9432         struct btrfs_key chunk_key;
9433         struct btrfs_key devext_key;
9434         struct btrfs_chunk *chunk;
9435         struct extent_buffer *l;
9436         int num_stripes;
9437         u64 length;
9438         int i;
9439         int found_chunk = 0;
9440         int ret;
9441
9442         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9443         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9444         length = btrfs_dev_extent_length(eb, ptr);
9445
9446         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9447         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9448         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9449
9450         btrfs_init_path(&path);
9451         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9452         if (ret)
9453                 goto out;
9454
9455         l = path.nodes[0];
9456         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9457         if (btrfs_chunk_length(l, chunk) != length)
9458                 goto out;
9459
9460         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9461         for (i = 0; i < num_stripes; i++) {
9462                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9463                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9464
9465                 if (devid == devext_key.objectid &&
9466                     offset == devext_key.offset) {
9467                         found_chunk = 1;
9468                         break;
9469                 }
9470         }
9471 out:
9472         btrfs_release_path(&path);
9473         if (!found_chunk) {
9474                 error(
9475                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9476                         devext_key.objectid, devext_key.offset, length);
9477                 return REFERENCER_MISSING;
9478         }
9479         return 0;
9480 }
9481
9482 /*
9483  * Check if the used space is correct with the dev item
9484  */
9485 static int check_dev_item(struct btrfs_fs_info *fs_info,
9486                           struct extent_buffer *eb, int slot)
9487 {
9488         struct btrfs_root *dev_root = fs_info->dev_root;
9489         struct btrfs_dev_item *dev_item;
9490         struct btrfs_path path;
9491         struct btrfs_key key;
9492         struct btrfs_dev_extent *ptr;
9493         u64 dev_id;
9494         u64 used;
9495         u64 total = 0;
9496         int ret;
9497
9498         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9499         dev_id = btrfs_device_id(eb, dev_item);
9500         used = btrfs_device_bytes_used(eb, dev_item);
9501
9502         key.objectid = dev_id;
9503         key.type = BTRFS_DEV_EXTENT_KEY;
9504         key.offset = 0;
9505
9506         btrfs_init_path(&path);
9507         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9508         if (ret < 0) {
9509                 btrfs_item_key_to_cpu(eb, &key, slot);
9510                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9511                         key.objectid, key.type, key.offset);
9512                 btrfs_release_path(&path);
9513                 return REFERENCER_MISSING;
9514         }
9515
9516         /* Iterate dev_extents to calculate the used space of a device */
9517         while (1) {
9518                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9519
9520                 if (key.objectid > dev_id)
9521                         break;
9522                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9523                         goto next;
9524
9525                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9526                                      struct btrfs_dev_extent);
9527                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9528 next:
9529                 ret = btrfs_next_item(dev_root, &path);
9530                 if (ret)
9531                         break;
9532         }
9533         btrfs_release_path(&path);
9534
9535         if (used != total) {
9536                 btrfs_item_key_to_cpu(eb, &key, slot);
9537                 error(
9538 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9539                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9540                         BTRFS_DEV_EXTENT_KEY, dev_id);
9541                 return ACCOUNTING_MISMATCH;
9542         }
9543         return 0;
9544 }
9545
9546 /*
9547  * Check a block group item with its referener (chunk) and its used space
9548  * with extent/metadata item
9549  */
9550 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9551                                   struct extent_buffer *eb, int slot)
9552 {
9553         struct btrfs_root *extent_root = fs_info->extent_root;
9554         struct btrfs_root *chunk_root = fs_info->chunk_root;
9555         struct btrfs_block_group_item *bi;
9556         struct btrfs_block_group_item bg_item;
9557         struct btrfs_path path;
9558         struct btrfs_key bg_key;
9559         struct btrfs_key chunk_key;
9560         struct btrfs_key extent_key;
9561         struct btrfs_chunk *chunk;
9562         struct extent_buffer *leaf;
9563         struct btrfs_extent_item *ei;
9564         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9565         u64 flags;
9566         u64 bg_flags;
9567         u64 used;
9568         u64 total = 0;
9569         int ret;
9570         int err = 0;
9571
9572         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9573         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9574         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9575         used = btrfs_block_group_used(&bg_item);
9576         bg_flags = btrfs_block_group_flags(&bg_item);
9577
9578         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9579         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9580         chunk_key.offset = bg_key.objectid;
9581
9582         btrfs_init_path(&path);
9583         /* Search for the referencer chunk */
9584         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9585         if (ret) {
9586                 error(
9587                 "block group[%llu %llu] did not find the related chunk item",
9588                         bg_key.objectid, bg_key.offset);
9589                 err |= REFERENCER_MISSING;
9590         } else {
9591                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9592                                         struct btrfs_chunk);
9593                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9594                                                 bg_key.offset) {
9595                         error(
9596         "block group[%llu %llu] related chunk item length does not match",
9597                                 bg_key.objectid, bg_key.offset);
9598                         err |= REFERENCER_MISMATCH;
9599                 }
9600         }
9601         btrfs_release_path(&path);
9602
9603         /* Search from the block group bytenr */
9604         extent_key.objectid = bg_key.objectid;
9605         extent_key.type = 0;
9606         extent_key.offset = 0;
9607
9608         btrfs_init_path(&path);
9609         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9610         if (ret < 0)
9611                 goto out;
9612
9613         /* Iterate extent tree to account used space */
9614         while (1) {
9615                 leaf = path.nodes[0];
9616                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9617                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9618                         break;
9619
9620                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9621                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9622                         goto next;
9623                 if (extent_key.objectid < bg_key.objectid)
9624                         goto next;
9625
9626                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9627                         total += nodesize;
9628                 else
9629                         total += extent_key.offset;
9630
9631                 ei = btrfs_item_ptr(leaf, path.slots[0],
9632                                     struct btrfs_extent_item);
9633                 flags = btrfs_extent_flags(leaf, ei);
9634                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9635                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9636                                 error(
9637                         "bad extent[%llu, %llu) type mismatch with chunk",
9638                                         extent_key.objectid,
9639                                         extent_key.objectid + extent_key.offset);
9640                                 err |= CHUNK_TYPE_MISMATCH;
9641                         }
9642                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9643                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9644                                     BTRFS_BLOCK_GROUP_METADATA))) {
9645                                 error(
9646                         "bad extent[%llu, %llu) type mismatch with chunk",
9647                                         extent_key.objectid,
9648                                         extent_key.objectid + nodesize);
9649                                 err |= CHUNK_TYPE_MISMATCH;
9650                         }
9651                 }
9652 next:
9653                 ret = btrfs_next_item(extent_root, &path);
9654                 if (ret)
9655                         break;
9656         }
9657
9658 out:
9659         btrfs_release_path(&path);
9660
9661         if (total != used) {
9662                 error(
9663                 "block group[%llu %llu] used %llu but extent items used %llu",
9664                         bg_key.objectid, bg_key.offset, used, total);
9665                 err |= ACCOUNTING_MISMATCH;
9666         }
9667         return err;
9668 }
9669
9670 /*
9671  * Check a chunk item.
9672  * Including checking all referred dev_extents and block group
9673  */
9674 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9675                             struct extent_buffer *eb, int slot)
9676 {
9677         struct btrfs_root *extent_root = fs_info->extent_root;
9678         struct btrfs_root *dev_root = fs_info->dev_root;
9679         struct btrfs_path path;
9680         struct btrfs_key chunk_key;
9681         struct btrfs_key bg_key;
9682         struct btrfs_key devext_key;
9683         struct btrfs_chunk *chunk;
9684         struct extent_buffer *leaf;
9685         struct btrfs_block_group_item *bi;
9686         struct btrfs_block_group_item bg_item;
9687         struct btrfs_dev_extent *ptr;
9688         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9689         u64 length;
9690         u64 chunk_end;
9691         u64 type;
9692         u64 profile;
9693         int num_stripes;
9694         u64 offset;
9695         u64 objectid;
9696         int i;
9697         int ret;
9698         int err = 0;
9699
9700         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9701         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9702         length = btrfs_chunk_length(eb, chunk);
9703         chunk_end = chunk_key.offset + length;
9704         if (!IS_ALIGNED(length, sectorsize)) {
9705                 error("chunk[%llu %llu) not aligned to %u",
9706                         chunk_key.offset, chunk_end, sectorsize);
9707                 err |= BYTES_UNALIGNED;
9708                 goto out;
9709         }
9710
9711         type = btrfs_chunk_type(eb, chunk);
9712         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9713         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9714                 error("chunk[%llu %llu) has no chunk type",
9715                         chunk_key.offset, chunk_end);
9716                 err |= UNKNOWN_TYPE;
9717         }
9718         if (profile && (profile & (profile - 1))) {
9719                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9720                         chunk_key.offset, chunk_end, profile);
9721                 err |= UNKNOWN_TYPE;
9722         }
9723
9724         bg_key.objectid = chunk_key.offset;
9725         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9726         bg_key.offset = length;
9727
9728         btrfs_init_path(&path);
9729         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9730         if (ret) {
9731                 error(
9732                 "chunk[%llu %llu) did not find the related block group item",
9733                         chunk_key.offset, chunk_end);
9734                 err |= REFERENCER_MISSING;
9735         } else{
9736                 leaf = path.nodes[0];
9737                 bi = btrfs_item_ptr(leaf, path.slots[0],
9738                                     struct btrfs_block_group_item);
9739                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9740                                    sizeof(bg_item));
9741                 if (btrfs_block_group_flags(&bg_item) != type) {
9742                         error(
9743 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9744                                 chunk_key.offset, chunk_end, type,
9745                                 btrfs_block_group_flags(&bg_item));
9746                         err |= REFERENCER_MISSING;
9747                 }
9748         }
9749
9750         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9751         for (i = 0; i < num_stripes; i++) {
9752                 btrfs_release_path(&path);
9753                 btrfs_init_path(&path);
9754                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9755                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9756                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9757
9758                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9759                                         0, 0);
9760                 if (ret)
9761                         goto not_match_dev;
9762
9763                 leaf = path.nodes[0];
9764                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9765                                      struct btrfs_dev_extent);
9766                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9767                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9768                 if (objectid != chunk_key.objectid ||
9769                     offset != chunk_key.offset ||
9770                     btrfs_dev_extent_length(leaf, ptr) != length)
9771                         goto not_match_dev;
9772                 continue;
9773 not_match_dev:
9774                 err |= BACKREF_MISSING;
9775                 error(
9776                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9777                         chunk_key.objectid, chunk_end, i);
9778                 continue;
9779         }
9780         btrfs_release_path(&path);
9781 out:
9782         return err;
9783 }
9784
9785 /*
9786  * Main entry function to check known items and update related accounting info
9787  */
9788 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9789 {
9790         struct btrfs_fs_info *fs_info = root->fs_info;
9791         struct btrfs_key key;
9792         int slot = 0;
9793         int type;
9794         struct btrfs_extent_data_ref *dref;
9795         int ret;
9796         int err = 0;
9797
9798 next:
9799         btrfs_item_key_to_cpu(eb, &key, slot);
9800         type = key.type;
9801
9802         switch (type) {
9803         case BTRFS_EXTENT_DATA_KEY:
9804                 ret = check_extent_data_item(root, eb, slot);
9805                 err |= ret;
9806                 break;
9807         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9808                 ret = check_block_group_item(fs_info, eb, slot);
9809                 err |= ret;
9810                 break;
9811         case BTRFS_DEV_ITEM_KEY:
9812                 ret = check_dev_item(fs_info, eb, slot);
9813                 err |= ret;
9814                 break;
9815         case BTRFS_CHUNK_ITEM_KEY:
9816                 ret = check_chunk_item(fs_info, eb, slot);
9817                 err |= ret;
9818                 break;
9819         case BTRFS_DEV_EXTENT_KEY:
9820                 ret = check_dev_extent_item(fs_info, eb, slot);
9821                 err |= ret;
9822                 break;
9823         case BTRFS_EXTENT_ITEM_KEY:
9824         case BTRFS_METADATA_ITEM_KEY:
9825                 ret = check_extent_item(fs_info, eb, slot);
9826                 err |= ret;
9827                 break;
9828         case BTRFS_EXTENT_CSUM_KEY:
9829                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9830                 break;
9831         case BTRFS_TREE_BLOCK_REF_KEY:
9832                 ret = check_tree_block_backref(fs_info, key.offset,
9833                                                key.objectid, -1);
9834                 err |= ret;
9835                 break;
9836         case BTRFS_EXTENT_DATA_REF_KEY:
9837                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9838                 ret = check_extent_data_backref(fs_info,
9839                                 btrfs_extent_data_ref_root(eb, dref),
9840                                 btrfs_extent_data_ref_objectid(eb, dref),
9841                                 btrfs_extent_data_ref_offset(eb, dref),
9842                                 key.objectid, 0,
9843                                 btrfs_extent_data_ref_count(eb, dref));
9844                 err |= ret;
9845                 break;
9846         case BTRFS_SHARED_BLOCK_REF_KEY:
9847                 ret = check_shared_block_backref(fs_info, key.offset,
9848                                                  key.objectid, -1);
9849                 err |= ret;
9850                 break;
9851         case BTRFS_SHARED_DATA_REF_KEY:
9852                 ret = check_shared_data_backref(fs_info, key.offset,
9853                                                 key.objectid);
9854                 err |= ret;
9855                 break;
9856         default:
9857                 break;
9858         }
9859
9860         if (++slot < btrfs_header_nritems(eb))
9861                 goto next;
9862
9863         return err;
9864 }
9865
9866 /*
9867  * Helper function for later fs/subvol tree check.  To determine if a tree
9868  * block should be checked.
9869  * This function will ensure only the direct referencer with lowest rootid to
9870  * check a fs/subvolume tree block.
9871  *
9872  * Backref check at extent tree would detect errors like missing subvolume
9873  * tree, so we can do aggressive check to reduce duplicated checks.
9874  */
9875 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9876 {
9877         struct btrfs_root *extent_root = root->fs_info->extent_root;
9878         struct btrfs_key key;
9879         struct btrfs_path path;
9880         struct extent_buffer *leaf;
9881         int slot;
9882         struct btrfs_extent_item *ei;
9883         unsigned long ptr;
9884         unsigned long end;
9885         int type;
9886         u32 item_size;
9887         u64 offset;
9888         struct btrfs_extent_inline_ref *iref;
9889         int ret;
9890
9891         btrfs_init_path(&path);
9892         key.objectid = btrfs_header_bytenr(eb);
9893         key.type = BTRFS_METADATA_ITEM_KEY;
9894         key.offset = (u64)-1;
9895
9896         /*
9897          * Any failure in backref resolving means we can't determine
9898          * whom the tree block belongs to.
9899          * So in that case, we need to check that tree block
9900          */
9901         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9902         if (ret < 0)
9903                 goto need_check;
9904
9905         ret = btrfs_previous_extent_item(extent_root, &path,
9906                                          btrfs_header_bytenr(eb));
9907         if (ret)
9908                 goto need_check;
9909
9910         leaf = path.nodes[0];
9911         slot = path.slots[0];
9912         btrfs_item_key_to_cpu(leaf, &key, slot);
9913         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9914
9915         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9916                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9917         } else {
9918                 struct btrfs_tree_block_info *info;
9919
9920                 info = (struct btrfs_tree_block_info *)(ei + 1);
9921                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9922         }
9923
9924         item_size = btrfs_item_size_nr(leaf, slot);
9925         ptr = (unsigned long)iref;
9926         end = (unsigned long)ei + item_size;
9927         while (ptr < end) {
9928                 iref = (struct btrfs_extent_inline_ref *)ptr;
9929                 type = btrfs_extent_inline_ref_type(leaf, iref);
9930                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9931
9932                 /*
9933                  * We only check the tree block if current root is
9934                  * the lowest referencer of it.
9935                  */
9936                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9937                     offset < root->objectid) {
9938                         btrfs_release_path(&path);
9939                         return 0;
9940                 }
9941
9942                 ptr += btrfs_extent_inline_ref_size(type);
9943         }
9944         /*
9945          * Normally we should also check keyed tree block ref, but that may be
9946          * very time consuming.  Inlined ref should already make us skip a lot
9947          * of refs now.  So skip search keyed tree block ref.
9948          */
9949
9950 need_check:
9951         btrfs_release_path(&path);
9952         return 1;
9953 }
9954
9955 /*
9956  * Traversal function for tree block. We will do:
9957  * 1) Skip shared fs/subvolume tree blocks
9958  * 2) Update related bytes accounting
9959  * 3) Pre-order traversal
9960  */
9961 static int traverse_tree_block(struct btrfs_root *root,
9962                                 struct extent_buffer *node)
9963 {
9964         struct extent_buffer *eb;
9965         struct btrfs_key key;
9966         struct btrfs_key drop_key;
9967         int level;
9968         u64 nr;
9969         int i;
9970         int err = 0;
9971         int ret;
9972
9973         /*
9974          * Skip shared fs/subvolume tree block, in that case they will
9975          * be checked by referencer with lowest rootid
9976          */
9977         if (is_fstree(root->objectid) && !should_check(root, node))
9978                 return 0;
9979
9980         /* Update bytes accounting */
9981         total_btree_bytes += node->len;
9982         if (fs_root_objectid(btrfs_header_owner(node)))
9983                 total_fs_tree_bytes += node->len;
9984         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9985                 total_extent_tree_bytes += node->len;
9986         if (!found_old_backref &&
9987             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9988             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9989             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9990                 found_old_backref = 1;
9991
9992         /* pre-order tranversal, check itself first */
9993         level = btrfs_header_level(node);
9994         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9995                                    btrfs_header_level(node),
9996                                    btrfs_header_owner(node));
9997         err |= ret;
9998         if (err)
9999                 error(
10000         "check %s failed root %llu bytenr %llu level %d, force continue check",
10001                         level ? "node":"leaf", root->objectid,
10002                         btrfs_header_bytenr(node), btrfs_header_level(node));
10003
10004         if (!level) {
10005                 btree_space_waste += btrfs_leaf_free_space(root, node);
10006                 ret = check_leaf_items(root, node);
10007                 err |= ret;
10008                 return err;
10009         }
10010
10011         nr = btrfs_header_nritems(node);
10012         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10013         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10014                 sizeof(struct btrfs_key_ptr);
10015
10016         /* Then check all its children */
10017         for (i = 0; i < nr; i++) {
10018                 u64 blocknr = btrfs_node_blockptr(node, i);
10019
10020                 btrfs_node_key_to_cpu(node, &key, i);
10021                 if (level == root->root_item.drop_level &&
10022                     is_dropped_key(&key, &drop_key))
10023                         continue;
10024
10025                 /*
10026                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10027                  * to call the function itself.
10028                  */
10029                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10030                 if (extent_buffer_uptodate(eb)) {
10031                         ret = traverse_tree_block(root, eb);
10032                         err |= ret;
10033                 }
10034                 free_extent_buffer(eb);
10035         }
10036
10037         return err;
10038 }
10039
10040 /*
10041  * Low memory usage version check_chunks_and_extents.
10042  */
10043 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10044 {
10045         struct btrfs_path path;
10046         struct btrfs_key key;
10047         struct btrfs_root *root1;
10048         struct btrfs_root *cur_root;
10049         int err = 0;
10050         int ret;
10051
10052         root1 = root->fs_info->chunk_root;
10053         ret = traverse_tree_block(root1, root1->node);
10054         err |= ret;
10055
10056         root1 = root->fs_info->tree_root;
10057         ret = traverse_tree_block(root1, root1->node);
10058         err |= ret;
10059
10060         btrfs_init_path(&path);
10061         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10062         key.offset = 0;
10063         key.type = BTRFS_ROOT_ITEM_KEY;
10064
10065         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10066         if (ret) {
10067                 error("cannot find extent treet in tree_root");
10068                 goto out;
10069         }
10070
10071         while (1) {
10072                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10073                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10074                         goto next;
10075                 key.offset = (u64)-1;
10076
10077                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10078                 if (IS_ERR(cur_root) || !cur_root) {
10079                         error("failed to read tree: %lld", key.objectid);
10080                         goto next;
10081                 }
10082
10083                 ret = traverse_tree_block(cur_root, cur_root->node);
10084                 err |= ret;
10085
10086 next:
10087                 ret = btrfs_next_item(root1, &path);
10088                 if (ret)
10089                         goto out;
10090         }
10091
10092 out:
10093         btrfs_release_path(&path);
10094         return err;
10095 }
10096
10097 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10098                            struct btrfs_root *root, int overwrite)
10099 {
10100         struct extent_buffer *c;
10101         struct extent_buffer *old = root->node;
10102         int level;
10103         int ret;
10104         struct btrfs_disk_key disk_key = {0,0,0};
10105
10106         level = 0;
10107
10108         if (overwrite) {
10109                 c = old;
10110                 extent_buffer_get(c);
10111                 goto init;
10112         }
10113         c = btrfs_alloc_free_block(trans, root,
10114                                    root->nodesize,
10115                                    root->root_key.objectid,
10116                                    &disk_key, level, 0, 0);
10117         if (IS_ERR(c)) {
10118                 c = old;
10119                 extent_buffer_get(c);
10120                 overwrite = 1;
10121         }
10122 init:
10123         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10124         btrfs_set_header_level(c, level);
10125         btrfs_set_header_bytenr(c, c->start);
10126         btrfs_set_header_generation(c, trans->transid);
10127         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10128         btrfs_set_header_owner(c, root->root_key.objectid);
10129
10130         write_extent_buffer(c, root->fs_info->fsid,
10131                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10132
10133         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10134                             btrfs_header_chunk_tree_uuid(c),
10135                             BTRFS_UUID_SIZE);
10136
10137         btrfs_mark_buffer_dirty(c);
10138         /*
10139          * this case can happen in the following case:
10140          *
10141          * 1.overwrite previous root.
10142          *
10143          * 2.reinit reloc data root, this is because we skip pin
10144          * down reloc data tree before which means we can allocate
10145          * same block bytenr here.
10146          */
10147         if (old->start == c->start) {
10148                 btrfs_set_root_generation(&root->root_item,
10149                                           trans->transid);
10150                 root->root_item.level = btrfs_header_level(root->node);
10151                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10152                                         &root->root_key, &root->root_item);
10153                 if (ret) {
10154                         free_extent_buffer(c);
10155                         return ret;
10156                 }
10157         }
10158         free_extent_buffer(old);
10159         root->node = c;
10160         add_root_to_dirty_list(root);
10161         return 0;
10162 }
10163
10164 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10165                                 struct extent_buffer *eb, int tree_root)
10166 {
10167         struct extent_buffer *tmp;
10168         struct btrfs_root_item *ri;
10169         struct btrfs_key key;
10170         u64 bytenr;
10171         u32 nodesize;
10172         int level = btrfs_header_level(eb);
10173         int nritems;
10174         int ret;
10175         int i;
10176
10177         /*
10178          * If we have pinned this block before, don't pin it again.
10179          * This can not only avoid forever loop with broken filesystem
10180          * but also give us some speedups.
10181          */
10182         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10183                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10184                 return 0;
10185
10186         btrfs_pin_extent(fs_info, eb->start, eb->len);
10187
10188         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10189         nritems = btrfs_header_nritems(eb);
10190         for (i = 0; i < nritems; i++) {
10191                 if (level == 0) {
10192                         btrfs_item_key_to_cpu(eb, &key, i);
10193                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10194                                 continue;
10195                         /* Skip the extent root and reloc roots */
10196                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10197                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10198                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10199                                 continue;
10200                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10201                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10202
10203                         /*
10204                          * If at any point we start needing the real root we
10205                          * will have to build a stump root for the root we are
10206                          * in, but for now this doesn't actually use the root so
10207                          * just pass in extent_root.
10208                          */
10209                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10210                                               nodesize, 0);
10211                         if (!extent_buffer_uptodate(tmp)) {
10212                                 fprintf(stderr, "Error reading root block\n");
10213                                 return -EIO;
10214                         }
10215                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10216                         free_extent_buffer(tmp);
10217                         if (ret)
10218                                 return ret;
10219                 } else {
10220                         bytenr = btrfs_node_blockptr(eb, i);
10221
10222                         /* If we aren't the tree root don't read the block */
10223                         if (level == 1 && !tree_root) {
10224                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10225                                 continue;
10226                         }
10227
10228                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10229                                               nodesize, 0);
10230                         if (!extent_buffer_uptodate(tmp)) {
10231                                 fprintf(stderr, "Error reading tree block\n");
10232                                 return -EIO;
10233                         }
10234                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10235                         free_extent_buffer(tmp);
10236                         if (ret)
10237                                 return ret;
10238                 }
10239         }
10240
10241         return 0;
10242 }
10243
10244 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10245 {
10246         int ret;
10247
10248         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10249         if (ret)
10250                 return ret;
10251
10252         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10253 }
10254
10255 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10256 {
10257         struct btrfs_block_group_cache *cache;
10258         struct btrfs_path *path;
10259         struct extent_buffer *leaf;
10260         struct btrfs_chunk *chunk;
10261         struct btrfs_key key;
10262         int ret;
10263         u64 start;
10264
10265         path = btrfs_alloc_path();
10266         if (!path)
10267                 return -ENOMEM;
10268
10269         key.objectid = 0;
10270         key.type = BTRFS_CHUNK_ITEM_KEY;
10271         key.offset = 0;
10272
10273         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10274         if (ret < 0) {
10275                 btrfs_free_path(path);
10276                 return ret;
10277         }
10278
10279         /*
10280          * We do this in case the block groups were screwed up and had alloc
10281          * bits that aren't actually set on the chunks.  This happens with
10282          * restored images every time and could happen in real life I guess.
10283          */
10284         fs_info->avail_data_alloc_bits = 0;
10285         fs_info->avail_metadata_alloc_bits = 0;
10286         fs_info->avail_system_alloc_bits = 0;
10287
10288         /* First we need to create the in-memory block groups */
10289         while (1) {
10290                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10291                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10292                         if (ret < 0) {
10293                                 btrfs_free_path(path);
10294                                 return ret;
10295                         }
10296                         if (ret) {
10297                                 ret = 0;
10298                                 break;
10299                         }
10300                 }
10301                 leaf = path->nodes[0];
10302                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10303                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10304                         path->slots[0]++;
10305                         continue;
10306                 }
10307
10308                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10309                                        struct btrfs_chunk);
10310                 btrfs_add_block_group(fs_info, 0,
10311                                       btrfs_chunk_type(leaf, chunk),
10312                                       key.objectid, key.offset,
10313                                       btrfs_chunk_length(leaf, chunk));
10314                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10315                                  key.offset + btrfs_chunk_length(leaf, chunk),
10316                                  GFP_NOFS);
10317                 path->slots[0]++;
10318         }
10319         start = 0;
10320         while (1) {
10321                 cache = btrfs_lookup_first_block_group(fs_info, start);
10322                 if (!cache)
10323                         break;
10324                 cache->cached = 1;
10325                 start = cache->key.objectid + cache->key.offset;
10326         }
10327
10328         btrfs_free_path(path);
10329         return 0;
10330 }
10331
10332 static int reset_balance(struct btrfs_trans_handle *trans,
10333                          struct btrfs_fs_info *fs_info)
10334 {
10335         struct btrfs_root *root = fs_info->tree_root;
10336         struct btrfs_path *path;
10337         struct extent_buffer *leaf;
10338         struct btrfs_key key;
10339         int del_slot, del_nr = 0;
10340         int ret;
10341         int found = 0;
10342
10343         path = btrfs_alloc_path();
10344         if (!path)
10345                 return -ENOMEM;
10346
10347         key.objectid = BTRFS_BALANCE_OBJECTID;
10348         key.type = BTRFS_BALANCE_ITEM_KEY;
10349         key.offset = 0;
10350
10351         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10352         if (ret) {
10353                 if (ret > 0)
10354                         ret = 0;
10355                 if (!ret)
10356                         goto reinit_data_reloc;
10357                 else
10358                         goto out;
10359         }
10360
10361         ret = btrfs_del_item(trans, root, path);
10362         if (ret)
10363                 goto out;
10364         btrfs_release_path(path);
10365
10366         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10367         key.type = BTRFS_ROOT_ITEM_KEY;
10368         key.offset = 0;
10369
10370         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10371         if (ret < 0)
10372                 goto out;
10373         while (1) {
10374                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10375                         if (!found)
10376                                 break;
10377
10378                         if (del_nr) {
10379                                 ret = btrfs_del_items(trans, root, path,
10380                                                       del_slot, del_nr);
10381                                 del_nr = 0;
10382                                 if (ret)
10383                                         goto out;
10384                         }
10385                         key.offset++;
10386                         btrfs_release_path(path);
10387
10388                         found = 0;
10389                         ret = btrfs_search_slot(trans, root, &key, path,
10390                                                 -1, 1);
10391                         if (ret < 0)
10392                                 goto out;
10393                         continue;
10394                 }
10395                 found = 1;
10396                 leaf = path->nodes[0];
10397                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10398                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10399                         break;
10400                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10401                         path->slots[0]++;
10402                         continue;
10403                 }
10404                 if (!del_nr) {
10405                         del_slot = path->slots[0];
10406                         del_nr = 1;
10407                 } else {
10408                         del_nr++;
10409                 }
10410                 path->slots[0]++;
10411         }
10412
10413         if (del_nr) {
10414                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10415                 if (ret)
10416                         goto out;
10417         }
10418         btrfs_release_path(path);
10419
10420 reinit_data_reloc:
10421         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10422         key.type = BTRFS_ROOT_ITEM_KEY;
10423         key.offset = (u64)-1;
10424         root = btrfs_read_fs_root(fs_info, &key);
10425         if (IS_ERR(root)) {
10426                 fprintf(stderr, "Error reading data reloc tree\n");
10427                 ret = PTR_ERR(root);
10428                 goto out;
10429         }
10430         record_root_in_trans(trans, root);
10431         ret = btrfs_fsck_reinit_root(trans, root, 0);
10432         if (ret)
10433                 goto out;
10434         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10435 out:
10436         btrfs_free_path(path);
10437         return ret;
10438 }
10439
10440 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10441                               struct btrfs_fs_info *fs_info)
10442 {
10443         u64 start = 0;
10444         int ret;
10445
10446         /*
10447          * The only reason we don't do this is because right now we're just
10448          * walking the trees we find and pinning down their bytes, we don't look
10449          * at any of the leaves.  In order to do mixed groups we'd have to check
10450          * the leaves of any fs roots and pin down the bytes for any file
10451          * extents we find.  Not hard but why do it if we don't have to?
10452          */
10453         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10454                 fprintf(stderr, "We don't support re-initing the extent tree "
10455                         "for mixed block groups yet, please notify a btrfs "
10456                         "developer you want to do this so they can add this "
10457                         "functionality.\n");
10458                 return -EINVAL;
10459         }
10460
10461         /*
10462          * first we need to walk all of the trees except the extent tree and pin
10463          * down the bytes that are in use so we don't overwrite any existing
10464          * metadata.
10465          */
10466         ret = pin_metadata_blocks(fs_info);
10467         if (ret) {
10468                 fprintf(stderr, "error pinning down used bytes\n");
10469                 return ret;
10470         }
10471
10472         /*
10473          * Need to drop all the block groups since we're going to recreate all
10474          * of them again.
10475          */
10476         btrfs_free_block_groups(fs_info);
10477         ret = reset_block_groups(fs_info);
10478         if (ret) {
10479                 fprintf(stderr, "error resetting the block groups\n");
10480                 return ret;
10481         }
10482
10483         /* Ok we can allocate now, reinit the extent root */
10484         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10485         if (ret) {
10486                 fprintf(stderr, "extent root initialization failed\n");
10487                 /*
10488                  * When the transaction code is updated we should end the
10489                  * transaction, but for now progs only knows about commit so
10490                  * just return an error.
10491                  */
10492                 return ret;
10493         }
10494
10495         /*
10496          * Now we have all the in-memory block groups setup so we can make
10497          * allocations properly, and the metadata we care about is safe since we
10498          * pinned all of it above.
10499          */
10500         while (1) {
10501                 struct btrfs_block_group_cache *cache;
10502
10503                 cache = btrfs_lookup_first_block_group(fs_info, start);
10504                 if (!cache)
10505                         break;
10506                 start = cache->key.objectid + cache->key.offset;
10507                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10508                                         &cache->key, &cache->item,
10509                                         sizeof(cache->item));
10510                 if (ret) {
10511                         fprintf(stderr, "Error adding block group\n");
10512                         return ret;
10513                 }
10514                 btrfs_extent_post_op(trans, fs_info->extent_root);
10515         }
10516
10517         ret = reset_balance(trans, fs_info);
10518         if (ret)
10519                 fprintf(stderr, "error resetting the pending balance\n");
10520
10521         return ret;
10522 }
10523
10524 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10525 {
10526         struct btrfs_path *path;
10527         struct btrfs_trans_handle *trans;
10528         struct btrfs_key key;
10529         int ret;
10530
10531         printf("Recowing metadata block %llu\n", eb->start);
10532         key.objectid = btrfs_header_owner(eb);
10533         key.type = BTRFS_ROOT_ITEM_KEY;
10534         key.offset = (u64)-1;
10535
10536         root = btrfs_read_fs_root(root->fs_info, &key);
10537         if (IS_ERR(root)) {
10538                 fprintf(stderr, "Couldn't find owner root %llu\n",
10539                         key.objectid);
10540                 return PTR_ERR(root);
10541         }
10542
10543         path = btrfs_alloc_path();
10544         if (!path)
10545                 return -ENOMEM;
10546
10547         trans = btrfs_start_transaction(root, 1);
10548         if (IS_ERR(trans)) {
10549                 btrfs_free_path(path);
10550                 return PTR_ERR(trans);
10551         }
10552
10553         path->lowest_level = btrfs_header_level(eb);
10554         if (path->lowest_level)
10555                 btrfs_node_key_to_cpu(eb, &key, 0);
10556         else
10557                 btrfs_item_key_to_cpu(eb, &key, 0);
10558
10559         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10560         btrfs_commit_transaction(trans, root);
10561         btrfs_free_path(path);
10562         return ret;
10563 }
10564
10565 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10566 {
10567         struct btrfs_path *path;
10568         struct btrfs_trans_handle *trans;
10569         struct btrfs_key key;
10570         int ret;
10571
10572         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10573                bad->key.type, bad->key.offset);
10574         key.objectid = bad->root_id;
10575         key.type = BTRFS_ROOT_ITEM_KEY;
10576         key.offset = (u64)-1;
10577
10578         root = btrfs_read_fs_root(root->fs_info, &key);
10579         if (IS_ERR(root)) {
10580                 fprintf(stderr, "Couldn't find owner root %llu\n",
10581                         key.objectid);
10582                 return PTR_ERR(root);
10583         }
10584
10585         path = btrfs_alloc_path();
10586         if (!path)
10587                 return -ENOMEM;
10588
10589         trans = btrfs_start_transaction(root, 1);
10590         if (IS_ERR(trans)) {
10591                 btrfs_free_path(path);
10592                 return PTR_ERR(trans);
10593         }
10594
10595         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10596         if (ret) {
10597                 if (ret > 0)
10598                         ret = 0;
10599                 goto out;
10600         }
10601         ret = btrfs_del_item(trans, root, path);
10602 out:
10603         btrfs_commit_transaction(trans, root);
10604         btrfs_free_path(path);
10605         return ret;
10606 }
10607
10608 static int zero_log_tree(struct btrfs_root *root)
10609 {
10610         struct btrfs_trans_handle *trans;
10611         int ret;
10612
10613         trans = btrfs_start_transaction(root, 1);
10614         if (IS_ERR(trans)) {
10615                 ret = PTR_ERR(trans);
10616                 return ret;
10617         }
10618         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10619         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10620         ret = btrfs_commit_transaction(trans, root);
10621         return ret;
10622 }
10623
10624 static int populate_csum(struct btrfs_trans_handle *trans,
10625                          struct btrfs_root *csum_root, char *buf, u64 start,
10626                          u64 len)
10627 {
10628         u64 offset = 0;
10629         u64 sectorsize;
10630         int ret = 0;
10631
10632         while (offset < len) {
10633                 sectorsize = csum_root->sectorsize;
10634                 ret = read_extent_data(csum_root, buf, start + offset,
10635                                        &sectorsize, 0);
10636                 if (ret)
10637                         break;
10638                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10639                                             start + offset, buf, sectorsize);
10640                 if (ret)
10641                         break;
10642                 offset += sectorsize;
10643         }
10644         return ret;
10645 }
10646
10647 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10648                                       struct btrfs_root *csum_root,
10649                                       struct btrfs_root *cur_root)
10650 {
10651         struct btrfs_path *path;
10652         struct btrfs_key key;
10653         struct extent_buffer *node;
10654         struct btrfs_file_extent_item *fi;
10655         char *buf = NULL;
10656         u64 start = 0;
10657         u64 len = 0;
10658         int slot = 0;
10659         int ret = 0;
10660
10661         path = btrfs_alloc_path();
10662         if (!path)
10663                 return -ENOMEM;
10664         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10665         if (!buf) {
10666                 ret = -ENOMEM;
10667                 goto out;
10668         }
10669
10670         key.objectid = 0;
10671         key.offset = 0;
10672         key.type = 0;
10673
10674         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10675         if (ret < 0)
10676                 goto out;
10677         /* Iterate all regular file extents and fill its csum */
10678         while (1) {
10679                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10680
10681                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10682                         goto next;
10683                 node = path->nodes[0];
10684                 slot = path->slots[0];
10685                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10686                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10687                         goto next;
10688                 start = btrfs_file_extent_disk_bytenr(node, fi);
10689                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10690
10691                 ret = populate_csum(trans, csum_root, buf, start, len);
10692                 if (ret == -EEXIST)
10693                         ret = 0;
10694                 if (ret < 0)
10695                         goto out;
10696 next:
10697                 /*
10698                  * TODO: if next leaf is corrupted, jump to nearest next valid
10699                  * leaf.
10700                  */
10701                 ret = btrfs_next_item(cur_root, path);
10702                 if (ret < 0)
10703                         goto out;
10704                 if (ret > 0) {
10705                         ret = 0;
10706                         goto out;
10707                 }
10708         }
10709
10710 out:
10711         btrfs_free_path(path);
10712         free(buf);
10713         return ret;
10714 }
10715
10716 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10717                                   struct btrfs_root *csum_root)
10718 {
10719         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10720         struct btrfs_path *path;
10721         struct btrfs_root *tree_root = fs_info->tree_root;
10722         struct btrfs_root *cur_root;
10723         struct extent_buffer *node;
10724         struct btrfs_key key;
10725         int slot = 0;
10726         int ret = 0;
10727
10728         path = btrfs_alloc_path();
10729         if (!path)
10730                 return -ENOMEM;
10731
10732         key.objectid = BTRFS_FS_TREE_OBJECTID;
10733         key.offset = 0;
10734         key.type = BTRFS_ROOT_ITEM_KEY;
10735
10736         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10737         if (ret < 0)
10738                 goto out;
10739         if (ret > 0) {
10740                 ret = -ENOENT;
10741                 goto out;
10742         }
10743
10744         while (1) {
10745                 node = path->nodes[0];
10746                 slot = path->slots[0];
10747                 btrfs_item_key_to_cpu(node, &key, slot);
10748                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10749                         goto out;
10750                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10751                         goto next;
10752                 if (!is_fstree(key.objectid))
10753                         goto next;
10754                 key.offset = (u64)-1;
10755
10756                 cur_root = btrfs_read_fs_root(fs_info, &key);
10757                 if (IS_ERR(cur_root) || !cur_root) {
10758                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10759                                 key.objectid);
10760                         goto out;
10761                 }
10762                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10763                                 cur_root);
10764                 if (ret < 0)
10765                         goto out;
10766 next:
10767                 ret = btrfs_next_item(tree_root, path);
10768                 if (ret > 0) {
10769                         ret = 0;
10770                         goto out;
10771                 }
10772                 if (ret < 0)
10773                         goto out;
10774         }
10775
10776 out:
10777         btrfs_free_path(path);
10778         return ret;
10779 }
10780
10781 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10782                                       struct btrfs_root *csum_root)
10783 {
10784         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10785         struct btrfs_path *path;
10786         struct btrfs_extent_item *ei;
10787         struct extent_buffer *leaf;
10788         char *buf;
10789         struct btrfs_key key;
10790         int ret;
10791
10792         path = btrfs_alloc_path();
10793         if (!path)
10794                 return -ENOMEM;
10795
10796         key.objectid = 0;
10797         key.type = BTRFS_EXTENT_ITEM_KEY;
10798         key.offset = 0;
10799
10800         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10801         if (ret < 0) {
10802                 btrfs_free_path(path);
10803                 return ret;
10804         }
10805
10806         buf = malloc(csum_root->sectorsize);
10807         if (!buf) {
10808                 btrfs_free_path(path);
10809                 return -ENOMEM;
10810         }
10811
10812         while (1) {
10813                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10814                         ret = btrfs_next_leaf(extent_root, path);
10815                         if (ret < 0)
10816                                 break;
10817                         if (ret) {
10818                                 ret = 0;
10819                                 break;
10820                         }
10821                 }
10822                 leaf = path->nodes[0];
10823
10824                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10825                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10826                         path->slots[0]++;
10827                         continue;
10828                 }
10829
10830                 ei = btrfs_item_ptr(leaf, path->slots[0],
10831                                     struct btrfs_extent_item);
10832                 if (!(btrfs_extent_flags(leaf, ei) &
10833                       BTRFS_EXTENT_FLAG_DATA)) {
10834                         path->slots[0]++;
10835                         continue;
10836                 }
10837
10838                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10839                                     key.offset);
10840                 if (ret)
10841                         break;
10842                 path->slots[0]++;
10843         }
10844
10845         btrfs_free_path(path);
10846         free(buf);
10847         return ret;
10848 }
10849
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * After an extent tree init the extent info is gone, so the extent tree
 * cannot be used as the source of data extents.  When @search_fs_tree is
 * non-zero the fs/subvolume trees are scanned instead; otherwise the extent
 * tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
                              : fill_csum_tree_from_extent(trans, csum_root);
}
10866
10867 static void free_roots_info_cache(void)
10868 {
10869         if (!roots_info_cache)
10870                 return;
10871
10872         while (!cache_tree_empty(roots_info_cache)) {
10873                 struct cache_extent *entry;
10874                 struct root_item_info *rii;
10875
10876                 entry = first_cache_extent(roots_info_cache);
10877                 if (!entry)
10878                         break;
10879                 remove_cache_extent(roots_info_cache, entry);
10880                 rii = container_of(entry, struct root_item_info, cache_extent);
10881                 free(rii);
10882         }
10883
10884         free(roots_info_cache);
10885         roots_info_cache = NULL;
10886 }
10887
10888 static int build_roots_info_cache(struct btrfs_fs_info *info)
10889 {
10890         int ret = 0;
10891         struct btrfs_key key;
10892         struct extent_buffer *leaf;
10893         struct btrfs_path *path;
10894
10895         if (!roots_info_cache) {
10896                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10897                 if (!roots_info_cache)
10898                         return -ENOMEM;
10899                 cache_tree_init(roots_info_cache);
10900         }
10901
10902         path = btrfs_alloc_path();
10903         if (!path)
10904                 return -ENOMEM;
10905
10906         key.objectid = 0;
10907         key.type = BTRFS_EXTENT_ITEM_KEY;
10908         key.offset = 0;
10909
10910         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10911         if (ret < 0)
10912                 goto out;
10913         leaf = path->nodes[0];
10914
10915         while (1) {
10916                 struct btrfs_key found_key;
10917                 struct btrfs_extent_item *ei;
10918                 struct btrfs_extent_inline_ref *iref;
10919                 int slot = path->slots[0];
10920                 int type;
10921                 u64 flags;
10922                 u64 root_id;
10923                 u8 level;
10924                 struct cache_extent *entry;
10925                 struct root_item_info *rii;
10926
10927                 if (slot >= btrfs_header_nritems(leaf)) {
10928                         ret = btrfs_next_leaf(info->extent_root, path);
10929                         if (ret < 0) {
10930                                 break;
10931                         } else if (ret) {
10932                                 ret = 0;
10933                                 break;
10934                         }
10935                         leaf = path->nodes[0];
10936                         slot = path->slots[0];
10937                 }
10938
10939                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10940
10941                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10942                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10943                         goto next;
10944
10945                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10946                 flags = btrfs_extent_flags(leaf, ei);
10947
10948                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10949                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10950                         goto next;
10951
10952                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10953                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10954                         level = found_key.offset;
10955                 } else {
10956                         struct btrfs_tree_block_info *binfo;
10957
10958                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10959                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10960                         level = btrfs_tree_block_level(leaf, binfo);
10961                 }
10962
10963                 /*
10964                  * For a root extent, it must be of the following type and the
10965                  * first (and only one) iref in the item.
10966                  */
10967                 type = btrfs_extent_inline_ref_type(leaf, iref);
10968                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10969                         goto next;
10970
10971                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10972                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10973                 if (!entry) {
10974                         rii = malloc(sizeof(struct root_item_info));
10975                         if (!rii) {
10976                                 ret = -ENOMEM;
10977                                 goto out;
10978                         }
10979                         rii->cache_extent.start = root_id;
10980                         rii->cache_extent.size = 1;
10981                         rii->level = (u8)-1;
10982                         entry = &rii->cache_extent;
10983                         ret = insert_cache_extent(roots_info_cache, entry);
10984                         ASSERT(ret == 0);
10985                 } else {
10986                         rii = container_of(entry, struct root_item_info,
10987                                            cache_extent);
10988                 }
10989
10990                 ASSERT(rii->cache_extent.start == root_id);
10991                 ASSERT(rii->cache_extent.size == 1);
10992
10993                 if (level > rii->level || rii->level == (u8)-1) {
10994                         rii->level = level;
10995                         rii->bytenr = found_key.objectid;
10996                         rii->gen = btrfs_extent_generation(leaf, ei);
10997                         rii->node_count = 1;
10998                 } else if (level == rii->level) {
10999                         rii->node_count++;
11000                 }
11001 next:
11002                 path->slots[0]++;
11003         }
11004
11005 out:
11006         btrfs_free_path(path);
11007
11008         return ret;
11009 }
11010
11011 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11012                                   struct btrfs_path *path,
11013                                   const struct btrfs_key *root_key,
11014                                   const int read_only_mode)
11015 {
11016         const u64 root_id = root_key->objectid;
11017         struct cache_extent *entry;
11018         struct root_item_info *rii;
11019         struct btrfs_root_item ri;
11020         unsigned long offset;
11021
11022         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11023         if (!entry) {
11024                 fprintf(stderr,
11025                         "Error: could not find extent items for root %llu\n",
11026                         root_key->objectid);
11027                 return -ENOENT;
11028         }
11029
11030         rii = container_of(entry, struct root_item_info, cache_extent);
11031         ASSERT(rii->cache_extent.start == root_id);
11032         ASSERT(rii->cache_extent.size == 1);
11033
11034         if (rii->node_count != 1) {
11035                 fprintf(stderr,
11036                         "Error: could not find btree root extent for root %llu\n",
11037                         root_id);
11038                 return -ENOENT;
11039         }
11040
11041         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11042         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11043
11044         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11045             btrfs_root_level(&ri) != rii->level ||
11046             btrfs_root_generation(&ri) != rii->gen) {
11047
11048                 /*
11049                  * If we're in repair mode but our caller told us to not update
11050                  * the root item, i.e. just check if it needs to be updated, don't
11051                  * print this message, since the caller will call us again shortly
11052                  * for the same root item without read only mode (the caller will
11053                  * open a transaction first).
11054                  */
11055                 if (!(read_only_mode && repair))
11056                         fprintf(stderr,
11057                                 "%sroot item for root %llu,"
11058                                 " current bytenr %llu, current gen %llu, current level %u,"
11059                                 " new bytenr %llu, new gen %llu, new level %u\n",
11060                                 (read_only_mode ? "" : "fixing "),
11061                                 root_id,
11062                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11063                                 btrfs_root_level(&ri),
11064                                 rii->bytenr, rii->gen, rii->level);
11065
11066                 if (btrfs_root_generation(&ri) > rii->gen) {
11067                         fprintf(stderr,
11068                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11069                                 root_id, btrfs_root_generation(&ri), rii->gen);
11070                         return -EINVAL;
11071                 }
11072
11073                 if (!read_only_mode) {
11074                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11075                         btrfs_set_root_level(&ri, rii->level);
11076                         btrfs_set_root_generation(&ri, rii->gen);
11077                         write_extent_buffer(path->nodes[0], &ri,
11078                                             offset, sizeof(ri));
11079                 }
11080
11081                 return 1;
11082         }
11083
11084         return 0;
11085 }
11086
11087 /*
11088  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11089  * caused read-only snapshots to be corrupted if they were created at a moment
11090  * when the source subvolume/snapshot had orphan items. The issue was that the
11091  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11092  * node instead of the post orphan cleanup root node.
11093  * So this function, and its callees, just detects and fixes those cases. Even
11094  * though the regression was for read-only snapshots, this function applies to
11095  * any snapshot/subvolume root.
11096  * This must be run before any other repair code - not doing it so, makes other
11097  * repair code delete or modify backrefs in the extent tree for example, which
11098  * will result in an inconsistent fs after repairing the root items.
11099  */
11100 static int repair_root_items(struct btrfs_fs_info *info)
11101 {
11102         struct btrfs_path *path = NULL;
11103         struct btrfs_key key;
11104         struct extent_buffer *leaf;
11105         struct btrfs_trans_handle *trans = NULL;
11106         int ret = 0;
11107         int bad_roots = 0;
11108         int need_trans = 0;
11109
11110         ret = build_roots_info_cache(info);
11111         if (ret)
11112                 goto out;
11113
11114         path = btrfs_alloc_path();
11115         if (!path) {
11116                 ret = -ENOMEM;
11117                 goto out;
11118         }
11119
11120         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11121         key.type = BTRFS_ROOT_ITEM_KEY;
11122         key.offset = 0;
11123
11124 again:
11125         /*
11126          * Avoid opening and committing transactions if a leaf doesn't have
11127          * any root items that need to be fixed, so that we avoid rotating
11128          * backup roots unnecessarily.
11129          */
11130         if (need_trans) {
11131                 trans = btrfs_start_transaction(info->tree_root, 1);
11132                 if (IS_ERR(trans)) {
11133                         ret = PTR_ERR(trans);
11134                         goto out;
11135                 }
11136         }
11137
11138         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11139                                 0, trans ? 1 : 0);
11140         if (ret < 0)
11141                 goto out;
11142         leaf = path->nodes[0];
11143
11144         while (1) {
11145                 struct btrfs_key found_key;
11146
11147                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11148                         int no_more_keys = find_next_key(path, &key);
11149
11150                         btrfs_release_path(path);
11151                         if (trans) {
11152                                 ret = btrfs_commit_transaction(trans,
11153                                                                info->tree_root);
11154                                 trans = NULL;
11155                                 if (ret < 0)
11156                                         goto out;
11157                         }
11158                         need_trans = 0;
11159                         if (no_more_keys)
11160                                 break;
11161                         goto again;
11162                 }
11163
11164                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11165
11166                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11167                         goto next;
11168                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11169                         goto next;
11170
11171                 ret = maybe_repair_root_item(info, path, &found_key,
11172                                              trans ? 0 : 1);
11173                 if (ret < 0)
11174                         goto out;
11175                 if (ret) {
11176                         if (!trans && repair) {
11177                                 need_trans = 1;
11178                                 key = found_key;
11179                                 btrfs_release_path(path);
11180                                 goto again;
11181                         }
11182                         bad_roots++;
11183                 }
11184 next:
11185                 path->slots[0]++;
11186         }
11187         ret = 0;
11188 out:
11189         free_roots_info_cache();
11190         btrfs_free_path(path);
11191         if (trans)
11192                 btrfs_commit_transaction(trans, info->tree_root);
11193         if (ret < 0)
11194                 return ret;
11195
11196         return bad_roots;
11197 }
11198
11199 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11200 {
11201         struct btrfs_trans_handle *trans;
11202         struct btrfs_block_group_cache *bg_cache;
11203         u64 current = 0;
11204         int ret = 0;
11205
11206         /* Clear all free space cache inodes and its extent data */
11207         while (1) {
11208                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11209                 if (!bg_cache)
11210                         break;
11211                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11212                 if (ret < 0)
11213                         return ret;
11214                 current = bg_cache->key.objectid + bg_cache->key.offset;
11215         }
11216
11217         /* Don't forget to set cache_generation to -1 */
11218         trans = btrfs_start_transaction(fs_info->tree_root, 0);
11219         if (IS_ERR(trans)) {
11220                 error("failed to update super block cache generation");
11221                 return PTR_ERR(trans);
11222         }
11223         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11224         btrfs_commit_transaction(trans, fs_info->tree_root);
11225
11226         return ret;
11227 }
11228
/*
 * Usage text for "btrfs check", stored as a NULL-terminated array of
 * strings.  The entries are, in order: the command synopsis, a one-line
 * summary, a longer description, an empty string, and then one or more
 * lines per supported option.
 * NOTE(review): the empty string presumably separates the description from
 * the option list for the code that prints this table - confirm against the
 * usage helper.
 */
11229 const char * const cmd_check_usage[] = {
11230         "btrfs check [options] <device>",
11231         "Check structural integrity of a filesystem (unmounted).",
11232         "Check structural integrity of an unmounted filesystem. Verify internal",
11233         "trees' consistency and item connectivity. In the repair mode try to",
11234         "fix the problems found. ",
11235         "WARNING: the repair mode is considered dangerous",
11236         "",
11237         "-s|--super <superblock>     use this superblock copy",
11238         "-b|--backup                 use the first valid backup root copy",
11239         "--repair                    try to repair the filesystem",
11240         "--readonly                  run in read-only mode (default)",
11241         "--init-csum-tree            create a new CRC tree",
11242         "--init-extent-tree          create a new extent tree",
11243         "--mode <MODE>               allows choice of memory/IO trade-offs",
11244         "                            where MODE is one of:",
11245         "                            original - read inodes and extents to memory (requires",
11246         "                                       more memory, does less IO)",
11247         "                            lowmem   - try to use less memory but read blocks again",
11248         "                                       when needed",
11249         "--check-data-csum           verify checksums of data blocks",
11250         "-Q|--qgroup-report          print a report on qgroup consistency",
11251         "-E|--subvol-extents <subvolid>",
11252         "                            print subvolume extents and sharing state",
11253         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
11254         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
11255         "-p|--progress               indicate progress",
11256         "--clear-space-cache v1|v2   clear space cache for v1 or v2",
11257         "                            NOTE: v1 support implemented",
11258         NULL
11259 };
11260
11261 int cmd_check(int argc, char **argv)
11262 {
11263         struct cache_tree root_cache;
11264         struct btrfs_root *root;
11265         struct btrfs_fs_info *info;
11266         u64 bytenr = 0;
11267         u64 subvolid = 0;
11268         u64 tree_root_bytenr = 0;
11269         u64 chunk_root_bytenr = 0;
11270         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11271         int ret;
11272         u64 num;
11273         int init_csum_tree = 0;
11274         int readonly = 0;
11275         int clear_space_cache = 0;
11276         int qgroup_report = 0;
11277         int qgroups_repaired = 0;
11278         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11279
11280         while(1) {
11281                 int c;
11282                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11283                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11284                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11285                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11286                 static const struct option long_options[] = {
11287                         { "super", required_argument, NULL, 's' },
11288                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11289                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11290                         { "init-csum-tree", no_argument, NULL,
11291                                 GETOPT_VAL_INIT_CSUM },
11292                         { "init-extent-tree", no_argument, NULL,
11293                                 GETOPT_VAL_INIT_EXTENT },
11294                         { "check-data-csum", no_argument, NULL,
11295                                 GETOPT_VAL_CHECK_CSUM },
11296                         { "backup", no_argument, NULL, 'b' },
11297                         { "subvol-extents", required_argument, NULL, 'E' },
11298                         { "qgroup-report", no_argument, NULL, 'Q' },
11299                         { "tree-root", required_argument, NULL, 'r' },
11300                         { "chunk-root", required_argument, NULL,
11301                                 GETOPT_VAL_CHUNK_TREE },
11302                         { "progress", no_argument, NULL, 'p' },
11303                         { "mode", required_argument, NULL,
11304                                 GETOPT_VAL_MODE },
11305                         { "clear-space-cache", required_argument, NULL,
11306                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
11307                         { NULL, 0, NULL, 0}
11308                 };
11309
11310                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11311                 if (c < 0)
11312                         break;
11313                 switch(c) {
11314                         case 'a': /* ignored */ break;
11315                         case 'b':
11316                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11317                                 break;
11318                         case 's':
11319                                 num = arg_strtou64(optarg);
11320                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11321                                         error(
11322                                         "super mirror should be less than %d",
11323                                                 BTRFS_SUPER_MIRROR_MAX);
11324                                         exit(1);
11325                                 }
11326                                 bytenr = btrfs_sb_offset(((int)num));
11327                                 printf("using SB copy %llu, bytenr %llu\n", num,
11328                                        (unsigned long long)bytenr);
11329                                 break;
11330                         case 'Q':
11331                                 qgroup_report = 1;
11332                                 break;
11333                         case 'E':
11334                                 subvolid = arg_strtou64(optarg);
11335                                 break;
11336                         case 'r':
11337                                 tree_root_bytenr = arg_strtou64(optarg);
11338                                 break;
11339                         case GETOPT_VAL_CHUNK_TREE:
11340                                 chunk_root_bytenr = arg_strtou64(optarg);
11341                                 break;
11342                         case 'p':
11343                                 ctx.progress_enabled = true;
11344                                 break;
11345                         case '?':
11346                         case 'h':
11347                                 usage(cmd_check_usage);
11348                         case GETOPT_VAL_REPAIR:
11349                                 printf("enabling repair mode\n");
11350                                 repair = 1;
11351                                 ctree_flags |= OPEN_CTREE_WRITES;
11352                                 break;
11353                         case GETOPT_VAL_READONLY:
11354                                 readonly = 1;
11355                                 break;
11356                         case GETOPT_VAL_INIT_CSUM:
11357                                 printf("Creating a new CRC tree\n");
11358                                 init_csum_tree = 1;
11359                                 repair = 1;
11360                                 ctree_flags |= OPEN_CTREE_WRITES;
11361                                 break;
11362                         case GETOPT_VAL_INIT_EXTENT:
11363                                 init_extent_tree = 1;
11364                                 ctree_flags |= (OPEN_CTREE_WRITES |
11365                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11366                                 repair = 1;
11367                                 break;
11368                         case GETOPT_VAL_CHECK_CSUM:
11369                                 check_data_csum = 1;
11370                                 break;
11371                         case GETOPT_VAL_MODE:
11372                                 check_mode = parse_check_mode(optarg);
11373                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11374                                         error("unknown mode: %s", optarg);
11375                                         exit(1);
11376                                 }
11377                                 break;
11378                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
11379                                 if (strcmp(optarg, "v1") != 0) {
11380                                         error(
11381                         "only v1 support implmented, unrecognized value %s",
11382                         optarg);
11383                                         exit(1);
11384                                 }
11385                                 clear_space_cache = 1;
11386                                 ctree_flags |= OPEN_CTREE_WRITES;
11387                                 break;
11388                 }
11389         }
11390
11391         if (check_argc_exact(argc - optind, 1))
11392                 usage(cmd_check_usage);
11393
11394         if (ctx.progress_enabled) {
11395                 ctx.tp = TASK_NOTHING;
11396                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11397         }
11398
11399         /* This check is the only reason for --readonly to exist */
11400         if (readonly && repair) {
11401                 error("repair options are not compatible with --readonly");
11402                 exit(1);
11403         }
11404
11405         /*
11406          * Not supported yet
11407          */
11408         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11409                 error("low memory mode doesn't support repair yet");
11410                 exit(1);
11411         }
11412
11413         radix_tree_init();
11414         cache_tree_init(&root_cache);
11415
11416         if((ret = check_mounted(argv[optind])) < 0) {
11417                 error("could not check mount status: %s", strerror(-ret));
11418                 goto err_out;
11419         } else if(ret) {
11420                 error("%s is currently mounted, aborting", argv[optind]);
11421                 ret = -EBUSY;
11422                 goto err_out;
11423         }
11424
11425         /* only allow partial opening under repair mode */
11426         if (repair)
11427                 ctree_flags |= OPEN_CTREE_PARTIAL;
11428
11429         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11430                                   chunk_root_bytenr, ctree_flags);
11431         if (!info) {
11432                 error("cannot open file system");
11433                 ret = -EIO;
11434                 goto err_out;
11435         }
11436
11437         global_info = info;
11438         root = info->fs_root;
11439         if (clear_space_cache) {
11440                 if (btrfs_fs_compat_ro(info,
11441                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11442                         error(
11443                         "free space cache v2 detected, clearing not implemented");
11444                         ret = 1;
11445                         goto close_out;
11446                 }
11447                 printf("Clearing free space cache\n");
11448                 ret = clear_free_space_cache(info);
11449                 if (ret) {
11450                         error("failed to clear free space cache");
11451                         ret = 1;
11452                 } else {
11453                         printf("Free space cache cleared\n");
11454                 }
11455                 goto close_out;
11456         }
11457
11458         /*
11459          * repair mode will force us to commit transaction which
11460          * will make us fail to load log tree when mounting.
11461          */
11462         if (repair && btrfs_super_log_root(info->super_copy)) {
11463                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11464                 if (!ret) {
11465                         ret = 1;
11466                         goto close_out;
11467                 }
11468                 ret = zero_log_tree(root);
11469                 if (ret) {
11470                         error("failed to zero log tree: %d", ret);
11471                         goto close_out;
11472                 }
11473         }
11474
11475         uuid_unparse(info->super_copy->fsid, uuidbuf);
11476         if (qgroup_report) {
11477                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11478                        uuidbuf);
11479                 ret = qgroup_verify_all(info);
11480                 if (ret == 0)
11481                         report_qgroups(1);
11482                 goto close_out;
11483         }
11484         if (subvolid) {
11485                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11486                        subvolid, argv[optind], uuidbuf);
11487                 ret = print_extent_state(info, subvolid);
11488                 goto close_out;
11489         }
11490         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11491
11492         if (!extent_buffer_uptodate(info->tree_root->node) ||
11493             !extent_buffer_uptodate(info->dev_root->node) ||
11494             !extent_buffer_uptodate(info->chunk_root->node)) {
11495                 error("critical roots corrupted, unable to check the filesystem");
11496                 ret = -EIO;
11497                 goto close_out;
11498         }
11499
11500         if (init_extent_tree || init_csum_tree) {
11501                 struct btrfs_trans_handle *trans;
11502
11503                 trans = btrfs_start_transaction(info->extent_root, 0);
11504                 if (IS_ERR(trans)) {
11505                         error("error starting transaction");
11506                         ret = PTR_ERR(trans);
11507                         goto close_out;
11508                 }
11509
11510                 if (init_extent_tree) {
11511                         printf("Creating a new extent tree\n");
11512                         ret = reinit_extent_tree(trans, info);
11513                         if (ret)
11514                                 goto close_out;
11515                 }
11516
11517                 if (init_csum_tree) {
11518                         printf("Reinitialize checksum tree\n");
11519                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11520                         if (ret) {
11521                                 error("checksum tree initialization failed: %d",
11522                                                 ret);
11523                                 ret = -EIO;
11524                                 goto close_out;
11525                         }
11526
11527                         ret = fill_csum_tree(trans, info->csum_root,
11528                                              init_extent_tree);
11529                         if (ret) {
11530                                 error("checksum tree refilling failed: %d", ret);
11531                                 return -EIO;
11532                         }
11533                 }
11534                 /*
11535                  * Ok now we commit and run the normal fsck, which will add
11536                  * extent entries for all of the items it finds.
11537                  */
11538                 ret = btrfs_commit_transaction(trans, info->extent_root);
11539                 if (ret)
11540                         goto close_out;
11541         }
11542         if (!extent_buffer_uptodate(info->extent_root->node)) {
11543                 error("critical: extent_root, unable to check the filesystem");
11544                 ret = -EIO;
11545                 goto close_out;
11546         }
11547         if (!extent_buffer_uptodate(info->csum_root->node)) {
11548                 error("critical: csum_root, unable to check the filesystem");
11549                 ret = -EIO;
11550                 goto close_out;
11551         }
11552
11553         if (!ctx.progress_enabled)
11554                 printf("checking extents");
11555         if (check_mode == CHECK_MODE_LOWMEM)
11556                 ret = check_chunks_and_extents_v2(root);
11557         else
11558                 ret = check_chunks_and_extents(root);
11559         if (ret)
11560                 printf("Errors found in extent allocation tree or chunk allocation");
11561
11562         ret = repair_root_items(info);
11563         if (ret < 0)
11564                 goto close_out;
11565         if (repair) {
11566                 fprintf(stderr, "Fixed %d roots.\n", ret);
11567                 ret = 0;
11568         } else if (ret > 0) {
11569                 fprintf(stderr,
11570                        "Found %d roots with an outdated root item.\n",
11571                        ret);
11572                 fprintf(stderr,
11573                         "Please run a filesystem check with the option --repair to fix them.\n");
11574                 ret = 1;
11575                 goto close_out;
11576         }
11577
11578         if (!ctx.progress_enabled) {
11579                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11580                         fprintf(stderr, "checking free space tree\n");
11581                 else
11582                         fprintf(stderr, "checking free space cache\n");
11583         }
11584         ret = check_space_cache(root);
11585         if (ret)
11586                 goto out;
11587
11588         /*
11589          * We used to have to have these hole extents in between our real
11590          * extents so if we don't have this flag set we need to make sure there
11591          * are no gaps in the file extents for inodes, otherwise we can just
11592          * ignore it when this happens.
11593          */
11594         no_holes = btrfs_fs_incompat(root->fs_info,
11595                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11596         if (!ctx.progress_enabled)
11597                 fprintf(stderr, "checking fs roots\n");
11598         ret = check_fs_roots(root, &root_cache);
11599         if (ret)
11600                 goto out;
11601
11602         fprintf(stderr, "checking csums\n");
11603         ret = check_csums(root);
11604         if (ret)
11605                 goto out;
11606
11607         fprintf(stderr, "checking root refs\n");
11608         ret = check_root_refs(root, &root_cache);
11609         if (ret)
11610                 goto out;
11611
11612         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11613                 struct extent_buffer *eb;
11614
11615                 eb = list_first_entry(&root->fs_info->recow_ebs,
11616                                       struct extent_buffer, recow);
11617                 list_del_init(&eb->recow);
11618                 ret = recow_extent_buffer(root, eb);
11619                 if (ret)
11620                         break;
11621         }
11622
11623         while (!list_empty(&delete_items)) {
11624                 struct bad_item *bad;
11625
11626                 bad = list_first_entry(&delete_items, struct bad_item, list);
11627                 list_del_init(&bad->list);
11628                 if (repair)
11629                         ret = delete_bad_item(root, bad);
11630                 free(bad);
11631         }
11632
11633         if (info->quota_enabled) {
11634                 int err;
11635                 fprintf(stderr, "checking quota groups\n");
11636                 err = qgroup_verify_all(info);
11637                 if (err)
11638                         goto out;
11639                 report_qgroups(0);
11640                 err = repair_qgroups(info, &qgroups_repaired);
11641                 if (err)
11642                         goto out;
11643         }
11644
11645         if (!list_empty(&root->fs_info->recow_ebs)) {
11646                 error("transid errors in file system");
11647                 ret = 1;
11648         }
11649 out:
11650         /* Don't override original ret */
11651         if (!ret && qgroups_repaired)
11652                 ret = qgroups_repaired;
11653
11654         if (found_old_backref) { /*
11655                  * there was a disk format change when mixed
11656                  * backref was in testing tree. The old format
11657                  * existed about one week.
11658                  */
11659                 printf("\n * Found old mixed backref format. "
11660                        "The old format is not supported! *"
11661                        "\n * Please mount the FS in readonly mode, "
11662                        "backup data and re-format the FS. *\n\n");
11663                 ret = 1;
11664         }
11665         printf("found %llu bytes used err is %d\n",
11666                (unsigned long long)bytes_used, ret);
11667         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11668         printf("total tree bytes: %llu\n",
11669                (unsigned long long)total_btree_bytes);
11670         printf("total fs tree bytes: %llu\n",
11671                (unsigned long long)total_fs_tree_bytes);
11672         printf("total extent tree bytes: %llu\n",
11673                (unsigned long long)total_extent_tree_bytes);
11674         printf("btree space waste bytes: %llu\n",
11675                (unsigned long long)btree_space_waste);
11676         printf("file data blocks allocated: %llu\n referenced %llu\n",
11677                 (unsigned long long)data_bytes_allocated,
11678                 (unsigned long long)data_bytes_referenced);
11679
11680         free_qgroup_counts();
11681         free_root_recs_tree(&root_cache);
11682 close_out:
11683         close_ctree(root);
11684 err_out:
11685         if (ctx.progress_enabled)
11686                 task_deinit(ctx.info);
11687
11688         return ret;
11689 }