Revert "btrfs-progs: check: supplement extent backref list with rbtree"
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phase the progress reporter is currently describing.  The values are
 * used as indexes into the message table in print_status_check();
 * TASK_NOTHING has no message and makes that function return early.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,	/* must remain the last element */
};
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Check implementations that can be selected by name through
 * parse_check_mode().
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM,
	/* what parse_check_mode() returns for an unrecognized name */
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default one. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
86 struct extent_backref {
87         struct list_head list;
88         unsigned int is_data:1;
89         unsigned int found_extent_tree:1;
90         unsigned int full_backref:1;
91         unsigned int found_ref:1;
92         unsigned int broken:1;
93 };
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
100 struct data_backref {
101         struct extent_backref node;
102         union {
103                 u64 parent;
104                 u64 root;
105         };
106         u64 owner;
107         u64 offset;
108         u64 disk_bytenr;
109         u64 bytes;
110         u64 ram_bytes;
111         u32 num_refs;
112         u32 found_ref;
113 };
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
120 /*
121  * Much like data_backref, just removed the undetermined members
122  * and change it to use list_head.
123  * During extent scan, it is stored in root->orphan_data_extent.
124  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
125  */
126 struct orphan_data_extent {
127         struct list_head list;
128         u64 root;
129         u64 objectid;
130         u64 offset;
131         u64 disk_bytenr;
132         u64 disk_len;
133 };
134
135 struct tree_backref {
136         struct extent_backref node;
137         union {
138                 u64 parent;
139                 u64 root;
140         };
141 };
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
/*
 * Explicit "not set yet" value for extent_record::flag_block_full_backref,
 * which is two bits wide so that it can hold this value.
 */
enum {
	FLAG_UNSET = 2
};
150
151 struct extent_record {
152         struct list_head backrefs;
153         struct list_head dups;
154         struct list_head list;
155         struct cache_extent cache;
156         struct btrfs_disk_key parent_key;
157         u64 start;
158         u64 max_size;
159         u64 nr;
160         u64 refs;
161         u64 extent_item_refs;
162         u64 generation;
163         u64 parent_generation;
164         u64 info_objectid;
165         u32 num_duplicates;
166         u8 info_level;
167         unsigned int flag_block_full_backref:2;
168         unsigned int found_rec:1;
169         unsigned int content_checked:1;
170         unsigned int owner_ref_checked:1;
171         unsigned int is_root:1;
172         unsigned int metadata:1;
173         unsigned int bad_full_backref:1;
174         unsigned int crossing_stripes:1;
175         unsigned int wrong_chunk_type:1;
176 };
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
183 struct inode_backref {
184         struct list_head list;
185         unsigned int found_dir_item:1;
186         unsigned int found_dir_index:1;
187         unsigned int found_inode_ref:1;
188         unsigned int filetype:8;
189         int errors;
190         unsigned int ref_type;
191         u64 dir;
192         u64 index;
193         u16 namelen;
194         char name[0];
195 };
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
202 struct root_item_record {
203         struct list_head list;
204         u64 objectid;
205         u64 bytenr;
206         u64 last_snapshot;
207         u8 level;
208         u8 drop_level;
209         int level_size;
210         struct btrfs_key drop_key;
211 };
212
/*
 * Error bits kept in the "errors" member of a backref (for example
 * inode_backref::errors); print_ref_error() turns them into text.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x0001 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* 0x0002 */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x0010 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* 0x0020 */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* 0x0080 */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x0100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* 0x0400 */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
226
227 struct file_extent_hole {
228         struct rb_node node;
229         u64 start;
230         u64 len;
231 };
232
233 struct inode_record {
234         struct list_head backrefs;
235         unsigned int checked:1;
236         unsigned int merging:1;
237         unsigned int found_inode_item:1;
238         unsigned int found_dir_item:1;
239         unsigned int found_file_extent:1;
240         unsigned int found_csum_item:1;
241         unsigned int some_csum_missing:1;
242         unsigned int nodatasum:1;
243         int errors;
244
245         u64 ino;
246         u32 nlink;
247         u32 imode;
248         u64 isize;
249         u64 nbytes;
250
251         u32 found_link;
252         u64 found_size;
253         u64 extent_start;
254         u64 extent_end;
255         struct rb_root holes;
256         struct list_head orphan_extents;
257
258         u32 refs;
259 };
260
/*
 * Error bits kept in inode_record::errors; print_inode_error() turns
 * them into text.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* 0x0002 */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* 0x0004 */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* 0x0008 */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* 0x4000 */
276
277 struct root_backref {
278         struct list_head list;
279         unsigned int found_dir_item:1;
280         unsigned int found_dir_index:1;
281         unsigned int found_back_ref:1;
282         unsigned int found_forward_ref:1;
283         unsigned int reachable:1;
284         int errors;
285         u64 ref_root;
286         u64 dir;
287         u64 index;
288         u16 namelen;
289         char name[0];
290 };
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
297 struct root_record {
298         struct list_head backrefs;
299         struct cache_extent cache;
300         unsigned int found_root_item:1;
301         u64 objectid;
302         u32 found_ref;
303 };
304
305 struct ptr_node {
306         struct cache_extent cache;
307         void *data;
308 };
309
310 struct shared_node {
311         struct cache_extent cache;
312         struct cache_tree root_cache;
313         struct cache_tree inode_cache;
314         struct inode_record *current;
315         u32 refs;
316 };
317
318 struct block_info {
319         u64 start;
320         u32 size;
321 };
322
323 struct walk_control {
324         struct cache_tree shared;
325         struct shared_node *nodes[BTRFS_MAX_LEVEL];
326         int active_node;
327         int root_level;
328 };
329
330 struct bad_item {
331         struct btrfs_key key;
332         u64 root_id;
333         struct list_head list;
334 };
335
336 struct extent_entry {
337         u64 bytenr;
338         u64 bytes;
339         int count;
340         int broken;
341         struct list_head list;
342 };
343
344 struct root_item_info {
345         /* level of the root */
346         u8 level;
347         /* number of nodes at this level, must be 1 for a root */
348         int node_count;
349         u64 bytenr;
350         u64 gen;
351         struct cache_extent cache_extent;
352 };
353
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are
 * only used internally for error classification.
 *
 * Every bit must be unique.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two errors
 * impossible to tell apart; it now uses the next free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         size_t size;
663         int ret;
664
665         rec = malloc(sizeof(*rec));
666         if (!rec)
667                 return ERR_PTR(-ENOMEM);
668         memcpy(rec, orig_rec, sizeof(*rec));
669         rec->refs = 1;
670         INIT_LIST_HEAD(&rec->backrefs);
671         INIT_LIST_HEAD(&rec->orphan_extents);
672         rec->holes = RB_ROOT;
673
674         list_for_each_entry(orig, &orig_rec->backrefs, list) {
675                 size = sizeof(*orig) + orig->namelen + 1;
676                 backref = malloc(size);
677                 if (!backref) {
678                         ret = -ENOMEM;
679                         goto cleanup;
680                 }
681                 memcpy(backref, orig, size);
682                 list_add_tail(&backref->list, &rec->backrefs);
683         }
684         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
685                 dst_orphan = malloc(sizeof(*dst_orphan));
686                 if (!dst_orphan) {
687                         ret = -ENOMEM;
688                         goto cleanup;
689                 }
690                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
691                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
692         }
693         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
694         BUG_ON(ret < 0);
695
696         return rec;
697
698 cleanup:
699         if (!list_empty(&rec->backrefs))
700                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
701                         list_del(&orig->list);
702                         free(orig);
703                 }
704
705         if (!list_empty(&rec->orphan_extents))
706                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
707                         list_del(&orig->list);
708                         free(orig);
709                 }
710
711         free(rec);
712
713         return ERR_PTR(ret);
714 }
715
716 static void print_orphan_data_extents(struct list_head *orphan_extents,
717                                       u64 objectid)
718 {
719         struct orphan_data_extent *orphan;
720
721         if (list_empty(orphan_extents))
722                 return;
723         printf("The following data extent is lost in tree %llu:\n",
724                objectid);
725         list_for_each_entry(orphan, orphan_extents, list) {
726                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
727                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
728                        orphan->disk_len);
729         }
730 }
731
732 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
733 {
734         u64 root_objectid = root->root_key.objectid;
735         int errors = rec->errors;
736
737         if (!errors)
738                 return;
739         /* reloc root errors, we print its corresponding fs root objectid*/
740         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
741                 root_objectid = root->root_key.offset;
742                 fprintf(stderr, "reloc");
743         }
744         fprintf(stderr, "root %llu inode %llu errors %x",
745                 (unsigned long long) root_objectid,
746                 (unsigned long long) rec->ino, rec->errors);
747
748         if (errors & I_ERR_NO_INODE_ITEM)
749                 fprintf(stderr, ", no inode item");
750         if (errors & I_ERR_NO_ORPHAN_ITEM)
751                 fprintf(stderr, ", no orphan item");
752         if (errors & I_ERR_DUP_INODE_ITEM)
753                 fprintf(stderr, ", dup inode item");
754         if (errors & I_ERR_DUP_DIR_INDEX)
755                 fprintf(stderr, ", dup dir index");
756         if (errors & I_ERR_ODD_DIR_ITEM)
757                 fprintf(stderr, ", odd dir item");
758         if (errors & I_ERR_ODD_FILE_EXTENT)
759                 fprintf(stderr, ", odd file extent");
760         if (errors & I_ERR_BAD_FILE_EXTENT)
761                 fprintf(stderr, ", bad file extent");
762         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
763                 fprintf(stderr, ", file extent overlap");
764         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
765                 fprintf(stderr, ", file extent discount");
766         if (errors & I_ERR_DIR_ISIZE_WRONG)
767                 fprintf(stderr, ", dir isize wrong");
768         if (errors & I_ERR_FILE_NBYTES_WRONG)
769                 fprintf(stderr, ", nbytes wrong");
770         if (errors & I_ERR_ODD_CSUM_ITEM)
771                 fprintf(stderr, ", odd csum item");
772         if (errors & I_ERR_SOME_CSUM_MISSING)
773                 fprintf(stderr, ", some csum missing");
774         if (errors & I_ERR_LINK_COUNT_WRONG)
775                 fprintf(stderr, ", link count wrong");
776         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
777                 fprintf(stderr, ", orphan file extent");
778         fprintf(stderr, "\n");
779         /* Print the orphan extents if needed */
780         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
781                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
782
783         /* Print the holes if needed */
784         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
785                 struct file_extent_hole *hole;
786                 struct rb_node *node;
787                 int found = 0;
788
789                 node = rb_first(&rec->holes);
790                 fprintf(stderr, "Found file extent holes:\n");
791                 while (node) {
792                         found = 1;
793                         hole = rb_entry(node, struct file_extent_hole, node);
794                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
795                                 hole->start, hole->len);
796                         node = rb_next(node);
797                 }
798                 if (!found)
799                         fprintf(stderr, "\tstart: 0, len: %llu\n",
800                                 round_up(rec->isize, root->sectorsize));
801         }
802 }
803
804 static void print_ref_error(int errors)
805 {
806         if (errors & REF_ERR_NO_DIR_ITEM)
807                 fprintf(stderr, ", no dir item");
808         if (errors & REF_ERR_NO_DIR_INDEX)
809                 fprintf(stderr, ", no dir index");
810         if (errors & REF_ERR_NO_INODE_REF)
811                 fprintf(stderr, ", no inode ref");
812         if (errors & REF_ERR_DUP_DIR_ITEM)
813                 fprintf(stderr, ", dup dir item");
814         if (errors & REF_ERR_DUP_DIR_INDEX)
815                 fprintf(stderr, ", dup dir index");
816         if (errors & REF_ERR_DUP_INODE_REF)
817                 fprintf(stderr, ", dup inode ref");
818         if (errors & REF_ERR_INDEX_UNMATCH)
819                 fprintf(stderr, ", index mismatch");
820         if (errors & REF_ERR_FILETYPE_UNMATCH)
821                 fprintf(stderr, ", filetype mismatch");
822         if (errors & REF_ERR_NAME_TOO_LONG)
823                 fprintf(stderr, ", name too long");
824         if (errors & REF_ERR_NO_ROOT_REF)
825                 fprintf(stderr, ", no root ref");
826         if (errors & REF_ERR_NO_ROOT_BACKREF)
827                 fprintf(stderr, ", no root backref");
828         if (errors & REF_ERR_DUP_ROOT_REF)
829                 fprintf(stderr, ", dup root ref");
830         if (errors & REF_ERR_DUP_ROOT_BACKREF)
831                 fprintf(stderr, ", dup root backref");
832         fprintf(stderr, "\n");
833 }
834
835 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
836                                           u64 ino, int mod)
837 {
838         struct ptr_node *node;
839         struct cache_extent *cache;
840         struct inode_record *rec = NULL;
841         int ret;
842
843         cache = lookup_cache_extent(inode_cache, ino, 1);
844         if (cache) {
845                 node = container_of(cache, struct ptr_node, cache);
846                 rec = node->data;
847                 if (mod && rec->refs > 1) {
848                         node->data = clone_inode_rec(rec);
849                         if (IS_ERR(node->data))
850                                 return node->data;
851                         rec->refs--;
852                         rec = node->data;
853                 }
854         } else if (mod) {
855                 rec = calloc(1, sizeof(*rec));
856                 if (!rec)
857                         return ERR_PTR(-ENOMEM);
858                 rec->ino = ino;
859                 rec->extent_start = (u64)-1;
860                 rec->refs = 1;
861                 INIT_LIST_HEAD(&rec->backrefs);
862                 INIT_LIST_HEAD(&rec->orphan_extents);
863                 rec->holes = RB_ROOT;
864
865                 node = malloc(sizeof(*node));
866                 if (!node) {
867                         free(rec);
868                         return ERR_PTR(-ENOMEM);
869                 }
870                 node->cache.start = ino;
871                 node->cache.size = 1;
872                 node->data = rec;
873
874                 if (ino == BTRFS_FREE_INO_OBJECTID)
875                         rec->found_link = 1;
876
877                 ret = insert_cache_extent(inode_cache, &node->cache);
878                 if (ret)
879                         return ERR_PTR(-EEXIST);
880         }
881         return rec;
882 }
883
884 static void free_orphan_data_extents(struct list_head *orphan_extents)
885 {
886         struct orphan_data_extent *orphan;
887
888         while (!list_empty(orphan_extents)) {
889                 orphan = list_entry(orphan_extents->next,
890                                     struct orphan_data_extent, list);
891                 list_del(&orphan->list);
892                 free(orphan);
893         }
894 }
895
896 static void free_inode_rec(struct inode_record *rec)
897 {
898         struct inode_backref *backref;
899
900         if (--rec->refs > 0)
901                 return;
902
903         while (!list_empty(&rec->backrefs)) {
904                 backref = to_inode_backref(rec->backrefs.next);
905                 list_del(&backref->list);
906                 free(backref);
907         }
908         free_orphan_data_extents(&rec->orphan_extents);
909         free_file_extent_holes(&rec->holes);
910         free(rec);
911 }
912
913 static int can_free_inode_rec(struct inode_record *rec)
914 {
915         if (!rec->errors && rec->checked && rec->found_inode_item &&
916             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
917                 return 1;
918         return 0;
919 }
920
921 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
922                                  struct inode_record *rec)
923 {
924         struct cache_extent *cache;
925         struct inode_backref *tmp, *backref;
926         struct ptr_node *node;
927         unsigned char filetype;
928
929         if (!rec->found_inode_item)
930                 return;
931
932         filetype = imode_to_type(rec->imode);
933         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
934                 if (backref->found_dir_item && backref->found_dir_index) {
935                         if (backref->filetype != filetype)
936                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
937                         if (!backref->errors && backref->found_inode_ref &&
938                             rec->nlink == rec->found_link) {
939                                 list_del(&backref->list);
940                                 free(backref);
941                         }
942                 }
943         }
944
945         if (!rec->checked || rec->merging)
946                 return;
947
948         if (S_ISDIR(rec->imode)) {
949                 if (rec->found_size != rec->isize)
950                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
951                 if (rec->found_file_extent)
952                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
953         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
954                 if (rec->found_dir_item)
955                         rec->errors |= I_ERR_ODD_DIR_ITEM;
956                 if (rec->found_size != rec->nbytes)
957                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
958                 if (rec->nlink > 0 && !no_holes &&
959                     (rec->extent_end < rec->isize ||
960                      first_extent_gap(&rec->holes) < rec->isize))
961                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
962         }
963
964         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
965                 if (rec->found_csum_item && rec->nodatasum)
966                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
967                 if (rec->some_csum_missing && !rec->nodatasum)
968                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
969         }
970
971         BUG_ON(rec->refs != 1);
972         if (can_free_inode_rec(rec)) {
973                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
974                 node = container_of(cache, struct ptr_node, cache);
975                 BUG_ON(node->data != rec);
976                 remove_cache_extent(inode_cache, &node->cache);
977                 free(node);
978                 free_inode_rec(rec);
979         }
980 }
981
982 static int check_orphan_item(struct btrfs_root *root, u64 ino)
983 {
984         struct btrfs_path path;
985         struct btrfs_key key;
986         int ret;
987
988         key.objectid = BTRFS_ORPHAN_OBJECTID;
989         key.type = BTRFS_ORPHAN_ITEM_KEY;
990         key.offset = ino;
991
992         btrfs_init_path(&path);
993         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
994         btrfs_release_path(&path);
995         if (ret > 0)
996                 ret = -ENOENT;
997         return ret;
998 }
999
1000 static int process_inode_item(struct extent_buffer *eb,
1001                               int slot, struct btrfs_key *key,
1002                               struct shared_node *active_node)
1003 {
1004         struct inode_record *rec;
1005         struct btrfs_inode_item *item;
1006
1007         rec = active_node->current;
1008         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1009         if (rec->found_inode_item) {
1010                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1011                 return 1;
1012         }
1013         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1014         rec->nlink = btrfs_inode_nlink(eb, item);
1015         rec->isize = btrfs_inode_size(eb, item);
1016         rec->nbytes = btrfs_inode_nbytes(eb, item);
1017         rec->imode = btrfs_inode_mode(eb, item);
1018         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1019                 rec->nodatasum = 1;
1020         rec->found_inode_item = 1;
1021         if (rec->nlink == 0)
1022                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1023         maybe_free_inode_rec(&active_node->inode_cache, rec);
1024         return 0;
1025 }
1026
1027 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1028                                                 const char *name,
1029                                                 int namelen, u64 dir)
1030 {
1031         struct inode_backref *backref;
1032
1033         list_for_each_entry(backref, &rec->backrefs, list) {
1034                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1035                         break;
1036                 if (backref->dir != dir || backref->namelen != namelen)
1037                         continue;
1038                 if (memcmp(name, backref->name, namelen))
1039                         continue;
1040                 return backref;
1041         }
1042
1043         backref = malloc(sizeof(*backref) + namelen + 1);
1044         if (!backref)
1045                 return NULL;
1046         memset(backref, 0, sizeof(*backref));
1047         backref->dir = dir;
1048         backref->namelen = namelen;
1049         memcpy(backref->name, name, namelen);
1050         backref->name[namelen] = '\0';
1051         list_add_tail(&backref->list, &rec->backrefs);
1052         return backref;
1053 }
1054
1055 static int add_inode_backref(struct cache_tree *inode_cache,
1056                              u64 ino, u64 dir, u64 index,
1057                              const char *name, int namelen,
1058                              int filetype, int itemtype, int errors)
1059 {
1060         struct inode_record *rec;
1061         struct inode_backref *backref;
1062
1063         rec = get_inode_rec(inode_cache, ino, 1);
1064         BUG_ON(IS_ERR(rec));
1065         backref = get_inode_backref(rec, name, namelen, dir);
1066         BUG_ON(!backref);
1067         if (errors)
1068                 backref->errors |= errors;
1069         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1070                 if (backref->found_dir_index)
1071                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1072                 if (backref->found_inode_ref && backref->index != index)
1073                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1074                 if (backref->found_dir_item && backref->filetype != filetype)
1075                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1076
1077                 backref->index = index;
1078                 backref->filetype = filetype;
1079                 backref->found_dir_index = 1;
1080         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1081                 rec->found_link++;
1082                 if (backref->found_dir_item)
1083                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1084                 if (backref->found_dir_index && backref->filetype != filetype)
1085                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1086
1087                 backref->filetype = filetype;
1088                 backref->found_dir_item = 1;
1089         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1090                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1091                 if (backref->found_inode_ref)
1092                         backref->errors |= REF_ERR_DUP_INODE_REF;
1093                 if (backref->found_dir_index && backref->index != index)
1094                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1095                 else
1096                         backref->index = index;
1097
1098                 backref->ref_type = itemtype;
1099                 backref->found_inode_ref = 1;
1100         } else {
1101                 BUG_ON(1);
1102         }
1103
1104         maybe_free_inode_rec(inode_cache, rec);
1105         return 0;
1106 }
1107
1108 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1109                             struct cache_tree *dst_cache)
1110 {
1111         struct inode_backref *backref;
1112         u32 dir_count = 0;
1113         int ret = 0;
1114
1115         dst->merging = 1;
1116         list_for_each_entry(backref, &src->backrefs, list) {
1117                 if (backref->found_dir_index) {
1118                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1119                                         backref->index, backref->name,
1120                                         backref->namelen, backref->filetype,
1121                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1122                 }
1123                 if (backref->found_dir_item) {
1124                         dir_count++;
1125                         add_inode_backref(dst_cache, dst->ino,
1126                                         backref->dir, 0, backref->name,
1127                                         backref->namelen, backref->filetype,
1128                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1129                 }
1130                 if (backref->found_inode_ref) {
1131                         add_inode_backref(dst_cache, dst->ino,
1132                                         backref->dir, backref->index,
1133                                         backref->name, backref->namelen, 0,
1134                                         backref->ref_type, backref->errors);
1135                 }
1136         }
1137
1138         if (src->found_dir_item)
1139                 dst->found_dir_item = 1;
1140         if (src->found_file_extent)
1141                 dst->found_file_extent = 1;
1142         if (src->found_csum_item)
1143                 dst->found_csum_item = 1;
1144         if (src->some_csum_missing)
1145                 dst->some_csum_missing = 1;
1146         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1147                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1148                 if (ret < 0)
1149                         return ret;
1150         }
1151
1152         BUG_ON(src->found_link < dir_count);
1153         dst->found_link += src->found_link - dir_count;
1154         dst->found_size += src->found_size;
1155         if (src->extent_start != (u64)-1) {
1156                 if (dst->extent_start == (u64)-1) {
1157                         dst->extent_start = src->extent_start;
1158                         dst->extent_end = src->extent_end;
1159                 } else {
1160                         if (dst->extent_end > src->extent_start)
1161                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1162                         else if (dst->extent_end < src->extent_start) {
1163                                 ret = add_file_extent_hole(&dst->holes,
1164                                         dst->extent_end,
1165                                         src->extent_start - dst->extent_end);
1166                         }
1167                         if (dst->extent_end < src->extent_end)
1168                                 dst->extent_end = src->extent_end;
1169                 }
1170         }
1171
1172         dst->errors |= src->errors;
1173         if (src->found_inode_item) {
1174                 if (!dst->found_inode_item) {
1175                         dst->nlink = src->nlink;
1176                         dst->isize = src->isize;
1177                         dst->nbytes = src->nbytes;
1178                         dst->imode = src->imode;
1179                         dst->nodatasum = src->nodatasum;
1180                         dst->found_inode_item = 1;
1181                 } else {
1182                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1183                 }
1184         }
1185         dst->merging = 0;
1186
1187         return 0;
1188 }
1189
/*
 * Transfer the inode records cached in @src_node into @dst_node.
 *
 * One reference is dropped from @src_node first.  If that was the last one,
 * the ptr_nodes are moved out of the source caches; otherwise new ptr_nodes
 * are allocated for the destination that point at the same inode_record,
 * whose reference count is bumped.  root_cache is processed first, then
 * inode_cache.
 *
 * When the destination already holds a record for the same key (-EEXIST on
 * insert) the source record is merged into it with merge_inode_recs().
 *
 * Always returns 0; allocation and lookup failures trip BUG_ON().
 */
static int splice_shared_node(struct shared_node *src_node,
			      struct shared_node *dst_node)
{
	struct cache_extent *cache;
	struct ptr_node *node, *ins;
	struct cache_tree *src, *dst;
	struct inode_record *rec, *conflict;
	u64 current_ino = 0;
	int splice = 0;
	int ret;

	/* Last reference gone: entries can be moved instead of shared. */
	if (--src_node->refs == 0)
		splice = 1;
	if (src_node->current)
		current_ino = src_node->current->ino;

	src = &src_node->root_cache;
	dst = &dst_node->root_cache;
again:
	cache = search_cache_extent(src, 0);
	while (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		/* Advance before the current entry may be unlinked below. */
		cache = next_cache_extent(cache);

		if (splice) {
			remove_cache_extent(src, &node->cache);
			ins = node;
		} else {
			ins = malloc(sizeof(*ins));
			BUG_ON(!ins);
			ins->cache.start = node->cache.start;
			ins->cache.size = node->cache.size;
			ins->data = rec;
			rec->refs++;
		}
		ret = insert_cache_extent(dst, &ins->cache);
		if (ret == -EEXIST) {
			/*
			 * The destination already tracks this key: merge into
			 * the existing record, then drop the reference and the
			 * ptr_node that could not be inserted.
			 * NOTE(review): the return value of merge_inode_recs()
			 * is not checked here.
			 */
			conflict = get_inode_rec(dst, rec->ino, 1);
			BUG_ON(IS_ERR(conflict));
			merge_inode_recs(rec, conflict, dst);
			if (rec->checked) {
				conflict->checked = 1;
				if (dst_node->current == conflict)
					dst_node->current = NULL;
			}
			maybe_free_inode_rec(dst, conflict);
			free_inode_rec(rec);
			free(ins);
		} else {
			BUG_ON(ret);
		}
	}

	/* Second pass: repeat the whole walk for the inode caches. */
	if (src == &src_node->root_cache) {
		src = &src_node->inode_cache;
		dst = &dst_node->inode_cache;
		goto again;
	}

	/*
	 * If the source was working on a higher inode number than the
	 * destination (or the destination has no current record), mark the
	 * destination's current record as checked and make the record for
	 * current_ino the new current one.  At this point dst refers to
	 * dst_node->inode_cache.
	 */
	if (current_ino > 0 && (!dst_node->current ||
	    current_ino > dst_node->current->ino)) {
		if (dst_node->current) {
			dst_node->current->checked = 1;
			maybe_free_inode_rec(dst, dst_node->current);
		}
		dst_node->current = get_inode_rec(dst, current_ino, 1);
		BUG_ON(IS_ERR(dst_node->current));
	}
	return 0;
}
1261
1262 static void free_inode_ptr(struct cache_extent *cache)
1263 {
1264         struct ptr_node *node;
1265         struct inode_record *rec;
1266
1267         node = container_of(cache, struct ptr_node, cache);
1268         rec = node->data;
1269         free_inode_rec(rec);
1270         free(node);
1271 }
1272
/*
 * Instantiate the cache-tree teardown helper for inode records; entries are
 * released through free_inode_ptr().
 * NOTE(review): presumably this expands to free_inode_recs_tree(), which is
 * called elsewhere in this file -- confirm against the macro definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1274
1275 static struct shared_node *find_shared_node(struct cache_tree *shared,
1276                                             u64 bytenr)
1277 {
1278         struct cache_extent *cache;
1279         struct shared_node *node;
1280
1281         cache = lookup_cache_extent(shared, bytenr, 1);
1282         if (cache) {
1283                 node = container_of(cache, struct shared_node, cache);
1284                 return node;
1285         }
1286         return NULL;
1287 }
1288
1289 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1290 {
1291         int ret;
1292         struct shared_node *node;
1293
1294         node = calloc(1, sizeof(*node));
1295         if (!node)
1296                 return -ENOMEM;
1297         node->cache.start = bytenr;
1298         node->cache.size = 1;
1299         cache_tree_init(&node->root_cache);
1300         cache_tree_init(&node->inode_cache);
1301         node->refs = refs;
1302
1303         ret = insert_cache_extent(shared, &node->cache);
1304
1305         return ret;
1306 }
1307
1308 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1309                              struct walk_control *wc, int level)
1310 {
1311         struct shared_node *node;
1312         struct shared_node *dest;
1313         int ret;
1314
1315         if (level == wc->active_node)
1316                 return 0;
1317
1318         BUG_ON(wc->active_node <= level);
1319         node = find_shared_node(&wc->shared, bytenr);
1320         if (!node) {
1321                 ret = add_shared_node(&wc->shared, bytenr, refs);
1322                 BUG_ON(ret);
1323                 node = find_shared_node(&wc->shared, bytenr);
1324                 wc->nodes[level] = node;
1325                 wc->active_node = level;
1326                 return 0;
1327         }
1328
1329         if (wc->root_level == wc->active_node &&
1330             btrfs_root_refs(&root->root_item) == 0) {
1331                 if (--node->refs == 0) {
1332                         free_inode_recs_tree(&node->root_cache);
1333                         free_inode_recs_tree(&node->inode_cache);
1334                         remove_cache_extent(&wc->shared, &node->cache);
1335                         free(node);
1336                 }
1337                 return 1;
1338         }
1339
1340         dest = wc->nodes[wc->active_node];
1341         splice_shared_node(node, dest);
1342         if (node->refs == 0) {
1343                 remove_cache_extent(&wc->shared, &node->cache);
1344                 free(node);
1345         }
1346         return 1;
1347 }
1348
1349 static int leave_shared_node(struct btrfs_root *root,
1350                              struct walk_control *wc, int level)
1351 {
1352         struct shared_node *node;
1353         struct shared_node *dest;
1354         int i;
1355
1356         if (level == wc->root_level)
1357                 return 0;
1358
1359         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1360                 if (wc->nodes[i])
1361                         break;
1362         }
1363         BUG_ON(i >= BTRFS_MAX_LEVEL);
1364
1365         node = wc->nodes[wc->active_node];
1366         wc->nodes[wc->active_node] = NULL;
1367         wc->active_node = i;
1368
1369         dest = wc->nodes[wc->active_node];
1370         if (wc->active_node < wc->root_level ||
1371             btrfs_root_refs(&root->root_item) > 0) {
1372                 BUG_ON(node->refs <= 1);
1373                 splice_shared_node(node, dest);
1374         } else {
1375                 BUG_ON(node->refs < 2);
1376                 node->refs--;
1377         }
1378         return 0;
1379 }
1380
1381 /*
1382  * Returns:
1383  * < 0 - on error
1384  * 1   - if the root with id child_root_id is a child of root parent_root_id
1385  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1386  *       has other root(s) as parent(s)
1387  * 2   - if the root child_root_id doesn't have any parent roots
1388  */
1389 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1390                          u64 child_root_id)
1391 {
1392         struct btrfs_path path;
1393         struct btrfs_key key;
1394         struct extent_buffer *leaf;
1395         int has_parent = 0;
1396         int ret;
1397
1398         btrfs_init_path(&path);
1399
1400         key.objectid = parent_root_id;
1401         key.type = BTRFS_ROOT_REF_KEY;
1402         key.offset = child_root_id;
1403         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1404                                 0, 0);
1405         if (ret < 0)
1406                 return ret;
1407         btrfs_release_path(&path);
1408         if (!ret)
1409                 return 1;
1410
1411         key.objectid = child_root_id;
1412         key.type = BTRFS_ROOT_BACKREF_KEY;
1413         key.offset = 0;
1414         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1415                                 0, 0);
1416         if (ret < 0)
1417                 goto out;
1418
1419         while (1) {
1420                 leaf = path.nodes[0];
1421                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1422                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1423                         if (ret)
1424                                 break;
1425                         leaf = path.nodes[0];
1426                 }
1427
1428                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1429                 if (key.objectid != child_root_id ||
1430                     key.type != BTRFS_ROOT_BACKREF_KEY)
1431                         break;
1432
1433                 has_parent = 1;
1434
1435                 if (key.offset == parent_root_id) {
1436                         btrfs_release_path(&path);
1437                         return 1;
1438                 }
1439
1440                 path.slots[0]++;
1441         }
1442 out:
1443         btrfs_release_path(&path);
1444         if (ret < 0)
1445                 return ret;
1446         return has_parent ? 0 : 2;
1447 }
1448
1449 static int process_dir_item(struct btrfs_root *root,
1450                             struct extent_buffer *eb,
1451                             int slot, struct btrfs_key *key,
1452                             struct shared_node *active_node)
1453 {
1454         u32 total;
1455         u32 cur = 0;
1456         u32 len;
1457         u32 name_len;
1458         u32 data_len;
1459         int error;
1460         int nritems = 0;
1461         int filetype;
1462         struct btrfs_dir_item *di;
1463         struct inode_record *rec;
1464         struct cache_tree *root_cache;
1465         struct cache_tree *inode_cache;
1466         struct btrfs_key location;
1467         char namebuf[BTRFS_NAME_LEN];
1468
1469         root_cache = &active_node->root_cache;
1470         inode_cache = &active_node->inode_cache;
1471         rec = active_node->current;
1472         rec->found_dir_item = 1;
1473
1474         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1475         total = btrfs_item_size_nr(eb, slot);
1476         while (cur < total) {
1477                 nritems++;
1478                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1479                 name_len = btrfs_dir_name_len(eb, di);
1480                 data_len = btrfs_dir_data_len(eb, di);
1481                 filetype = btrfs_dir_type(eb, di);
1482
1483                 rec->found_size += name_len;
1484                 if (name_len <= BTRFS_NAME_LEN) {
1485                         len = name_len;
1486                         error = 0;
1487                 } else {
1488                         len = BTRFS_NAME_LEN;
1489                         error = REF_ERR_NAME_TOO_LONG;
1490                 }
1491                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1492
1493                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1494                         add_inode_backref(inode_cache, location.objectid,
1495                                           key->objectid, key->offset, namebuf,
1496                                           len, filetype, key->type, error);
1497                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1498                         add_inode_backref(root_cache, location.objectid,
1499                                           key->objectid, key->offset,
1500                                           namebuf, len, filetype,
1501                                           key->type, error);
1502                 } else {
1503                         fprintf(stderr, "invalid location in dir item %u\n",
1504                                 location.type);
1505                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1506                                           key->objectid, key->offset, namebuf,
1507                                           len, filetype, key->type, error);
1508                 }
1509
1510                 len = sizeof(*di) + name_len + data_len;
1511                 di = (struct btrfs_dir_item *)((char *)di + len);
1512                 cur += len;
1513         }
1514         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1515                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1516
1517         return 0;
1518 }
1519
1520 static int process_inode_ref(struct extent_buffer *eb,
1521                              int slot, struct btrfs_key *key,
1522                              struct shared_node *active_node)
1523 {
1524         u32 total;
1525         u32 cur = 0;
1526         u32 len;
1527         u32 name_len;
1528         u64 index;
1529         int error;
1530         struct cache_tree *inode_cache;
1531         struct btrfs_inode_ref *ref;
1532         char namebuf[BTRFS_NAME_LEN];
1533
1534         inode_cache = &active_node->inode_cache;
1535
1536         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1537         total = btrfs_item_size_nr(eb, slot);
1538         while (cur < total) {
1539                 name_len = btrfs_inode_ref_name_len(eb, ref);
1540                 index = btrfs_inode_ref_index(eb, ref);
1541                 if (name_len <= BTRFS_NAME_LEN) {
1542                         len = name_len;
1543                         error = 0;
1544                 } else {
1545                         len = BTRFS_NAME_LEN;
1546                         error = REF_ERR_NAME_TOO_LONG;
1547                 }
1548                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1549                 add_inode_backref(inode_cache, key->objectid, key->offset,
1550                                   index, namebuf, len, 0, key->type, error);
1551
1552                 len = sizeof(*ref) + name_len;
1553                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1554                 cur += len;
1555         }
1556         return 0;
1557 }
1558
1559 static int process_inode_extref(struct extent_buffer *eb,
1560                                 int slot, struct btrfs_key *key,
1561                                 struct shared_node *active_node)
1562 {
1563         u32 total;
1564         u32 cur = 0;
1565         u32 len;
1566         u32 name_len;
1567         u64 index;
1568         u64 parent;
1569         int error;
1570         struct cache_tree *inode_cache;
1571         struct btrfs_inode_extref *extref;
1572         char namebuf[BTRFS_NAME_LEN];
1573
1574         inode_cache = &active_node->inode_cache;
1575
1576         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1577         total = btrfs_item_size_nr(eb, slot);
1578         while (cur < total) {
1579                 name_len = btrfs_inode_extref_name_len(eb, extref);
1580                 index = btrfs_inode_extref_index(eb, extref);
1581                 parent = btrfs_inode_extref_parent(eb, extref);
1582                 if (name_len <= BTRFS_NAME_LEN) {
1583                         len = name_len;
1584                         error = 0;
1585                 } else {
1586                         len = BTRFS_NAME_LEN;
1587                         error = REF_ERR_NAME_TOO_LONG;
1588                 }
1589                 read_extent_buffer(eb, namebuf,
1590                                    (unsigned long)(extref + 1), len);
1591                 add_inode_backref(inode_cache, key->objectid, parent,
1592                                   index, namebuf, len, 0, key->type, error);
1593
1594                 len = sizeof(*extref) + name_len;
1595                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1596                 cur += len;
1597         }
1598         return 0;
1599
1600 }
1601
1602 static int count_csum_range(struct btrfs_root *root, u64 start,
1603                             u64 len, u64 *found)
1604 {
1605         struct btrfs_key key;
1606         struct btrfs_path path;
1607         struct extent_buffer *leaf;
1608         int ret;
1609         size_t size;
1610         *found = 0;
1611         u64 csum_end;
1612         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1613
1614         btrfs_init_path(&path);
1615
1616         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1617         key.offset = start;
1618         key.type = BTRFS_EXTENT_CSUM_KEY;
1619
1620         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1621                                 &key, &path, 0, 0);
1622         if (ret < 0)
1623                 goto out;
1624         if (ret > 0 && path.slots[0] > 0) {
1625                 leaf = path.nodes[0];
1626                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1627                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1628                     key.type == BTRFS_EXTENT_CSUM_KEY)
1629                         path.slots[0]--;
1630         }
1631
1632         while (len > 0) {
1633                 leaf = path.nodes[0];
1634                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1635                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1636                         if (ret > 0)
1637                                 break;
1638                         else if (ret < 0)
1639                                 goto out;
1640                         leaf = path.nodes[0];
1641                 }
1642
1643                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1644                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1645                     key.type != BTRFS_EXTENT_CSUM_KEY)
1646                         break;
1647
1648                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1649                 if (key.offset >= start + len)
1650                         break;
1651
1652                 if (key.offset > start)
1653                         start = key.offset;
1654
1655                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1656                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1657                 if (csum_end > start) {
1658                         size = min(csum_end - start, len);
1659                         len -= size;
1660                         start += size;
1661                         *found += size;
1662                 }
1663
1664                 path.slots[0]++;
1665         }
1666 out:
1667         btrfs_release_path(&path);
1668         if (ret < 0)
1669                 return ret;
1670         return 0;
1671 }
1672
1673 static int process_file_extent(struct btrfs_root *root,
1674                                 struct extent_buffer *eb,
1675                                 int slot, struct btrfs_key *key,
1676                                 struct shared_node *active_node)
1677 {
1678         struct inode_record *rec;
1679         struct btrfs_file_extent_item *fi;
1680         u64 num_bytes = 0;
1681         u64 disk_bytenr = 0;
1682         u64 extent_offset = 0;
1683         u64 mask = root->sectorsize - 1;
1684         int extent_type;
1685         int ret;
1686
1687         rec = active_node->current;
1688         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1689         rec->found_file_extent = 1;
1690
1691         if (rec->extent_start == (u64)-1) {
1692                 rec->extent_start = key->offset;
1693                 rec->extent_end = key->offset;
1694         }
1695
1696         if (rec->extent_end > key->offset)
1697                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1698         else if (rec->extent_end < key->offset) {
1699                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1700                                            key->offset - rec->extent_end);
1701                 if (ret < 0)
1702                         return ret;
1703         }
1704
1705         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1706         extent_type = btrfs_file_extent_type(eb, fi);
1707
1708         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1709                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1710                 if (num_bytes == 0)
1711                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1712                 rec->found_size += num_bytes;
1713                 num_bytes = (num_bytes + mask) & ~mask;
1714         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1715                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1716                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1717                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1718                 extent_offset = btrfs_file_extent_offset(eb, fi);
1719                 if (num_bytes == 0 || (num_bytes & mask))
1720                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1721                 if (num_bytes + extent_offset >
1722                     btrfs_file_extent_ram_bytes(eb, fi))
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1725                     (btrfs_file_extent_compression(eb, fi) ||
1726                      btrfs_file_extent_encryption(eb, fi) ||
1727                      btrfs_file_extent_other_encoding(eb, fi)))
1728                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1729                 if (disk_bytenr > 0)
1730                         rec->found_size += num_bytes;
1731         } else {
1732                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733         }
1734         rec->extent_end = key->offset + num_bytes;
1735
1736         /*
1737          * The data reloc tree will copy full extents into its inode and then
1738          * copy the corresponding csums.  Because the extent it copied could be
1739          * a preallocated extent that hasn't been written to yet there may be no
1740          * csums to copy, ergo we won't have csums for our file extent.  This is
1741          * ok so just don't bother checking csums if the inode belongs to the
1742          * data reloc tree.
1743          */
1744         if (disk_bytenr > 0 &&
1745             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1746                 u64 found;
1747                 if (btrfs_file_extent_compression(eb, fi))
1748                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1749                 else
1750                         disk_bytenr += extent_offset;
1751
1752                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1753                 if (ret < 0)
1754                         return ret;
1755                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1756                         if (found > 0)
1757                                 rec->found_csum_item = 1;
1758                         if (found < num_bytes)
1759                                 rec->some_csum_missing = 1;
1760                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1761                         if (found > 0)
1762                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1763                 }
1764         }
1765         return 0;
1766 }
1767
1768 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1769                             struct walk_control *wc)
1770 {
1771         struct btrfs_key key;
1772         u32 nritems;
1773         int i;
1774         int ret = 0;
1775         struct cache_tree *inode_cache;
1776         struct shared_node *active_node;
1777
1778         if (wc->root_level == wc->active_node &&
1779             btrfs_root_refs(&root->root_item) == 0)
1780                 return 0;
1781
1782         active_node = wc->nodes[wc->active_node];
1783         inode_cache = &active_node->inode_cache;
1784         nritems = btrfs_header_nritems(eb);
1785         for (i = 0; i < nritems; i++) {
1786                 btrfs_item_key_to_cpu(eb, &key, i);
1787
1788                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1789                         continue;
1790                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1791                         continue;
1792
1793                 if (active_node->current == NULL ||
1794                     active_node->current->ino < key.objectid) {
1795                         if (active_node->current) {
1796                                 active_node->current->checked = 1;
1797                                 maybe_free_inode_rec(inode_cache,
1798                                                      active_node->current);
1799                         }
1800                         active_node->current = get_inode_rec(inode_cache,
1801                                                              key.objectid, 1);
1802                         BUG_ON(IS_ERR(active_node->current));
1803                 }
1804                 switch (key.type) {
1805                 case BTRFS_DIR_ITEM_KEY:
1806                 case BTRFS_DIR_INDEX_KEY:
1807                         ret = process_dir_item(root, eb, i, &key, active_node);
1808                         break;
1809                 case BTRFS_INODE_REF_KEY:
1810                         ret = process_inode_ref(eb, i, &key, active_node);
1811                         break;
1812                 case BTRFS_INODE_EXTREF_KEY:
1813                         ret = process_inode_extref(eb, i, &key, active_node);
1814                         break;
1815                 case BTRFS_INODE_ITEM_KEY:
1816                         ret = process_inode_item(eb, i, &key, active_node);
1817                         break;
1818                 case BTRFS_EXTENT_DATA_KEY:
1819                         ret = process_file_extent(root, eb, i, &key,
1820                                                   active_node);
1821                         break;
1822                 default:
1823                         break;
1824                 };
1825         }
1826         return ret;
1827 }
1828
1829 static void reada_walk_down(struct btrfs_root *root,
1830                             struct extent_buffer *node, int slot)
1831 {
1832         u64 bytenr;
1833         u64 ptr_gen;
1834         u32 nritems;
1835         u32 blocksize;
1836         int i;
1837         int level;
1838
1839         level = btrfs_header_level(node);
1840         if (level != 1)
1841                 return;
1842
1843         nritems = btrfs_header_nritems(node);
1844         blocksize = root->nodesize;
1845         for (i = slot; i < nritems; i++) {
1846                 bytenr = btrfs_node_blockptr(node, i);
1847                 ptr_gen = btrfs_node_ptr_generation(node, i);
1848                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1849         }
1850 }
1851
1852 /*
1853  * Check the child node/leaf by the following condition:
1854  * 1. the first item key of the node/leaf should be the same with the one
1855  *    in parent.
1856  * 2. block in parent node should match the child node/leaf.
1857  * 3. generation of parent node and child's header should be consistent.
1858  *
1859  * Or the child node/leaf pointed by the key in parent is not valid.
1860  *
1861  * We hope to check leaf owner too, but since subvol may share leaves,
1862  * which makes leaf owner check not so strong, key check should be
1863  * sufficient enough for that case.
1864  */
1865 static int check_child_node(struct btrfs_root *root,
1866                             struct extent_buffer *parent, int slot,
1867                             struct extent_buffer *child)
1868 {
1869         struct btrfs_key parent_key;
1870         struct btrfs_key child_key;
1871         int ret = 0;
1872
1873         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1874         if (btrfs_header_level(child) == 0)
1875                 btrfs_item_key_to_cpu(child, &child_key, 0);
1876         else
1877                 btrfs_node_key_to_cpu(child, &child_key, 0);
1878
1879         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1880                 ret = -EINVAL;
1881                 fprintf(stderr,
1882                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1883                         parent_key.objectid, parent_key.type, parent_key.offset,
1884                         child_key.objectid, child_key.type, child_key.offset);
1885         }
1886         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1887                 ret = -EINVAL;
1888                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1889                         btrfs_node_blockptr(parent, slot),
1890                         btrfs_header_bytenr(child));
1891         }
1892         if (btrfs_node_ptr_generation(parent, slot) !=
1893             btrfs_header_generation(child)) {
1894                 ret = -EINVAL;
1895                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1896                         btrfs_header_generation(child),
1897                         btrfs_node_ptr_generation(parent, slot));
1898         }
1899         return ret;
1900 }
1901
1902 struct node_refs {
1903         u64 bytenr[BTRFS_MAX_LEVEL];
1904         u64 refs[BTRFS_MAX_LEVEL];
1905 };
1906
1907 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1908                           struct walk_control *wc, int *level,
1909                           struct node_refs *nrefs)
1910 {
1911         enum btrfs_tree_block_status status;
1912         u64 bytenr;
1913         u64 ptr_gen;
1914         struct extent_buffer *next;
1915         struct extent_buffer *cur;
1916         u32 blocksize;
1917         int ret, err = 0;
1918         u64 refs;
1919
1920         WARN_ON(*level < 0);
1921         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1922
1923         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1924                 refs = nrefs->refs[*level];
1925                 ret = 0;
1926         } else {
1927                 ret = btrfs_lookup_extent_info(NULL, root,
1928                                        path->nodes[*level]->start,
1929                                        *level, 1, &refs, NULL);
1930                 if (ret < 0) {
1931                         err = ret;
1932                         goto out;
1933                 }
1934                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1935                 nrefs->refs[*level] = refs;
1936         }
1937
1938         if (refs > 1) {
1939                 ret = enter_shared_node(root, path->nodes[*level]->start,
1940                                         refs, wc, *level);
1941                 if (ret > 0) {
1942                         err = ret;
1943                         goto out;
1944                 }
1945         }
1946
1947         while (*level >= 0) {
1948                 WARN_ON(*level < 0);
1949                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1950                 cur = path->nodes[*level];
1951
1952                 if (btrfs_header_level(cur) != *level)
1953                         WARN_ON(1);
1954
1955                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1956                         break;
1957                 if (*level == 0) {
1958                         ret = process_one_leaf(root, cur, wc);
1959                         if (ret < 0)
1960                                 err = ret;
1961                         break;
1962                 }
1963                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1964                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1965                 blocksize = root->nodesize;
1966
1967                 if (bytenr == nrefs->bytenr[*level - 1]) {
1968                         refs = nrefs->refs[*level - 1];
1969                 } else {
1970                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1971                                         *level - 1, 1, &refs, NULL);
1972                         if (ret < 0) {
1973                                 refs = 0;
1974                         } else {
1975                                 nrefs->bytenr[*level - 1] = bytenr;
1976                                 nrefs->refs[*level - 1] = refs;
1977                         }
1978                 }
1979
1980                 if (refs > 1) {
1981                         ret = enter_shared_node(root, bytenr, refs,
1982                                                 wc, *level - 1);
1983                         if (ret > 0) {
1984                                 path->slots[*level]++;
1985                                 continue;
1986                         }
1987                 }
1988
1989                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1990                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1991                         free_extent_buffer(next);
1992                         reada_walk_down(root, cur, path->slots[*level]);
1993                         next = read_tree_block(root, bytenr, blocksize,
1994                                                ptr_gen);
1995                         if (!extent_buffer_uptodate(next)) {
1996                                 struct btrfs_key node_key;
1997
1998                                 btrfs_node_key_to_cpu(path->nodes[*level],
1999                                                       &node_key,
2000                                                       path->slots[*level]);
2001                                 btrfs_add_corrupt_extent_record(root->fs_info,
2002                                                 &node_key,
2003                                                 path->nodes[*level]->start,
2004                                                 root->nodesize, *level);
2005                                 err = -EIO;
2006                                 goto out;
2007                         }
2008                 }
2009
2010                 ret = check_child_node(root, cur, path->slots[*level], next);
2011                 if (ret) {
2012                         err = ret;
2013                         goto out;
2014                 }
2015
2016                 if (btrfs_is_leaf(next))
2017                         status = btrfs_check_leaf(root, NULL, next);
2018                 else
2019                         status = btrfs_check_node(root, NULL, next);
2020                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2021                         free_extent_buffer(next);
2022                         err = -EIO;
2023                         goto out;
2024                 }
2025
2026                 *level = *level - 1;
2027                 free_extent_buffer(path->nodes[*level]);
2028                 path->nodes[*level] = next;
2029                 path->slots[*level] = 0;
2030         }
2031 out:
2032         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2033         return err;
2034 }
2035
2036 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2037                         struct walk_control *wc, int *level)
2038 {
2039         int i;
2040         struct extent_buffer *leaf;
2041
2042         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2043                 leaf = path->nodes[i];
2044                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2045                         path->slots[i]++;
2046                         *level = i;
2047                         return 0;
2048                 } else {
2049                         free_extent_buffer(path->nodes[*level]);
2050                         path->nodes[*level] = NULL;
2051                         BUG_ON(*level > wc->active_node);
2052                         if (*level == wc->active_node)
2053                                 leave_shared_node(root, wc, *level);
2054                         *level = i + 1;
2055                 }
2056         }
2057         return 1;
2058 }
2059
2060 static int check_root_dir(struct inode_record *rec)
2061 {
2062         struct inode_backref *backref;
2063         int ret = -1;
2064
2065         if (!rec->found_inode_item || rec->errors)
2066                 goto out;
2067         if (rec->nlink != 1 || rec->found_link != 0)
2068                 goto out;
2069         if (list_empty(&rec->backrefs))
2070                 goto out;
2071         backref = to_inode_backref(rec->backrefs.next);
2072         if (!backref->found_inode_ref)
2073                 goto out;
2074         if (backref->index != 0 || backref->namelen != 2 ||
2075             memcmp(backref->name, "..", 2))
2076                 goto out;
2077         if (backref->found_dir_index || backref->found_dir_item)
2078                 goto out;
2079         ret = 0;
2080 out:
2081         return ret;
2082 }
2083
2084 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2085                               struct btrfs_root *root, struct btrfs_path *path,
2086                               struct inode_record *rec)
2087 {
2088         struct btrfs_inode_item *ei;
2089         struct btrfs_key key;
2090         int ret;
2091
2092         key.objectid = rec->ino;
2093         key.type = BTRFS_INODE_ITEM_KEY;
2094         key.offset = (u64)-1;
2095
2096         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2097         if (ret < 0)
2098                 goto out;
2099         if (ret) {
2100                 if (!path->slots[0]) {
2101                         ret = -ENOENT;
2102                         goto out;
2103                 }
2104                 path->slots[0]--;
2105                 ret = 0;
2106         }
2107         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2108         if (key.objectid != rec->ino) {
2109                 ret = -ENOENT;
2110                 goto out;
2111         }
2112
2113         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2114                             struct btrfs_inode_item);
2115         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2116         btrfs_mark_buffer_dirty(path->nodes[0]);
2117         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2118         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2119                root->root_key.objectid);
2120 out:
2121         btrfs_release_path(path);
2122         return ret;
2123 }
2124
2125 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2126                                     struct btrfs_root *root,
2127                                     struct btrfs_path *path,
2128                                     struct inode_record *rec)
2129 {
2130         int ret;
2131
2132         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2133         btrfs_release_path(path);
2134         if (!ret)
2135                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2136         return ret;
2137 }
2138
2139 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2140                                struct btrfs_root *root,
2141                                struct btrfs_path *path,
2142                                struct inode_record *rec)
2143 {
2144         struct btrfs_inode_item *ei;
2145         struct btrfs_key key;
2146         int ret = 0;
2147
2148         key.objectid = rec->ino;
2149         key.type = BTRFS_INODE_ITEM_KEY;
2150         key.offset = 0;
2151
2152         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2153         if (ret) {
2154                 if (ret > 0)
2155                         ret = -ENOENT;
2156                 goto out;
2157         }
2158
2159         /* Since ret == 0, no need to check anything */
2160         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2161                             struct btrfs_inode_item);
2162         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2163         btrfs_mark_buffer_dirty(path->nodes[0]);
2164         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2165         printf("reset nbytes for ino %llu root %llu\n",
2166                rec->ino, root->root_key.objectid);
2167 out:
2168         btrfs_release_path(path);
2169         return ret;
2170 }
2171
2172 static int add_missing_dir_index(struct btrfs_root *root,
2173                                  struct cache_tree *inode_cache,
2174                                  struct inode_record *rec,
2175                                  struct inode_backref *backref)
2176 {
2177         struct btrfs_path *path;
2178         struct btrfs_trans_handle *trans;
2179         struct btrfs_dir_item *dir_item;
2180         struct extent_buffer *leaf;
2181         struct btrfs_key key;
2182         struct btrfs_disk_key disk_key;
2183         struct inode_record *dir_rec;
2184         unsigned long name_ptr;
2185         u32 data_size = sizeof(*dir_item) + backref->namelen;
2186         int ret;
2187
2188         path = btrfs_alloc_path();
2189         if (!path)
2190                 return -ENOMEM;
2191
2192         trans = btrfs_start_transaction(root, 1);
2193         if (IS_ERR(trans)) {
2194                 btrfs_free_path(path);
2195                 return PTR_ERR(trans);
2196         }
2197
2198         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2199                 (unsigned long long)rec->ino);
2200         key.objectid = backref->dir;
2201         key.type = BTRFS_DIR_INDEX_KEY;
2202         key.offset = backref->index;
2203
2204         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2205         BUG_ON(ret);
2206
2207         leaf = path->nodes[0];
2208         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2209
2210         disk_key.objectid = cpu_to_le64(rec->ino);
2211         disk_key.type = BTRFS_INODE_ITEM_KEY;
2212         disk_key.offset = 0;
2213
2214         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2215         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2216         btrfs_set_dir_data_len(leaf, dir_item, 0);
2217         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2218         name_ptr = (unsigned long)(dir_item + 1);
2219         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2220         btrfs_mark_buffer_dirty(leaf);
2221         btrfs_free_path(path);
2222         btrfs_commit_transaction(trans, root);
2223
2224         backref->found_dir_index = 1;
2225         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2226         BUG_ON(IS_ERR(dir_rec));
2227         if (!dir_rec)
2228                 return 0;
2229         dir_rec->found_size += backref->namelen;
2230         if (dir_rec->found_size == dir_rec->isize &&
2231             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2232                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2233         if (dir_rec->found_size != dir_rec->isize)
2234                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2235
2236         return 0;
2237 }
2238
2239 static int delete_dir_index(struct btrfs_root *root,
2240                             struct cache_tree *inode_cache,
2241                             struct inode_record *rec,
2242                             struct inode_backref *backref)
2243 {
2244         struct btrfs_trans_handle *trans;
2245         struct btrfs_dir_item *di;
2246         struct btrfs_path *path;
2247         int ret = 0;
2248
2249         path = btrfs_alloc_path();
2250         if (!path)
2251                 return -ENOMEM;
2252
2253         trans = btrfs_start_transaction(root, 1);
2254         if (IS_ERR(trans)) {
2255                 btrfs_free_path(path);
2256                 return PTR_ERR(trans);
2257         }
2258
2259
2260         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261                 (unsigned long long)backref->dir,
2262                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263                 (unsigned long long)root->objectid);
2264
2265         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2266                                     backref->name, backref->namelen,
2267                                     backref->index, -1);
2268         if (IS_ERR(di)) {
2269                 ret = PTR_ERR(di);
2270                 btrfs_free_path(path);
2271                 btrfs_commit_transaction(trans, root);
2272                 if (ret == -ENOENT)
2273                         return 0;
2274                 return ret;
2275         }
2276
2277         if (!di)
2278                 ret = btrfs_del_item(trans, root, path);
2279         else
2280                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2281         BUG_ON(ret);
2282         btrfs_free_path(path);
2283         btrfs_commit_transaction(trans, root);
2284         return ret;
2285 }
2286
2287 static int create_inode_item(struct btrfs_root *root,
2288                              struct inode_record *rec,
2289                              struct inode_backref *backref, int root_dir)
2290 {
2291         struct btrfs_trans_handle *trans;
2292         struct btrfs_inode_item inode_item;
2293         time_t now = time(NULL);
2294         int ret;
2295
2296         trans = btrfs_start_transaction(root, 1);
2297         if (IS_ERR(trans)) {
2298                 ret = PTR_ERR(trans);
2299                 return ret;
2300         }
2301
2302         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2303                 "be incomplete, please check permissions and content after "
2304                 "the fsck completes.\n", (unsigned long long)root->objectid,
2305                 (unsigned long long)rec->ino);
2306
2307         memset(&inode_item, 0, sizeof(inode_item));
2308         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2309         if (root_dir)
2310                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2311         else
2312                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2313         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2314         if (rec->found_dir_item) {
2315                 if (rec->found_file_extent)
2316                         fprintf(stderr, "root %llu inode %llu has both a dir "
2317                                 "item and extents, unsure if it is a dir or a "
2318                                 "regular file so setting it as a directory\n",
2319                                 (unsigned long long)root->objectid,
2320                                 (unsigned long long)rec->ino);
2321                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2322                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2323         } else if (!rec->found_dir_item) {
2324                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2325                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2326         }
2327         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2328         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2329         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2334         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2335
2336         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2337         BUG_ON(ret);
2338         btrfs_commit_transaction(trans, root);
2339         return 0;
2340 }
2341
2342 static int repair_inode_backrefs(struct btrfs_root *root,
2343                                  struct inode_record *rec,
2344                                  struct cache_tree *inode_cache,
2345                                  int delete)
2346 {
2347         struct inode_backref *tmp, *backref;
2348         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2349         int ret = 0;
2350         int repaired = 0;
2351
2352         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2353                 if (!delete && rec->ino == root_dirid) {
2354                         if (!rec->found_inode_item) {
2355                                 ret = create_inode_item(root, rec, backref, 1);
2356                                 if (ret)
2357                                         break;
2358                                 repaired++;
2359                         }
2360                 }
2361
2362                 /* Index 0 for root dir's are special, don't mess with it */
2363                 if (rec->ino == root_dirid && backref->index == 0)
2364                         continue;
2365
2366                 if (delete &&
2367                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2368                      (backref->found_dir_index && backref->found_inode_ref &&
2369                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2370                         ret = delete_dir_index(root, inode_cache, rec, backref);
2371                         if (ret)
2372                                 break;
2373                         repaired++;
2374                         list_del(&backref->list);
2375                         free(backref);
2376                 }
2377
2378                 if (!delete && !backref->found_dir_index &&
2379                     backref->found_dir_item && backref->found_inode_ref) {
2380                         ret = add_missing_dir_index(root, inode_cache, rec,
2381                                                     backref);
2382                         if (ret)
2383                                 break;
2384                         repaired++;
2385                         if (backref->found_dir_item &&
2386                             backref->found_dir_index &&
2387                             backref->found_dir_index) {
2388                                 if (!backref->errors &&
2389                                     backref->found_inode_ref) {
2390                                         list_del(&backref->list);
2391                                         free(backref);
2392                                 }
2393                         }
2394                 }
2395
2396                 if (!delete && (!backref->found_dir_index &&
2397                                 !backref->found_dir_item &&
2398                                 backref->found_inode_ref)) {
2399                         struct btrfs_trans_handle *trans;
2400                         struct btrfs_key location;
2401
2402                         ret = check_dir_conflict(root, backref->name,
2403                                                  backref->namelen,
2404                                                  backref->dir,
2405                                                  backref->index);
2406                         if (ret) {
2407                                 /*
2408                                  * let nlink fixing routine to handle it,
2409                                  * which can do it better.
2410                                  */
2411                                 ret = 0;
2412                                 break;
2413                         }
2414                         location.objectid = rec->ino;
2415                         location.type = BTRFS_INODE_ITEM_KEY;
2416                         location.offset = 0;
2417
2418                         trans = btrfs_start_transaction(root, 1);
2419                         if (IS_ERR(trans)) {
2420                                 ret = PTR_ERR(trans);
2421                                 break;
2422                         }
2423                         fprintf(stderr, "adding missing dir index/item pair "
2424                                 "for inode %llu\n",
2425                                 (unsigned long long)rec->ino);
2426                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2427                                                     backref->namelen,
2428                                                     backref->dir, &location,
2429                                                     imode_to_type(rec->imode),
2430                                                     backref->index);
2431                         BUG_ON(ret);
2432                         btrfs_commit_transaction(trans, root);
2433                         repaired++;
2434                 }
2435
2436                 if (!delete && (backref->found_inode_ref &&
2437                                 backref->found_dir_index &&
2438                                 backref->found_dir_item &&
2439                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2440                                 !rec->found_inode_item)) {
2441                         ret = create_inode_item(root, rec, backref, 0);
2442                         if (ret)
2443                                 break;
2444                         repaired++;
2445                 }
2446
2447         }
2448         return ret ? ret : repaired;
2449 }
2450
2451 /*
2452  * To determine the file type for nlink/inode_item repair
2453  *
2454  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2455  * Return -ENOENT if file type is not found.
2456  */
2457 static int find_file_type(struct inode_record *rec, u8 *type)
2458 {
2459         struct inode_backref *backref;
2460
2461         /* For inode item recovered case */
2462         if (rec->found_inode_item) {
2463                 *type = imode_to_type(rec->imode);
2464                 return 0;
2465         }
2466
2467         list_for_each_entry(backref, &rec->backrefs, list) {
2468                 if (backref->found_dir_index || backref->found_dir_item) {
2469                         *type = backref->filetype;
2470                         return 0;
2471                 }
2472         }
2473         return -ENOENT;
2474 }
2475
2476 /*
2477  * To determine the file name for nlink repair
2478  *
2479  * Return 0 if file name is found, set name and namelen.
2480  * Return -ENOENT if file name is not found.
2481  */
2482 static int find_file_name(struct inode_record *rec,
2483                           char *name, int *namelen)
2484 {
2485         struct inode_backref *backref;
2486
2487         list_for_each_entry(backref, &rec->backrefs, list) {
2488                 if (backref->found_dir_index || backref->found_dir_item ||
2489                     backref->found_inode_ref) {
2490                         memcpy(name, backref->name, backref->namelen);
2491                         *namelen = backref->namelen;
2492                         return 0;
2493                 }
2494         }
2495         return -ENOENT;
2496 }
2497
2498 /* Reset the nlink of the inode to the correct one */
2499 static int reset_nlink(struct btrfs_trans_handle *trans,
2500                        struct btrfs_root *root,
2501                        struct btrfs_path *path,
2502                        struct inode_record *rec)
2503 {
2504         struct inode_backref *backref;
2505         struct inode_backref *tmp;
2506         struct btrfs_key key;
2507         struct btrfs_inode_item *inode_item;
2508         int ret = 0;
2509
2510         /* We don't believe this either, reset it and iterate backref */
2511         rec->found_link = 0;
2512
2513         /* Remove all backref including the valid ones */
2514         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2515                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2516                                    backref->index, backref->name,
2517                                    backref->namelen, 0);
2518                 if (ret < 0)
2519                         goto out;
2520
2521                 /* remove invalid backref, so it won't be added back */
2522                 if (!(backref->found_dir_index &&
2523                       backref->found_dir_item &&
2524                       backref->found_inode_ref)) {
2525                         list_del(&backref->list);
2526                         free(backref);
2527                 } else {
2528                         rec->found_link++;
2529                 }
2530         }
2531
2532         /* Set nlink to 0 */
2533         key.objectid = rec->ino;
2534         key.type = BTRFS_INODE_ITEM_KEY;
2535         key.offset = 0;
2536         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2537         if (ret < 0)
2538                 goto out;
2539         if (ret > 0) {
2540                 ret = -ENOENT;
2541                 goto out;
2542         }
2543         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2544                                     struct btrfs_inode_item);
2545         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2546         btrfs_mark_buffer_dirty(path->nodes[0]);
2547         btrfs_release_path(path);
2548
2549         /*
2550          * Add back valid inode_ref/dir_item/dir_index,
2551          * add_link() will handle the nlink inc, so new nlink must be correct
2552          */
2553         list_for_each_entry(backref, &rec->backrefs, list) {
2554                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2555                                      backref->name, backref->namelen,
2556                                      backref->filetype, &backref->index, 1);
2557                 if (ret < 0)
2558                         goto out;
2559         }
2560 out:
2561         btrfs_release_path(path);
2562         return ret;
2563 }
2564
2565 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2566                                struct btrfs_root *root,
2567                                struct btrfs_path *path,
2568                                struct inode_record *rec)
2569 {
2570         char *dir_name = "lost+found";
2571         char namebuf[BTRFS_NAME_LEN] = {0};
2572         u64 lost_found_ino;
2573         u32 mode = 0700;
2574         u8 type = 0;
2575         int namelen = 0;
2576         int name_recovered = 0;
2577         int type_recovered = 0;
2578         int ret = 0;
2579
2580         /*
2581          * Get file name and type first before these invalid inode ref
2582          * are deleted by remove_all_invalid_backref()
2583          */
2584         name_recovered = !find_file_name(rec, namebuf, &namelen);
2585         type_recovered = !find_file_type(rec, &type);
2586
2587         if (!name_recovered) {
2588                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2589                        rec->ino, rec->ino);
2590                 namelen = count_digits(rec->ino);
2591                 sprintf(namebuf, "%llu", rec->ino);
2592                 name_recovered = 1;
2593         }
2594         if (!type_recovered) {
2595                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2596                        rec->ino);
2597                 type = BTRFS_FT_REG_FILE;
2598                 type_recovered = 1;
2599         }
2600
2601         ret = reset_nlink(trans, root, path, rec);
2602         if (ret < 0) {
2603                 fprintf(stderr,
2604                         "Failed to reset nlink for inode %llu: %s\n",
2605                         rec->ino, strerror(-ret));
2606                 goto out;
2607         }
2608
2609         if (rec->found_link == 0) {
2610                 lost_found_ino = root->highest_inode;
2611                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2612                         ret = -EOVERFLOW;
2613                         goto out;
2614                 }
2615                 lost_found_ino++;
2616                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2617                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2618                                   mode);
2619                 if (ret < 0) {
2620                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2621                                 dir_name, strerror(-ret));
2622                         goto out;
2623                 }
2624                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2625                                      namebuf, namelen, type, NULL, 1);
2626                 /*
2627                  * Add ".INO" suffix several times to handle case where
2628                  * "FILENAME.INO" is already taken by another file.
2629                  */
2630                 while (ret == -EEXIST) {
2631                         /*
2632                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2633                          */
2634                         if (namelen + count_digits(rec->ino) + 1 >
2635                             BTRFS_NAME_LEN) {
2636                                 ret = -EFBIG;
2637                                 goto out;
2638                         }
2639                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2640                                  ".%llu", rec->ino);
2641                         namelen += count_digits(rec->ino) + 1;
2642                         ret = btrfs_add_link(trans, root, rec->ino,
2643                                              lost_found_ino, namebuf,
2644                                              namelen, type, NULL, 1);
2645                 }
2646                 if (ret < 0) {
2647                         fprintf(stderr,
2648                                 "Failed to link the inode %llu to %s dir: %s\n",
2649                                 rec->ino, dir_name, strerror(-ret));
2650                         goto out;
2651                 }
2652                 /*
2653                  * Just increase the found_link, don't actually add the
2654                  * backref. This will make things easier and this inode
2655                  * record will be freed after the repair is done.
2656                  * So fsck will not report problem about this inode.
2657                  */
2658                 rec->found_link++;
2659                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2660                        namelen, namebuf, dir_name);
2661         }
2662         printf("Fixed the nlink of inode %llu\n", rec->ino);
2663 out:
2664         /*
2665          * Clear the flag anyway, or we will loop forever for the same inode
2666          * as it will not be removed from the bad inode list and the dead loop
2667          * happens.
2668          */
2669         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2670         btrfs_release_path(path);
2671         return ret;
2672 }
2673
2674 /*
2675  * Check if there is any normal(reg or prealloc) file extent for given
2676  * ino.
2677  * This is used to determine the file type when neither its dir_index/item or
2678  * inode_item exists.
2679  *
2680  * This will *NOT* report error, if any error happens, just consider it does
2681  * not have any normal file extent.
2682  */
2683 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2684 {
2685         struct btrfs_path *path;
2686         struct btrfs_key key;
2687         struct btrfs_key found_key;
2688         struct btrfs_file_extent_item *fi;
2689         u8 type;
2690         int ret = 0;
2691
2692         path = btrfs_alloc_path();
2693         if (!path)
2694                 goto out;
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2705                 ret = btrfs_next_leaf(root, path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2713                                       path->slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path->nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_free_path(path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path *path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         path = btrfs_alloc_path();
2930         if (!path)
2931                 return -ENOMEM;
2932
2933         /*
2934          * For nlink repair, it may create a dir and add link, so
2935          * 2 for parent(256)'s dir_index and dir_item
2936          * 2 for lost+found dir's inode_item and inode_ref
2937          * 1 for the new inode_ref of the file
2938          * 2 for lost+found dir's dir_index and dir_item for the file
2939          */
2940         trans = btrfs_start_transaction(root, 7);
2941         if (IS_ERR(trans)) {
2942                 btrfs_free_path(path);
2943                 return PTR_ERR(trans);
2944         }
2945
2946         if (rec->errors & I_ERR_NO_INODE_ITEM)
2947                 ret = repair_inode_no_item(trans, root, path, rec);
2948         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2949                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2950         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2951                 ret = repair_inode_discount_extent(trans, root, path, rec);
2952         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2953                 ret = repair_inode_isize(trans, root, path, rec);
2954         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2955                 ret = repair_inode_orphan_item(trans, root, path, rec);
2956         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2957                 ret = repair_inode_nlinks(trans, root, path, rec);
2958         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2959                 ret = repair_inode_nbytes(trans, root, path, rec);
2960         btrfs_commit_transaction(trans, root);
2961         btrfs_free_path(path);
2962         return ret;
2963 }
2964
2965 static int check_inode_recs(struct btrfs_root *root,
2966                             struct cache_tree *inode_cache)
2967 {
2968         struct cache_extent *cache;
2969         struct ptr_node *node;
2970         struct inode_record *rec;
2971         struct inode_backref *backref;
2972         int stage = 0;
2973         int ret = 0;
2974         int err = 0;
2975         u64 error = 0;
2976         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2977
2978         if (btrfs_root_refs(&root->root_item) == 0) {
2979                 if (!cache_tree_empty(inode_cache))
2980                         fprintf(stderr, "warning line %d\n", __LINE__);
2981                 return 0;
2982         }
2983
2984         /*
2985          * We need to record the highest inode number for later 'lost+found'
2986          * dir creation.
2987          * We must select an ino not used/referred by any existing inode, or
2988          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2989          * this may cause 'lost+found' dir has wrong nlinks.
2990          */
2991         cache = last_cache_extent(inode_cache);
2992         if (cache) {
2993                 node = container_of(cache, struct ptr_node, cache);
2994                 rec = node->data;
2995                 if (rec->ino > root->highest_inode)
2996                         root->highest_inode = rec->ino;
2997         }
2998
2999         /*
3000          * We need to repair backrefs first because we could change some of the
3001          * errors in the inode recs.
3002          *
3003          * We also need to go through and delete invalid backrefs first and then
3004          * add the correct ones second.  We do this because we may get EEXIST
3005          * when adding back the correct index because we hadn't yet deleted the
3006          * invalid index.
3007          *
3008          * For example, if we were missing a dir index then the directories
3009          * isize would be wrong, so if we fixed the isize to what we thought it
3010          * would be and then fixed the backref we'd still have a invalid fs, so
3011          * we need to add back the dir index and then check to see if the isize
3012          * is still wrong.
3013          */
3014         while (stage < 3) {
3015                 stage++;
3016                 if (stage == 3 && !err)
3017                         break;
3018
3019                 cache = search_cache_extent(inode_cache, 0);
3020                 while (repair && cache) {
3021                         node = container_of(cache, struct ptr_node, cache);
3022                         rec = node->data;
3023                         cache = next_cache_extent(cache);
3024
3025                         /* Need to free everything up and rescan */
3026                         if (stage == 3) {
3027                                 remove_cache_extent(inode_cache, &node->cache);
3028                                 free(node);
3029                                 free_inode_rec(rec);
3030                                 continue;
3031                         }
3032
3033                         if (list_empty(&rec->backrefs))
3034                                 continue;
3035
3036                         ret = repair_inode_backrefs(root, rec, inode_cache,
3037                                                     stage == 1);
3038                         if (ret < 0) {
3039                                 err = ret;
3040                                 stage = 2;
3041                                 break;
3042                         } if (ret > 0) {
3043                                 err = -EAGAIN;
3044                         }
3045                 }
3046         }
3047         if (err)
3048                 return err;
3049
3050         rec = get_inode_rec(inode_cache, root_dirid, 0);
3051         BUG_ON(IS_ERR(rec));
3052         if (rec) {
3053                 ret = check_root_dir(rec);
3054                 if (ret) {
3055                         fprintf(stderr, "root %llu root dir %llu error\n",
3056                                 (unsigned long long)root->root_key.objectid,
3057                                 (unsigned long long)root_dirid);
3058                         print_inode_error(root, rec);
3059                         error++;
3060                 }
3061         } else {
3062                 if (repair) {
3063                         struct btrfs_trans_handle *trans;
3064
3065                         trans = btrfs_start_transaction(root, 1);
3066                         if (IS_ERR(trans)) {
3067                                 err = PTR_ERR(trans);
3068                                 return err;
3069                         }
3070
3071                         fprintf(stderr,
3072                                 "root %llu missing its root dir, recreating\n",
3073                                 (unsigned long long)root->objectid);
3074
3075                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3076                         BUG_ON(ret);
3077
3078                         btrfs_commit_transaction(trans, root);
3079                         return -EAGAIN;
3080                 }
3081
3082                 fprintf(stderr, "root %llu root dir %llu not found\n",
3083                         (unsigned long long)root->root_key.objectid,
3084                         (unsigned long long)root_dirid);
3085         }
3086
3087         while (1) {
3088                 cache = search_cache_extent(inode_cache, 0);
3089                 if (!cache)
3090                         break;
3091                 node = container_of(cache, struct ptr_node, cache);
3092                 rec = node->data;
3093                 remove_cache_extent(inode_cache, &node->cache);
3094                 free(node);
3095                 if (rec->ino == root_dirid ||
3096                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3097                         free_inode_rec(rec);
3098                         continue;
3099                 }
3100
3101                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3102                         ret = check_orphan_item(root, rec->ino);
3103                         if (ret == 0)
3104                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3105                         if (can_free_inode_rec(rec)) {
3106                                 free_inode_rec(rec);
3107                                 continue;
3108                         }
3109                 }
3110
3111                 if (!rec->found_inode_item)
3112                         rec->errors |= I_ERR_NO_INODE_ITEM;
3113                 if (rec->found_link != rec->nlink)
3114                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3115                 if (repair) {
3116                         ret = try_repair_inode(root, rec);
3117                         if (ret == 0 && can_free_inode_rec(rec)) {
3118                                 free_inode_rec(rec);
3119                                 continue;
3120                         }
3121                         ret = 0;
3122                 }
3123
3124                 if (!(repair && ret == 0))
3125                         error++;
3126                 print_inode_error(root, rec);
3127                 list_for_each_entry(backref, &rec->backrefs, list) {
3128                         if (!backref->found_dir_item)
3129                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3130                         if (!backref->found_dir_index)
3131                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3132                         if (!backref->found_inode_ref)
3133                                 backref->errors |= REF_ERR_NO_INODE_REF;
3134                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3135                                 " namelen %u name %s filetype %d errors %x",
3136                                 (unsigned long long)backref->dir,
3137                                 (unsigned long long)backref->index,
3138                                 backref->namelen, backref->name,
3139                                 backref->filetype, backref->errors);
3140                         print_ref_error(backref->errors);
3141                 }
3142                 free_inode_rec(rec);
3143         }
3144         return (error > 0) ? -1 : 0;
3145 }
3146
3147 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3148                                         u64 objectid)
3149 {
3150         struct cache_extent *cache;
3151         struct root_record *rec = NULL;
3152         int ret;
3153
3154         cache = lookup_cache_extent(root_cache, objectid, 1);
3155         if (cache) {
3156                 rec = container_of(cache, struct root_record, cache);
3157         } else {
3158                 rec = calloc(1, sizeof(*rec));
3159                 if (!rec)
3160                         return ERR_PTR(-ENOMEM);
3161                 rec->objectid = objectid;
3162                 INIT_LIST_HEAD(&rec->backrefs);
3163                 rec->cache.start = objectid;
3164                 rec->cache.size = 1;
3165
3166                 ret = insert_cache_extent(root_cache, &rec->cache);
3167                 if (ret)
3168                         return ERR_PTR(-EEXIST);
3169         }
3170         return rec;
3171 }
3172
3173 static struct root_backref *get_root_backref(struct root_record *rec,
3174                                              u64 ref_root, u64 dir, u64 index,
3175                                              const char *name, int namelen)
3176 {
3177         struct root_backref *backref;
3178
3179         list_for_each_entry(backref, &rec->backrefs, list) {
3180                 if (backref->ref_root != ref_root || backref->dir != dir ||
3181                     backref->namelen != namelen)
3182                         continue;
3183                 if (memcmp(name, backref->name, namelen))
3184                         continue;
3185                 return backref;
3186         }
3187
3188         backref = calloc(1, sizeof(*backref) + namelen + 1);
3189         if (!backref)
3190                 return NULL;
3191         backref->ref_root = ref_root;
3192         backref->dir = dir;
3193         backref->index = index;
3194         backref->namelen = namelen;
3195         memcpy(backref->name, name, namelen);
3196         backref->name[namelen] = '\0';
3197         list_add_tail(&backref->list, &rec->backrefs);
3198         return backref;
3199 }
3200
3201 static void free_root_record(struct cache_extent *cache)
3202 {
3203         struct root_record *rec;
3204         struct root_backref *backref;
3205
3206         rec = container_of(cache, struct root_record, cache);
3207         while (!list_empty(&rec->backrefs)) {
3208                 backref = to_root_backref(rec->backrefs.next);
3209                 list_del(&backref->list);
3210                 free(backref);
3211         }
3212
3213         kfree(rec);
3214 }
3215
3216 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3217
3218 static int add_root_backref(struct cache_tree *root_cache,
3219                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3220                             const char *name, int namelen,
3221                             int item_type, int errors)
3222 {
3223         struct root_record *rec;
3224         struct root_backref *backref;
3225
3226         rec = get_root_rec(root_cache, root_id);
3227         BUG_ON(IS_ERR(rec));
3228         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3229         BUG_ON(!backref);
3230
3231         backref->errors |= errors;
3232
3233         if (item_type != BTRFS_DIR_ITEM_KEY) {
3234                 if (backref->found_dir_index || backref->found_back_ref ||
3235                     backref->found_forward_ref) {
3236                         if (backref->index != index)
3237                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3238                 } else {
3239                         backref->index = index;
3240                 }
3241         }
3242
3243         if (item_type == BTRFS_DIR_ITEM_KEY) {
3244                 if (backref->found_forward_ref)
3245                         rec->found_ref++;
3246                 backref->found_dir_item = 1;
3247         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3248                 backref->found_dir_index = 1;
3249         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3250                 if (backref->found_forward_ref)
3251                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3252                 else if (backref->found_dir_item)
3253                         rec->found_ref++;
3254                 backref->found_forward_ref = 1;
3255         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3256                 if (backref->found_back_ref)
3257                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3258                 backref->found_back_ref = 1;
3259         } else {
3260                 BUG_ON(1);
3261         }
3262
3263         if (backref->found_forward_ref && backref->found_dir_item)
3264                 backref->reachable = 1;
3265         return 0;
3266 }
3267
3268 static int merge_root_recs(struct btrfs_root *root,
3269                            struct cache_tree *src_cache,
3270                            struct cache_tree *dst_cache)
3271 {
3272         struct cache_extent *cache;
3273         struct ptr_node *node;
3274         struct inode_record *rec;
3275         struct inode_backref *backref;
3276         int ret = 0;
3277
3278         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3279                 free_inode_recs_tree(src_cache);
3280                 return 0;
3281         }
3282
3283         while (1) {
3284                 cache = search_cache_extent(src_cache, 0);
3285                 if (!cache)
3286                         break;
3287                 node = container_of(cache, struct ptr_node, cache);
3288                 rec = node->data;
3289                 remove_cache_extent(src_cache, &node->cache);
3290                 free(node);
3291
3292                 ret = is_child_root(root, root->objectid, rec->ino);
3293                 if (ret < 0)
3294                         break;
3295                 else if (ret == 0)
3296                         goto skip;
3297
3298                 list_for_each_entry(backref, &rec->backrefs, list) {
3299                         BUG_ON(backref->found_inode_ref);
3300                         if (backref->found_dir_item)
3301                                 add_root_backref(dst_cache, rec->ino,
3302                                         root->root_key.objectid, backref->dir,
3303                                         backref->index, backref->name,
3304                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3305                                         backref->errors);
3306                         if (backref->found_dir_index)
3307                                 add_root_backref(dst_cache, rec->ino,
3308                                         root->root_key.objectid, backref->dir,
3309                                         backref->index, backref->name,
3310                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3311                                         backref->errors);
3312                 }
3313 skip:
3314                 free_inode_rec(rec);
3315         }
3316         if (ret < 0)
3317                 return ret;
3318         return 0;
3319 }
3320
3321 static int check_root_refs(struct btrfs_root *root,
3322                            struct cache_tree *root_cache)
3323 {
3324         struct root_record *rec;
3325         struct root_record *ref_root;
3326         struct root_backref *backref;
3327         struct cache_extent *cache;
3328         int loop = 1;
3329         int ret;
3330         int error;
3331         int errors = 0;
3332
3333         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3334         BUG_ON(IS_ERR(rec));
3335         rec->found_ref = 1;
3336
3337         /* fixme: this can not detect circular references */
3338         while (loop) {
3339                 loop = 0;
3340                 cache = search_cache_extent(root_cache, 0);
3341                 while (1) {
3342                         if (!cache)
3343                                 break;
3344                         rec = container_of(cache, struct root_record, cache);
3345                         cache = next_cache_extent(cache);
3346
3347                         if (rec->found_ref == 0)
3348                                 continue;
3349
3350                         list_for_each_entry(backref, &rec->backrefs, list) {
3351                                 if (!backref->reachable)
3352                                         continue;
3353
3354                                 ref_root = get_root_rec(root_cache,
3355                                                         backref->ref_root);
3356                                 BUG_ON(IS_ERR(ref_root));
3357                                 if (ref_root->found_ref > 0)
3358                                         continue;
3359
3360                                 backref->reachable = 0;
3361                                 rec->found_ref--;
3362                                 if (rec->found_ref == 0)
3363                                         loop = 1;
3364                         }
3365                 }
3366         }
3367
3368         cache = search_cache_extent(root_cache, 0);
3369         while (1) {
3370                 if (!cache)
3371                         break;
3372                 rec = container_of(cache, struct root_record, cache);
3373                 cache = next_cache_extent(cache);
3374
3375                 if (rec->found_ref == 0 &&
3376                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3377                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3378                         ret = check_orphan_item(root->fs_info->tree_root,
3379                                                 rec->objectid);
3380                         if (ret == 0)
3381                                 continue;
3382
3383                         /*
3384                          * If we don't have a root item then we likely just have
3385                          * a dir item in a snapshot for this root but no actual
3386                          * ref key or anything so it's meaningless.
3387                          */
3388                         if (!rec->found_root_item)
3389                                 continue;
3390                         errors++;
3391                         fprintf(stderr, "fs tree %llu not referenced\n",
3392                                 (unsigned long long)rec->objectid);
3393                 }
3394
3395                 error = 0;
3396                 if (rec->found_ref > 0 && !rec->found_root_item)
3397                         error = 1;
3398                 list_for_each_entry(backref, &rec->backrefs, list) {
3399                         if (!backref->found_dir_item)
3400                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3401                         if (!backref->found_dir_index)
3402                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3403                         if (!backref->found_back_ref)
3404                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3405                         if (!backref->found_forward_ref)
3406                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3407                         if (backref->reachable && backref->errors)
3408                                 error = 1;
3409                 }
3410                 if (!error)
3411                         continue;
3412
3413                 errors++;
3414                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3415                         (unsigned long long)rec->objectid, rec->found_ref,
3416                          rec->found_root_item ? "" : "not found");
3417
3418                 list_for_each_entry(backref, &rec->backrefs, list) {
3419                         if (!backref->reachable)
3420                                 continue;
3421                         if (!backref->errors && rec->found_root_item)
3422                                 continue;
3423                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3424                                 " index %llu namelen %u name %s errors %x\n",
3425                                 (unsigned long long)backref->ref_root,
3426                                 (unsigned long long)backref->dir,
3427                                 (unsigned long long)backref->index,
3428                                 backref->namelen, backref->name,
3429                                 backref->errors);
3430                         print_ref_error(backref->errors);
3431                 }
3432         }
3433         return errors > 0 ? 1 : 0;
3434 }
3435
3436 static int process_root_ref(struct extent_buffer *eb, int slot,
3437                             struct btrfs_key *key,
3438                             struct cache_tree *root_cache)
3439 {
3440         u64 dirid;
3441         u64 index;
3442         u32 len;
3443         u32 name_len;
3444         struct btrfs_root_ref *ref;
3445         char namebuf[BTRFS_NAME_LEN];
3446         int error;
3447
3448         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3449
3450         dirid = btrfs_root_ref_dirid(eb, ref);
3451         index = btrfs_root_ref_sequence(eb, ref);
3452         name_len = btrfs_root_ref_name_len(eb, ref);
3453
3454         if (name_len <= BTRFS_NAME_LEN) {
3455                 len = name_len;
3456                 error = 0;
3457         } else {
3458                 len = BTRFS_NAME_LEN;
3459                 error = REF_ERR_NAME_TOO_LONG;
3460         }
3461         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3462
3463         if (key->type == BTRFS_ROOT_REF_KEY) {
3464                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3465                                  index, namebuf, len, key->type, error);
3466         } else {
3467                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3468                                  index, namebuf, len, key->type, error);
3469         }
3470         return 0;
3471 }
3472
3473 static void free_corrupt_block(struct cache_extent *cache)
3474 {
3475         struct btrfs_corrupt_block *corrupt;
3476
3477         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3478         free(corrupt);
3479 }
3480
3481 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3482
3483 /*
3484  * Repair the btree of the given root.
3485  *
3486  * The fix is to remove the node key in corrupt_blocks cache_tree.
3487  * and rebalance the tree.
3488  * After the fix, the btree should be writeable.
3489  */
3490 static int repair_btree(struct btrfs_root *root,
3491                         struct cache_tree *corrupt_blocks)
3492 {
3493         struct btrfs_trans_handle *trans;
3494         struct btrfs_path *path;
3495         struct btrfs_corrupt_block *corrupt;
3496         struct cache_extent *cache;
3497         struct btrfs_key key;
3498         u64 offset;
3499         int level;
3500         int ret = 0;
3501
3502         if (cache_tree_empty(corrupt_blocks))
3503                 return 0;
3504
3505         path = btrfs_alloc_path();
3506         if (!path)
3507                 return -ENOMEM;
3508
3509         trans = btrfs_start_transaction(root, 1);
3510         if (IS_ERR(trans)) {
3511                 ret = PTR_ERR(trans);
3512                 fprintf(stderr, "Error starting transaction: %s\n",
3513                         strerror(-ret));
3514                 goto out_free_path;
3515         }
3516         cache = first_cache_extent(corrupt_blocks);
3517         while (cache) {
3518                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3519                                        cache);
3520                 level = corrupt->level;
3521                 path->lowest_level = level;
3522                 key.objectid = corrupt->key.objectid;
3523                 key.type = corrupt->key.type;
3524                 key.offset = corrupt->key.offset;
3525
3526                 /*
3527                  * Here we don't want to do any tree balance, since it may
3528                  * cause a balance with corrupted brother leaf/node,
3529                  * so ins_len set to 0 here.
3530                  * Balance will be done after all corrupt node/leaf is deleted.
3531                  */
3532                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3533                 if (ret < 0)
3534                         goto out;
3535                 offset = btrfs_node_blockptr(path->nodes[level],
3536                                              path->slots[level]);
3537
3538                 /* Remove the ptr */
3539                 ret = btrfs_del_ptr(trans, root, path, level,
3540                                     path->slots[level]);
3541                 if (ret < 0)
3542                         goto out;
3543                 /*
3544                  * Remove the corresponding extent
3545                  * return value is not concerned.
3546                  */
3547                 btrfs_release_path(path);
3548                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3549                                         0, root->root_key.objectid,
3550                                         level - 1, 0);
3551                 cache = next_cache_extent(cache);
3552         }
3553
3554         /* Balance the btree using btrfs_search_slot() */
3555         cache = first_cache_extent(corrupt_blocks);
3556         while (cache) {
3557                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3558                                        cache);
3559                 memcpy(&key, &corrupt->key, sizeof(key));
3560                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3561                 if (ret < 0)
3562                         goto out;
3563                 /* return will always >0 since it won't find the item */
3564                 ret = 0;
3565                 btrfs_release_path(path);
3566                 cache = next_cache_extent(cache);
3567         }
3568 out:
3569         btrfs_commit_transaction(trans, root);
3570 out_free_path:
3571         btrfs_free_path(path);
3572         return ret;
3573 }
3574
3575 static int check_fs_root(struct btrfs_root *root,
3576                          struct cache_tree *root_cache,
3577                          struct walk_control *wc)
3578 {
3579         int ret = 0;
3580         int err = 0;
3581         int wret;
3582         int level;
3583         struct btrfs_path path;
3584         struct shared_node root_node;
3585         struct root_record *rec;
3586         struct btrfs_root_item *root_item = &root->root_item;
3587         struct cache_tree corrupt_blocks;
3588         struct orphan_data_extent *orphan;
3589         struct orphan_data_extent *tmp;
3590         enum btrfs_tree_block_status status;
3591         struct node_refs nrefs;
3592
3593         /*
3594          * Reuse the corrupt_block cache tree to record corrupted tree block
3595          *
3596          * Unlike the usage in extent tree check, here we do it in a per
3597          * fs/subvol tree base.
3598          */
3599         cache_tree_init(&corrupt_blocks);
3600         root->fs_info->corrupt_blocks = &corrupt_blocks;
3601
3602         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3603                 rec = get_root_rec(root_cache, root->root_key.objectid);
3604                 BUG_ON(IS_ERR(rec));
3605                 if (btrfs_root_refs(root_item) > 0)
3606                         rec->found_root_item = 1;
3607         }
3608
3609         btrfs_init_path(&path);
3610         memset(&root_node, 0, sizeof(root_node));
3611         cache_tree_init(&root_node.root_cache);
3612         cache_tree_init(&root_node.inode_cache);
3613         memset(&nrefs, 0, sizeof(nrefs));
3614
3615         /* Move the orphan extent record to corresponding inode_record */
3616         list_for_each_entry_safe(orphan, tmp,
3617                                  &root->orphan_data_extents, list) {
3618                 struct inode_record *inode;
3619
3620                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3621                                       1);
3622                 BUG_ON(IS_ERR(inode));
3623                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3624                 list_move(&orphan->list, &inode->orphan_extents);
3625         }
3626
3627         level = btrfs_header_level(root->node);
3628         memset(wc->nodes, 0, sizeof(wc->nodes));
3629         wc->nodes[level] = &root_node;
3630         wc->active_node = level;
3631         wc->root_level = level;
3632
3633         /* We may not have checked the root block, lets do that now */
3634         if (btrfs_is_leaf(root->node))
3635                 status = btrfs_check_leaf(root, NULL, root->node);
3636         else
3637                 status = btrfs_check_node(root, NULL, root->node);
3638         if (status != BTRFS_TREE_BLOCK_CLEAN)
3639                 return -EIO;
3640
3641         if (btrfs_root_refs(root_item) > 0 ||
3642             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3643                 path.nodes[level] = root->node;
3644                 extent_buffer_get(root->node);
3645                 path.slots[level] = 0;
3646         } else {
3647                 struct btrfs_key key;
3648                 struct btrfs_disk_key found_key;
3649
3650                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3651                 level = root_item->drop_level;
3652                 path.lowest_level = level;
3653                 if (level > btrfs_header_level(root->node) ||
3654                     level >= BTRFS_MAX_LEVEL) {
3655                         error("ignoring invalid drop level: %u", level);
3656                         goto skip_walking;
3657                 }
3658                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3659                 if (wret < 0)
3660                         goto skip_walking;
3661                 btrfs_node_key(path.nodes[level], &found_key,
3662                                 path.slots[level]);
3663                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3664                                         sizeof(found_key)));
3665         }
3666
3667         while (1) {
3668                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3669                 if (wret < 0)
3670                         ret = wret;
3671                 if (wret != 0)
3672                         break;
3673
3674                 wret = walk_up_tree(root, &path, wc, &level);
3675                 if (wret < 0)
3676                         ret = wret;
3677                 if (wret != 0)
3678                         break;
3679         }
3680 skip_walking:
3681         btrfs_release_path(&path);
3682
3683         if (!cache_tree_empty(&corrupt_blocks)) {
3684                 struct cache_extent *cache;
3685                 struct btrfs_corrupt_block *corrupt;
3686
3687                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3688                        root->root_key.objectid);
3689                 cache = first_cache_extent(&corrupt_blocks);
3690                 while (cache) {
3691                         corrupt = container_of(cache,
3692                                                struct btrfs_corrupt_block,
3693                                                cache);
3694                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3695                                cache->start, corrupt->level,
3696                                corrupt->key.objectid, corrupt->key.type,
3697                                corrupt->key.offset);
3698                         cache = next_cache_extent(cache);
3699                 }
3700                 if (repair) {
3701                         printf("Try to repair the btree for root %llu\n",
3702                                root->root_key.objectid);
3703                         ret = repair_btree(root, &corrupt_blocks);
3704                         if (ret < 0)
3705                                 fprintf(stderr, "Failed to repair btree: %s\n",
3706                                         strerror(-ret));
3707                         if (!ret)
3708                                 printf("Btree for root %llu is fixed\n",
3709                                        root->root_key.objectid);
3710                 }
3711         }
3712
3713         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3714         if (err < 0)
3715                 ret = err;
3716
3717         if (root_node.current) {
3718                 root_node.current->checked = 1;
3719                 maybe_free_inode_rec(&root_node.inode_cache,
3720                                 root_node.current);
3721         }
3722
3723         err = check_inode_recs(root, &root_node.inode_cache);
3724         if (!ret)
3725                 ret = err;
3726
3727         free_corrupt_blocks_tree(&corrupt_blocks);
3728         root->fs_info->corrupt_blocks = NULL;
3729         free_orphan_data_extents(&root->orphan_data_extents);
3730         return ret;
3731 }
3732
3733 static int fs_root_objectid(u64 objectid)
3734 {
3735         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3736             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3737                 return 1;
3738         return is_fstree(objectid);
3739 }
3740
3741 static int check_fs_roots(struct btrfs_root *root,
3742                           struct cache_tree *root_cache)
3743 {
3744         struct btrfs_path path;
3745         struct btrfs_key key;
3746         struct walk_control wc;
3747         struct extent_buffer *leaf, *tree_node;
3748         struct btrfs_root *tmp_root;
3749         struct btrfs_root *tree_root = root->fs_info->tree_root;
3750         int ret;
3751         int err = 0;
3752
3753         if (ctx.progress_enabled) {
3754                 ctx.tp = TASK_FS_ROOTS;
3755                 task_start(ctx.info);
3756         }
3757
3758         /*
3759          * Just in case we made any changes to the extent tree that weren't
3760          * reflected into the free space cache yet.
3761          */
3762         if (repair)
3763                 reset_cached_block_groups(root->fs_info);
3764         memset(&wc, 0, sizeof(wc));
3765         cache_tree_init(&wc.shared);
3766         btrfs_init_path(&path);
3767
3768 again:
3769         key.offset = 0;
3770         key.objectid = 0;
3771         key.type = BTRFS_ROOT_ITEM_KEY;
3772         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3773         if (ret < 0) {
3774                 err = 1;
3775                 goto out;
3776         }
3777         tree_node = tree_root->node;
3778         while (1) {
3779                 if (tree_node != tree_root->node) {
3780                         free_root_recs_tree(root_cache);
3781                         btrfs_release_path(&path);
3782                         goto again;
3783                 }
3784                 leaf = path.nodes[0];
3785                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3786                         ret = btrfs_next_leaf(tree_root, &path);
3787                         if (ret) {
3788                                 if (ret < 0)
3789                                         err = 1;
3790                                 break;
3791                         }
3792                         leaf = path.nodes[0];
3793                 }
3794                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3795                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3796                     fs_root_objectid(key.objectid)) {
3797                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3798                                 tmp_root = btrfs_read_fs_root_no_cache(
3799                                                 root->fs_info, &key);
3800                         } else {
3801                                 key.offset = (u64)-1;
3802                                 tmp_root = btrfs_read_fs_root(
3803                                                 root->fs_info, &key);
3804                         }
3805                         if (IS_ERR(tmp_root)) {
3806                                 err = 1;
3807                                 goto next;
3808                         }
3809                         ret = check_fs_root(tmp_root, root_cache, &wc);
3810                         if (ret == -EAGAIN) {
3811                                 free_root_recs_tree(root_cache);
3812                                 btrfs_release_path(&path);
3813                                 goto again;
3814                         }
3815                         if (ret)
3816                                 err = 1;
3817                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3818                                 btrfs_free_fs_root(tmp_root);
3819                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3820                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3821                         process_root_ref(leaf, path.slots[0], &key,
3822                                          root_cache);
3823                 }
3824 next:
3825                 path.slots[0]++;
3826         }
3827 out:
3828         btrfs_release_path(&path);
3829         if (err)
3830                 free_extent_cache_tree(&wc.shared);
3831         if (!cache_tree_empty(&wc.shared))
3832                 fprintf(stderr, "warning line %d\n", __LINE__);
3833
3834         task_stop(ctx.info);
3835
3836         return err;
3837 }
3838
3839 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3840 {
3841         struct list_head *cur = rec->backrefs.next;
3842         struct extent_backref *back;
3843         struct tree_backref *tback;
3844         struct data_backref *dback;
3845         u64 found = 0;
3846         int err = 0;
3847
3848         while(cur != &rec->backrefs) {
3849                 back = to_extent_backref(cur);
3850                 cur = cur->next;
3851                 if (!back->found_extent_tree) {
3852                         err = 1;
3853                         if (!print_errs)
3854                                 goto out;
3855                         if (back->is_data) {
3856                                 dback = to_data_backref(back);
3857                                 fprintf(stderr, "Backref %llu %s %llu"
3858                                         " owner %llu offset %llu num_refs %lu"
3859                                         " not found in extent tree\n",
3860                                         (unsigned long long)rec->start,
3861                                         back->full_backref ?
3862                                         "parent" : "root",
3863                                         back->full_backref ?
3864                                         (unsigned long long)dback->parent:
3865                                         (unsigned long long)dback->root,
3866                                         (unsigned long long)dback->owner,
3867                                         (unsigned long long)dback->offset,
3868                                         (unsigned long)dback->num_refs);
3869                         } else {
3870                                 tback = to_tree_backref(back);
3871                                 fprintf(stderr, "Backref %llu parent %llu"
3872                                         " root %llu not found in extent tree\n",
3873                                         (unsigned long long)rec->start,
3874                                         (unsigned long long)tback->parent,
3875                                         (unsigned long long)tback->root);
3876                         }
3877                 }
3878                 if (!back->is_data && !back->found_ref) {
3879                         err = 1;
3880                         if (!print_errs)
3881                                 goto out;
3882                         tback = to_tree_backref(back);
3883                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3884                                 (unsigned long long)rec->start,
3885                                 back->full_backref ? "parent" : "root",
3886                                 back->full_backref ?
3887                                 (unsigned long long)tback->parent :
3888                                 (unsigned long long)tback->root, back);
3889                 }
3890                 if (back->is_data) {
3891                         dback = to_data_backref(back);
3892                         if (dback->found_ref != dback->num_refs) {
3893                                 err = 1;
3894                                 if (!print_errs)
3895                                         goto out;
3896                                 fprintf(stderr, "Incorrect local backref count"
3897                                         " on %llu %s %llu owner %llu"
3898                                         " offset %llu found %u wanted %u back %p\n",
3899                                         (unsigned long long)rec->start,
3900                                         back->full_backref ?
3901                                         "parent" : "root",
3902                                         back->full_backref ?
3903                                         (unsigned long long)dback->parent:
3904                                         (unsigned long long)dback->root,
3905                                         (unsigned long long)dback->owner,
3906                                         (unsigned long long)dback->offset,
3907                                         dback->found_ref, dback->num_refs, back);
3908                         }
3909                         if (dback->disk_bytenr != rec->start) {
3910                                 err = 1;
3911                                 if (!print_errs)
3912                                         goto out;
3913                                 fprintf(stderr, "Backref disk bytenr does not"
3914                                         " match extent record, bytenr=%llu, "
3915                                         "ref bytenr=%llu\n",
3916                                         (unsigned long long)rec->start,
3917                                         (unsigned long long)dback->disk_bytenr);
3918                         }
3919
3920                         if (dback->bytes != rec->nr) {
3921                                 err = 1;
3922                                 if (!print_errs)
3923                                         goto out;
3924                                 fprintf(stderr, "Backref bytes do not match "
3925                                         "extent backref, bytenr=%llu, ref "
3926                                         "bytes=%llu, backref bytes=%llu\n",
3927                                         (unsigned long long)rec->start,
3928                                         (unsigned long long)rec->nr,
3929                                         (unsigned long long)dback->bytes);
3930                         }
3931                 }
3932                 if (!back->is_data) {
3933                         found += 1;
3934                 } else {
3935                         dback = to_data_backref(back);
3936                         found += dback->found_ref;
3937                 }
3938         }
3939         if (found != rec->refs) {
3940                 err = 1;
3941                 if (!print_errs)
3942                         goto out;
3943                 fprintf(stderr, "Incorrect global backref count "
3944                         "on %llu found %llu wanted %llu\n",
3945                         (unsigned long long)rec->start,
3946                         (unsigned long long)found,
3947                         (unsigned long long)rec->refs);
3948         }
3949 out:
3950         return err;
3951 }
3952
3953 static int free_all_extent_backrefs(struct extent_record *rec)
3954 {
3955         struct extent_backref *back;
3956         struct list_head *cur;
3957         while (!list_empty(&rec->backrefs)) {
3958                 cur = rec->backrefs.next;
3959                 back = to_extent_backref(cur);
3960                 list_del(cur);
3961                 free(back);
3962         }
3963         return 0;
3964 }
3965
3966 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3967                                      struct cache_tree *extent_cache)
3968 {
3969         struct cache_extent *cache;
3970         struct extent_record *rec;
3971
3972         while (1) {
3973                 cache = first_cache_extent(extent_cache);
3974                 if (!cache)
3975                         break;
3976                 rec = container_of(cache, struct extent_record, cache);
3977                 remove_cache_extent(extent_cache, cache);
3978                 free_all_extent_backrefs(rec);
3979                 free(rec);
3980         }
3981 }
3982
3983 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3984                                  struct extent_record *rec)
3985 {
3986         if (rec->content_checked && rec->owner_ref_checked &&
3987             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3988             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3989             !rec->bad_full_backref && !rec->crossing_stripes &&
3990             !rec->wrong_chunk_type) {
3991                 remove_cache_extent(extent_cache, &rec->cache);
3992                 free_all_extent_backrefs(rec);
3993                 list_del_init(&rec->list);
3994                 free(rec);
3995         }
3996         return 0;
3997 }
3998
3999 static int check_owner_ref(struct btrfs_root *root,
4000                             struct extent_record *rec,
4001                             struct extent_buffer *buf)
4002 {
4003         struct extent_backref *node;
4004         struct tree_backref *back;
4005         struct btrfs_root *ref_root;
4006         struct btrfs_key key;
4007         struct btrfs_path path;
4008         struct extent_buffer *parent;
4009         int level;
4010         int found = 0;
4011         int ret;
4012
4013         list_for_each_entry(node, &rec->backrefs, list) {
4014                 if (node->is_data)
4015                         continue;
4016                 if (!node->found_ref)
4017                         continue;
4018                 if (node->full_backref)
4019                         continue;
4020                 back = to_tree_backref(node);
4021                 if (btrfs_header_owner(buf) == back->root)
4022                         return 0;
4023         }
4024         BUG_ON(rec->is_root);
4025
4026         /* try to find the block by search corresponding fs tree */
4027         key.objectid = btrfs_header_owner(buf);
4028         key.type = BTRFS_ROOT_ITEM_KEY;
4029         key.offset = (u64)-1;
4030
4031         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4032         if (IS_ERR(ref_root))
4033                 return 1;
4034
4035         level = btrfs_header_level(buf);
4036         if (level == 0)
4037                 btrfs_item_key_to_cpu(buf, &key, 0);
4038         else
4039                 btrfs_node_key_to_cpu(buf, &key, 0);
4040
4041         btrfs_init_path(&path);
4042         path.lowest_level = level + 1;
4043         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4044         if (ret < 0)
4045                 return 0;
4046
4047         parent = path.nodes[level + 1];
4048         if (parent && buf->start == btrfs_node_blockptr(parent,
4049                                                         path.slots[level + 1]))
4050                 found = 1;
4051
4052         btrfs_release_path(&path);
4053         return found ? 0 : 1;
4054 }
4055
4056 static int is_extent_tree_record(struct extent_record *rec)
4057 {
4058         struct list_head *cur = rec->backrefs.next;
4059         struct extent_backref *node;
4060         struct tree_backref *back;
4061         int is_extent = 0;
4062
4063         while(cur != &rec->backrefs) {
4064                 node = to_extent_backref(cur);
4065                 cur = cur->next;
4066                 if (node->is_data)
4067                         return 0;
4068                 back = to_tree_backref(node);
4069                 if (node->full_backref)
4070                         return 0;
4071                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4072                         is_extent = 1;
4073         }
4074         return is_extent;
4075 }
4076
4077
4078 static int record_bad_block_io(struct btrfs_fs_info *info,
4079                                struct cache_tree *extent_cache,
4080                                u64 start, u64 len)
4081 {
4082         struct extent_record *rec;
4083         struct cache_extent *cache;
4084         struct btrfs_key key;
4085
4086         cache = lookup_cache_extent(extent_cache, start, len);
4087         if (!cache)
4088                 return 0;
4089
4090         rec = container_of(cache, struct extent_record, cache);
4091         if (!is_extent_tree_record(rec))
4092                 return 0;
4093
4094         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4095         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4096 }
4097
4098 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4099                        struct extent_buffer *buf, int slot)
4100 {
4101         if (btrfs_header_level(buf)) {
4102                 struct btrfs_key_ptr ptr1, ptr2;
4103
4104                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4105                                    sizeof(struct btrfs_key_ptr));
4106                 read_extent_buffer(buf, &ptr2,
4107                                    btrfs_node_key_ptr_offset(slot + 1),
4108                                    sizeof(struct btrfs_key_ptr));
4109                 write_extent_buffer(buf, &ptr1,
4110                                     btrfs_node_key_ptr_offset(slot + 1),
4111                                     sizeof(struct btrfs_key_ptr));
4112                 write_extent_buffer(buf, &ptr2,
4113                                     btrfs_node_key_ptr_offset(slot),
4114                                     sizeof(struct btrfs_key_ptr));
4115                 if (slot == 0) {
4116                         struct btrfs_disk_key key;
4117                         btrfs_node_key(buf, &key, 0);
4118                         btrfs_fixup_low_keys(root, path, &key,
4119                                              btrfs_header_level(buf) + 1);
4120                 }
4121         } else {
4122                 struct btrfs_item *item1, *item2;
4123                 struct btrfs_key k1, k2;
4124                 char *item1_data, *item2_data;
4125                 u32 item1_offset, item2_offset, item1_size, item2_size;
4126
4127                 item1 = btrfs_item_nr(slot);
4128                 item2 = btrfs_item_nr(slot + 1);
4129                 btrfs_item_key_to_cpu(buf, &k1, slot);
4130                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4131                 item1_offset = btrfs_item_offset(buf, item1);
4132                 item2_offset = btrfs_item_offset(buf, item2);
4133                 item1_size = btrfs_item_size(buf, item1);
4134                 item2_size = btrfs_item_size(buf, item2);
4135
4136                 item1_data = malloc(item1_size);
4137                 if (!item1_data)
4138                         return -ENOMEM;
4139                 item2_data = malloc(item2_size);
4140                 if (!item2_data) {
4141                         free(item1_data);
4142                         return -ENOMEM;
4143                 }
4144
4145                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4146                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4147
4148                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4149                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4150                 free(item1_data);
4151                 free(item2_data);
4152
4153                 btrfs_set_item_offset(buf, item1, item2_offset);
4154                 btrfs_set_item_offset(buf, item2, item1_offset);
4155                 btrfs_set_item_size(buf, item1, item2_size);
4156                 btrfs_set_item_size(buf, item2, item1_size);
4157
4158                 path->slots[0] = slot;
4159                 btrfs_set_item_key_unsafe(root, path, &k2);
4160                 path->slots[0] = slot + 1;
4161                 btrfs_set_item_key_unsafe(root, path, &k1);
4162         }
4163         return 0;
4164 }
4165
4166 static int fix_key_order(struct btrfs_trans_handle *trans,
4167                          struct btrfs_root *root,
4168                          struct btrfs_path *path)
4169 {
4170         struct extent_buffer *buf;
4171         struct btrfs_key k1, k2;
4172         int i;
4173         int level = path->lowest_level;
4174         int ret = -EIO;
4175
4176         buf = path->nodes[level];
4177         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4178                 if (level) {
4179                         btrfs_node_key_to_cpu(buf, &k1, i);
4180                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4181                 } else {
4182                         btrfs_item_key_to_cpu(buf, &k1, i);
4183                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4184                 }
4185                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4186                         continue;
4187                 ret = swap_values(root, path, buf, i);
4188                 if (ret)
4189                         break;
4190                 btrfs_mark_buffer_dirty(buf);
4191                 i = 0;
4192         }
4193         return ret;
4194 }
4195
4196 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4197                              struct btrfs_root *root,
4198                              struct btrfs_path *path,
4199                              struct extent_buffer *buf, int slot)
4200 {
4201         struct btrfs_key key;
4202         int nritems = btrfs_header_nritems(buf);
4203
4204         btrfs_item_key_to_cpu(buf, &key, slot);
4205
4206         /* These are all the keys we can deal with missing. */
4207         if (key.type != BTRFS_DIR_INDEX_KEY &&
4208             key.type != BTRFS_EXTENT_ITEM_KEY &&
4209             key.type != BTRFS_METADATA_ITEM_KEY &&
4210             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4211             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4212                 return -1;
4213
4214         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4215                (unsigned long long)key.objectid, key.type,
4216                (unsigned long long)key.offset, slot, buf->start);
4217         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4218                               btrfs_item_nr_offset(slot + 1),
4219                               sizeof(struct btrfs_item) *
4220                               (nritems - slot - 1));
4221         btrfs_set_header_nritems(buf, nritems - 1);
4222         if (slot == 0) {
4223                 struct btrfs_disk_key disk_key;
4224
4225                 btrfs_item_key(buf, &disk_key, 0);
4226                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4227         }
4228         btrfs_mark_buffer_dirty(buf);
4229         return 0;
4230 }
4231
4232 static int fix_item_offset(struct btrfs_trans_handle *trans,
4233                            struct btrfs_root *root,
4234                            struct btrfs_path *path)
4235 {
4236         struct extent_buffer *buf;
4237         int i;
4238         int ret = 0;
4239
4240         /* We should only get this for leaves */
4241         BUG_ON(path->lowest_level);
4242         buf = path->nodes[0];
4243 again:
4244         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4245                 unsigned int shift = 0, offset;
4246
4247                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4248                     BTRFS_LEAF_DATA_SIZE(root)) {
4249                         if (btrfs_item_end_nr(buf, i) >
4250                             BTRFS_LEAF_DATA_SIZE(root)) {
4251                                 ret = delete_bogus_item(trans, root, path,
4252                                                         buf, i);
4253                                 if (!ret)
4254                                         goto again;
4255                                 fprintf(stderr, "item is off the end of the "
4256                                         "leaf, can't fix\n");
4257                                 ret = -EIO;
4258                                 break;
4259                         }
4260                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4261                                 btrfs_item_end_nr(buf, i);
4262                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4263                            btrfs_item_offset_nr(buf, i - 1)) {
4264                         if (btrfs_item_end_nr(buf, i) >
4265                             btrfs_item_offset_nr(buf, i - 1)) {
4266                                 ret = delete_bogus_item(trans, root, path,
4267                                                         buf, i);
4268                                 if (!ret)
4269                                         goto again;
4270                                 fprintf(stderr, "items overlap, can't fix\n");
4271                                 ret = -EIO;
4272                                 break;
4273                         }
4274                         shift = btrfs_item_offset_nr(buf, i - 1) -
4275                                 btrfs_item_end_nr(buf, i);
4276                 }
4277                 if (!shift)
4278                         continue;
4279
4280                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4281                        i, shift, (unsigned long long)buf->start);
4282                 offset = btrfs_item_offset_nr(buf, i);
4283                 memmove_extent_buffer(buf,
4284                                       btrfs_leaf_data(buf) + offset + shift,
4285                                       btrfs_leaf_data(buf) + offset,
4286                                       btrfs_item_size_nr(buf, i));
4287                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4288                                       offset + shift);
4289                 btrfs_mark_buffer_dirty(buf);
4290         }
4291
4292         /*
4293          * We may have moved things, in which case we want to exit so we don't
4294          * write those changes out.  Once we have proper abort functionality in
4295          * progs this can be changed to something nicer.
4296          */
4297         BUG_ON(ret);
4298         return ret;
4299 }
4300
4301 /*
4302  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4303  * then just return -EIO.
4304  */
4305 static int try_to_fix_bad_block(struct btrfs_root *root,
4306                                 struct extent_buffer *buf,
4307                                 enum btrfs_tree_block_status status)
4308 {
4309         struct btrfs_trans_handle *trans;
4310         struct ulist *roots;
4311         struct ulist_node *node;
4312         struct btrfs_root *search_root;
4313         struct btrfs_path *path;
4314         struct ulist_iterator iter;
4315         struct btrfs_key root_key, key;
4316         int ret;
4317
4318         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4319             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4320                 return -EIO;
4321
4322         path = btrfs_alloc_path();
4323         if (!path)
4324                 return -EIO;
4325
4326         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4327                                    0, &roots);
4328         if (ret) {
4329                 btrfs_free_path(path);
4330                 return -EIO;
4331         }
4332
4333         ULIST_ITER_INIT(&iter);
4334         while ((node = ulist_next(roots, &iter))) {
4335                 root_key.objectid = node->val;
4336                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4337                 root_key.offset = (u64)-1;
4338
4339                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4340                 if (IS_ERR(root)) {
4341                         ret = -EIO;
4342                         break;
4343                 }
4344
4345
4346                 trans = btrfs_start_transaction(search_root, 0);
4347                 if (IS_ERR(trans)) {
4348                         ret = PTR_ERR(trans);
4349                         break;
4350                 }
4351
4352                 path->lowest_level = btrfs_header_level(buf);
4353                 path->skip_check_block = 1;
4354                 if (path->lowest_level)
4355                         btrfs_node_key_to_cpu(buf, &key, 0);
4356                 else
4357                         btrfs_item_key_to_cpu(buf, &key, 0);
4358                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4359                 if (ret) {
4360                         ret = -EIO;
4361                         btrfs_commit_transaction(trans, search_root);
4362                         break;
4363                 }
4364                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4365                         ret = fix_key_order(trans, search_root, path);
4366                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4367                         ret = fix_item_offset(trans, search_root, path);
4368                 if (ret) {
4369                         btrfs_commit_transaction(trans, search_root);
4370                         break;
4371                 }
4372                 btrfs_release_path(path);
4373                 btrfs_commit_transaction(trans, search_root);
4374         }
4375         ulist_free(roots);
4376         btrfs_free_path(path);
4377         return ret;
4378 }
4379
4380 static int check_block(struct btrfs_root *root,
4381                        struct cache_tree *extent_cache,
4382                        struct extent_buffer *buf, u64 flags)
4383 {
4384         struct extent_record *rec;
4385         struct cache_extent *cache;
4386         struct btrfs_key key;
4387         enum btrfs_tree_block_status status;
4388         int ret = 0;
4389         int level;
4390
4391         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4392         if (!cache)
4393                 return 1;
4394         rec = container_of(cache, struct extent_record, cache);
4395         rec->generation = btrfs_header_generation(buf);
4396
4397         level = btrfs_header_level(buf);
4398         if (btrfs_header_nritems(buf) > 0) {
4399
4400                 if (level == 0)
4401                         btrfs_item_key_to_cpu(buf, &key, 0);
4402                 else
4403                         btrfs_node_key_to_cpu(buf, &key, 0);
4404
4405                 rec->info_objectid = key.objectid;
4406         }
4407         rec->info_level = level;
4408
4409         if (btrfs_is_leaf(buf))
4410                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4411         else
4412                 status = btrfs_check_node(root, &rec->parent_key, buf);
4413
4414         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4415                 if (repair)
4416                         status = try_to_fix_bad_block(root, buf, status);
4417                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4418                         ret = -EIO;
4419                         fprintf(stderr, "bad block %llu\n",
4420                                 (unsigned long long)buf->start);
4421                 } else {
4422                         /*
4423                          * Signal to callers we need to start the scan over
4424                          * again since we'll have cowed blocks.
4425                          */
4426                         ret = -EAGAIN;
4427                 }
4428         } else {
4429                 rec->content_checked = 1;
4430                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4431                         rec->owner_ref_checked = 1;
4432                 else {
4433                         ret = check_owner_ref(root, rec, buf);
4434                         if (!ret)
4435                                 rec->owner_ref_checked = 1;
4436                 }
4437         }
4438         if (!ret)
4439                 maybe_free_extent_rec(extent_cache, rec);
4440         return ret;
4441 }
4442
4443 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4444                                                 u64 parent, u64 root)
4445 {
4446         struct list_head *cur = rec->backrefs.next;
4447         struct extent_backref *node;
4448         struct tree_backref *back;
4449
4450         while(cur != &rec->backrefs) {
4451                 node = to_extent_backref(cur);
4452                 cur = cur->next;
4453                 if (node->is_data)
4454                         continue;
4455                 back = to_tree_backref(node);
4456                 if (parent > 0) {
4457                         if (!node->full_backref)
4458                                 continue;
4459                         if (parent == back->parent)
4460                                 return back;
4461                 } else {
4462                         if (node->full_backref)
4463                                 continue;
4464                         if (back->root == root)
4465                                 return back;
4466                 }
4467         }
4468         return NULL;
4469 }
4470
4471 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4472                                                 u64 parent, u64 root)
4473 {
4474         struct tree_backref *ref = malloc(sizeof(*ref));
4475
4476         if (!ref)
4477                 return NULL;
4478         memset(&ref->node, 0, sizeof(ref->node));
4479         if (parent > 0) {
4480                 ref->parent = parent;
4481                 ref->node.full_backref = 1;
4482         } else {
4483                 ref->root = root;
4484                 ref->node.full_backref = 0;
4485         }
4486         list_add_tail(&ref->node.list, &rec->backrefs);
4487
4488         return ref;
4489 }
4490
4491 static struct data_backref *find_data_backref(struct extent_record *rec,
4492                                                 u64 parent, u64 root,
4493                                                 u64 owner, u64 offset,
4494                                                 int found_ref,
4495                                                 u64 disk_bytenr, u64 bytes)
4496 {
4497         struct list_head *cur = rec->backrefs.next;
4498         struct extent_backref *node;
4499         struct data_backref *back;
4500
4501         while(cur != &rec->backrefs) {
4502                 node = to_extent_backref(cur);
4503                 cur = cur->next;
4504                 if (!node->is_data)
4505                         continue;
4506                 back = to_data_backref(node);
4507                 if (parent > 0) {
4508                         if (!node->full_backref)
4509                                 continue;
4510                         if (parent == back->parent)
4511                                 return back;
4512                 } else {
4513                         if (node->full_backref)
4514                                 continue;
4515                         if (back->root == root && back->owner == owner &&
4516                             back->offset == offset) {
4517                                 if (found_ref && node->found_ref &&
4518                                     (back->bytes != bytes ||
4519                                     back->disk_bytenr != disk_bytenr))
4520                                         continue;
4521                                 return back;
4522                         }
4523                 }
4524         }
4525         return NULL;
4526 }
4527
4528 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4529                                                 u64 parent, u64 root,
4530                                                 u64 owner, u64 offset,
4531                                                 u64 max_size)
4532 {
4533         struct data_backref *ref = malloc(sizeof(*ref));
4534
4535         if (!ref)
4536                 return NULL;
4537         memset(&ref->node, 0, sizeof(ref->node));
4538         ref->node.is_data = 1;
4539
4540         if (parent > 0) {
4541                 ref->parent = parent;
4542                 ref->owner = 0;
4543                 ref->offset = 0;
4544                 ref->node.full_backref = 1;
4545         } else {
4546                 ref->root = root;
4547                 ref->owner = owner;
4548                 ref->offset = offset;
4549                 ref->node.full_backref = 0;
4550         }
4551         ref->bytes = max_size;
4552         ref->found_ref = 0;
4553         ref->num_refs = 0;
4554         list_add_tail(&ref->node.list, &rec->backrefs);
4555         if (max_size > rec->max_size)
4556                 rec->max_size = max_size;
4557         return ref;
4558 }
4559
4560 /* Check if the type of extent matches with its chunk */
4561 static void check_extent_type(struct extent_record *rec)
4562 {
4563         struct btrfs_block_group_cache *bg_cache;
4564
4565         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4566         if (!bg_cache)
4567                 return;
4568
4569         /* data extent, check chunk directly*/
4570         if (!rec->metadata) {
4571                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4572                         rec->wrong_chunk_type = 1;
4573                 return;
4574         }
4575
4576         /* metadata extent, check the obvious case first */
4577         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4578                                  BTRFS_BLOCK_GROUP_METADATA))) {
4579                 rec->wrong_chunk_type = 1;
4580                 return;
4581         }
4582
4583         /*
4584          * Check SYSTEM extent, as it's also marked as metadata, we can only
4585          * make sure it's a SYSTEM extent by its backref
4586          */
4587         if (!list_empty(&rec->backrefs)) {
4588                 struct extent_backref *node;
4589                 struct tree_backref *tback;
4590                 u64 bg_type;
4591
4592                 node = to_extent_backref(rec->backrefs.next);
4593                 if (node->is_data) {
4594                         /* tree block shouldn't have data backref */
4595                         rec->wrong_chunk_type = 1;
4596                         return;
4597                 }
4598                 tback = container_of(node, struct tree_backref, node);
4599
4600                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4601                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4602                 else
4603                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4604                 if (!(bg_cache->flags & bg_type))
4605                         rec->wrong_chunk_type = 1;
4606         }
4607 }
4608
4609 /*
4610  * Allocate a new extent record, fill default values from @tmpl and insert int
4611  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4612  * the cache, otherwise it fails.
4613  */
4614 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4615                 struct extent_record *tmpl)
4616 {
4617         struct extent_record *rec;
4618         int ret = 0;
4619
4620         rec = malloc(sizeof(*rec));
4621         if (!rec)
4622                 return -ENOMEM;
4623         rec->start = tmpl->start;
4624         rec->max_size = tmpl->max_size;
4625         rec->nr = max(tmpl->nr, tmpl->max_size);
4626         rec->found_rec = tmpl->found_rec;
4627         rec->content_checked = tmpl->content_checked;
4628         rec->owner_ref_checked = tmpl->owner_ref_checked;
4629         rec->num_duplicates = 0;
4630         rec->metadata = tmpl->metadata;
4631         rec->flag_block_full_backref = FLAG_UNSET;
4632         rec->bad_full_backref = 0;
4633         rec->crossing_stripes = 0;
4634         rec->wrong_chunk_type = 0;
4635         rec->is_root = tmpl->is_root;
4636         rec->refs = tmpl->refs;
4637         rec->extent_item_refs = tmpl->extent_item_refs;
4638         rec->parent_generation = tmpl->parent_generation;
4639         INIT_LIST_HEAD(&rec->backrefs);
4640         INIT_LIST_HEAD(&rec->dups);
4641         INIT_LIST_HEAD(&rec->list);
4642         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4643         rec->cache.start = tmpl->start;
4644         rec->cache.size = tmpl->nr;
4645         ret = insert_cache_extent(extent_cache, &rec->cache);
4646         BUG_ON(ret);
4647         bytes_used += rec->nr;
4648
4649         if (tmpl->metadata)
4650                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4651                                 global_info->tree_root->nodesize);
4652         check_extent_type(rec);
4653         return ret;
4654 }
4655
4656 /*
4657  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4658  * some are hints:
4659  * - refs              - if found, increase refs
4660  * - is_root           - if found, set
4661  * - content_checked   - if found, set
4662  * - owner_ref_checked - if found, set
4663  *
4664  * If not found, create a new one, initialize and insert.
4665  */
4666 static int add_extent_rec(struct cache_tree *extent_cache,
4667                 struct extent_record *tmpl)
4668 {
4669         struct extent_record *rec;
4670         struct cache_extent *cache;
4671         int ret = 0;
4672         int dup = 0;
4673
4674         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4675         if (cache) {
4676                 rec = container_of(cache, struct extent_record, cache);
4677                 if (tmpl->refs)
4678                         rec->refs++;
4679                 if (rec->nr == 1)
4680                         rec->nr = max(tmpl->nr, tmpl->max_size);
4681
4682                 /*
4683                  * We need to make sure to reset nr to whatever the extent
4684                  * record says was the real size, this way we can compare it to
4685                  * the backrefs.
4686                  */
4687                 if (tmpl->found_rec) {
4688                         if (tmpl->start != rec->start || rec->found_rec) {
4689                                 struct extent_record *tmp;
4690
4691                                 dup = 1;
4692                                 if (list_empty(&rec->list))
4693                                         list_add_tail(&rec->list,
4694                                                       &duplicate_extents);
4695
4696                                 /*
4697                                  * We have to do this song and dance in case we
4698                                  * find an extent record that falls inside of
4699                                  * our current extent record but does not have
4700                                  * the same objectid.
4701                                  */
4702                                 tmp = malloc(sizeof(*tmp));
4703                                 if (!tmp)
4704                                         return -ENOMEM;
4705                                 tmp->start = tmpl->start;
4706                                 tmp->max_size = tmpl->max_size;
4707                                 tmp->nr = tmpl->nr;
4708                                 tmp->found_rec = 1;
4709                                 tmp->metadata = tmpl->metadata;
4710                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4711                                 INIT_LIST_HEAD(&tmp->list);
4712                                 list_add_tail(&tmp->list, &rec->dups);
4713                                 rec->num_duplicates++;
4714                         } else {
4715                                 rec->nr = tmpl->nr;
4716                                 rec->found_rec = 1;
4717                         }
4718                 }
4719
4720                 if (tmpl->extent_item_refs && !dup) {
4721                         if (rec->extent_item_refs) {
4722                                 fprintf(stderr, "block %llu rec "
4723                                         "extent_item_refs %llu, passed %llu\n",
4724                                         (unsigned long long)tmpl->start,
4725                                         (unsigned long long)
4726                                                         rec->extent_item_refs,
4727                                         (unsigned long long)tmpl->extent_item_refs);
4728                         }
4729                         rec->extent_item_refs = tmpl->extent_item_refs;
4730                 }
4731                 if (tmpl->is_root)
4732                         rec->is_root = 1;
4733                 if (tmpl->content_checked)
4734                         rec->content_checked = 1;
4735                 if (tmpl->owner_ref_checked)
4736                         rec->owner_ref_checked = 1;
4737                 memcpy(&rec->parent_key, &tmpl->parent_key,
4738                                 sizeof(tmpl->parent_key));
4739                 if (tmpl->parent_generation)
4740                         rec->parent_generation = tmpl->parent_generation;
4741                 if (rec->max_size < tmpl->max_size)
4742                         rec->max_size = tmpl->max_size;
4743
4744                 /*
4745                  * A metadata extent can't cross stripe_len boundary, otherwise
4746                  * kernel scrub won't be able to handle it.
4747                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4748                  * it.
4749                  */
4750                 if (tmpl->metadata)
4751                         rec->crossing_stripes = check_crossing_stripes(
4752                                 rec->start, global_info->tree_root->nodesize);
4753                 check_extent_type(rec);
4754                 maybe_free_extent_rec(extent_cache, rec);
4755                 return ret;
4756         }
4757
4758         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4759
4760         return ret;
4761 }
4762
4763 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4764                             u64 parent, u64 root, int found_ref)
4765 {
4766         struct extent_record *rec;
4767         struct tree_backref *back;
4768         struct cache_extent *cache;
4769
4770         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4771         if (!cache) {
4772                 struct extent_record tmpl;
4773
4774                 memset(&tmpl, 0, sizeof(tmpl));
4775                 tmpl.start = bytenr;
4776                 tmpl.nr = 1;
4777                 tmpl.metadata = 1;
4778
4779                 add_extent_rec_nolookup(extent_cache, &tmpl);
4780
4781                 /* really a bug in cache_extent implement now */
4782                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4783                 if (!cache)
4784                         return -ENOENT;
4785         }
4786
4787         rec = container_of(cache, struct extent_record, cache);
4788         if (rec->start != bytenr) {
4789                 /*
4790                  * Several cause, from unaligned bytenr to over lapping extents
4791                  */
4792                 return -EEXIST;
4793         }
4794
4795         back = find_tree_backref(rec, parent, root);
4796         if (!back) {
4797                 back = alloc_tree_backref(rec, parent, root);
4798                 if (!back)
4799                         return -ENOMEM;
4800         }
4801
4802         if (found_ref) {
4803                 if (back->node.found_ref) {
4804                         fprintf(stderr, "Extent back ref already exists "
4805                                 "for %llu parent %llu root %llu \n",
4806                                 (unsigned long long)bytenr,
4807                                 (unsigned long long)parent,
4808                                 (unsigned long long)root);
4809                 }
4810                 back->node.found_ref = 1;
4811         } else {
4812                 if (back->node.found_extent_tree) {
4813                         fprintf(stderr, "Extent back ref already exists "
4814                                 "for %llu parent %llu root %llu \n",
4815                                 (unsigned long long)bytenr,
4816                                 (unsigned long long)parent,
4817                                 (unsigned long long)root);
4818                 }
4819                 back->node.found_extent_tree = 1;
4820         }
4821         check_extent_type(rec);
4822         maybe_free_extent_rec(extent_cache, rec);
4823         return 0;
4824 }
4825
4826 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4827                             u64 parent, u64 root, u64 owner, u64 offset,
4828                             u32 num_refs, int found_ref, u64 max_size)
4829 {
4830         struct extent_record *rec;
4831         struct data_backref *back;
4832         struct cache_extent *cache;
4833
4834         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4835         if (!cache) {
4836                 struct extent_record tmpl;
4837
4838                 memset(&tmpl, 0, sizeof(tmpl));
4839                 tmpl.start = bytenr;
4840                 tmpl.nr = 1;
4841                 tmpl.max_size = max_size;
4842
4843                 add_extent_rec_nolookup(extent_cache, &tmpl);
4844
4845                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4846                 if (!cache)
4847                         abort();
4848         }
4849
4850         rec = container_of(cache, struct extent_record, cache);
4851         if (rec->max_size < max_size)
4852                 rec->max_size = max_size;
4853
4854         /*
4855          * If found_ref is set then max_size is the real size and must match the
4856          * existing refs.  So if we have already found a ref then we need to
4857          * make sure that this ref matches the existing one, otherwise we need
4858          * to add a new backref so we can notice that the backrefs don't match
4859          * and we need to figure out who is telling the truth.  This is to
4860          * account for that awful fsync bug I introduced where we'd end up with
4861          * a btrfs_file_extent_item that would have its length include multiple
4862          * prealloc extents or point inside of a prealloc extent.
4863          */
4864         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4865                                  bytenr, max_size);
4866         if (!back) {
4867                 back = alloc_data_backref(rec, parent, root, owner, offset,
4868                                           max_size);
4869                 BUG_ON(!back);
4870         }
4871
4872         if (found_ref) {
4873                 BUG_ON(num_refs != 1);
4874                 if (back->node.found_ref)
4875                         BUG_ON(back->bytes != max_size);
4876                 back->node.found_ref = 1;
4877                 back->found_ref += 1;
4878                 back->bytes = max_size;
4879                 back->disk_bytenr = bytenr;
4880                 rec->refs += 1;
4881                 rec->content_checked = 1;
4882                 rec->owner_ref_checked = 1;
4883         } else {
4884                 if (back->node.found_extent_tree) {
4885                         fprintf(stderr, "Extent back ref already exists "
4886                                 "for %llu parent %llu root %llu "
4887                                 "owner %llu offset %llu num_refs %lu\n",
4888                                 (unsigned long long)bytenr,
4889                                 (unsigned long long)parent,
4890                                 (unsigned long long)root,
4891                                 (unsigned long long)owner,
4892                                 (unsigned long long)offset,
4893                                 (unsigned long)num_refs);
4894                 }
4895                 back->num_refs = num_refs;
4896                 back->node.found_extent_tree = 1;
4897         }
4898         maybe_free_extent_rec(extent_cache, rec);
4899         return 0;
4900 }
4901
4902 static int add_pending(struct cache_tree *pending,
4903                        struct cache_tree *seen, u64 bytenr, u32 size)
4904 {
4905         int ret;
4906         ret = add_cache_extent(seen, bytenr, size);
4907         if (ret)
4908                 return ret;
4909         add_cache_extent(pending, bytenr, size);
4910         return 0;
4911 }
4912
4913 static int pick_next_pending(struct cache_tree *pending,
4914                         struct cache_tree *reada,
4915                         struct cache_tree *nodes,
4916                         u64 last, struct block_info *bits, int bits_nr,
4917                         int *reada_bits)
4918 {
4919         unsigned long node_start = last;
4920         struct cache_extent *cache;
4921         int ret;
4922
4923         cache = search_cache_extent(reada, 0);
4924         if (cache) {
4925                 bits[0].start = cache->start;
4926                 bits[0].size = cache->size;
4927                 *reada_bits = 1;
4928                 return 1;
4929         }
4930         *reada_bits = 0;
4931         if (node_start > 32768)
4932                 node_start -= 32768;
4933
4934         cache = search_cache_extent(nodes, node_start);
4935         if (!cache)
4936                 cache = search_cache_extent(nodes, 0);
4937
4938         if (!cache) {
4939                  cache = search_cache_extent(pending, 0);
4940                  if (!cache)
4941                          return 0;
4942                  ret = 0;
4943                  do {
4944                          bits[ret].start = cache->start;
4945                          bits[ret].size = cache->size;
4946                          cache = next_cache_extent(cache);
4947                          ret++;
4948                  } while (cache && ret < bits_nr);
4949                  return ret;
4950         }
4951
4952         ret = 0;
4953         do {
4954                 bits[ret].start = cache->start;
4955                 bits[ret].size = cache->size;
4956                 cache = next_cache_extent(cache);
4957                 ret++;
4958         } while (cache && ret < bits_nr);
4959
4960         if (bits_nr - ret > 8) {
4961                 u64 lookup = bits[0].start + bits[0].size;
4962                 struct cache_extent *next;
4963                 next = search_cache_extent(pending, lookup);
4964                 while(next) {
4965                         if (next->start - lookup > 32768)
4966                                 break;
4967                         bits[ret].start = next->start;
4968                         bits[ret].size = next->size;
4969                         lookup = next->start + next->size;
4970                         ret++;
4971                         if (ret == bits_nr)
4972                                 break;
4973                         next = next_cache_extent(next);
4974                         if (!next)
4975                                 break;
4976                 }
4977         }
4978         return ret;
4979 }
4980
4981 static void free_chunk_record(struct cache_extent *cache)
4982 {
4983         struct chunk_record *rec;
4984
4985         rec = container_of(cache, struct chunk_record, cache);
4986         list_del_init(&rec->list);
4987         list_del_init(&rec->dextents);
4988         free(rec);
4989 }
4990
4991 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4992 {
4993         cache_tree_free_extents(chunk_cache, free_chunk_record);
4994 }
4995
4996 static void free_device_record(struct rb_node *node)
4997 {
4998         struct device_record *rec;
4999
5000         rec = container_of(node, struct device_record, node);
5001         free(rec);
5002 }
5003
5004 FREE_RB_BASED_TREE(device_cache, free_device_record);
5005
5006 int insert_block_group_record(struct block_group_tree *tree,
5007                               struct block_group_record *bg_rec)
5008 {
5009         int ret;
5010
5011         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5012         if (ret)
5013                 return ret;
5014
5015         list_add_tail(&bg_rec->list, &tree->block_groups);
5016         return 0;
5017 }
5018
5019 static void free_block_group_record(struct cache_extent *cache)
5020 {
5021         struct block_group_record *rec;
5022
5023         rec = container_of(cache, struct block_group_record, cache);
5024         list_del_init(&rec->list);
5025         free(rec);
5026 }
5027
5028 void free_block_group_tree(struct block_group_tree *tree)
5029 {
5030         cache_tree_free_extents(&tree->tree, free_block_group_record);
5031 }
5032
5033 int insert_device_extent_record(struct device_extent_tree *tree,
5034                                 struct device_extent_record *de_rec)
5035 {
5036         int ret;
5037
5038         /*
5039          * Device extent is a bit different from the other extents, because
5040          * the extents which belong to the different devices may have the
5041          * same start and size, so we need use the special extent cache
5042          * search/insert functions.
5043          */
5044         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5045         if (ret)
5046                 return ret;
5047
5048         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5049         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5050         return 0;
5051 }
5052
5053 static void free_device_extent_record(struct cache_extent *cache)
5054 {
5055         struct device_extent_record *rec;
5056
5057         rec = container_of(cache, struct device_extent_record, cache);
5058         if (!list_empty(&rec->chunk_list))
5059                 list_del_init(&rec->chunk_list);
5060         if (!list_empty(&rec->device_list))
5061                 list_del_init(&rec->device_list);
5062         free(rec);
5063 }
5064
5065 void free_device_extent_tree(struct device_extent_tree *tree)
5066 {
5067         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5068 }
5069
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref, anything else as a data backref carrying the v0 ref count.  In
 * both cases key.objectid is the extent bytenr and key.offset the parent.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
5090
5091 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5092                                             struct btrfs_key *key,
5093                                             int slot)
5094 {
5095         struct btrfs_chunk *ptr;
5096         struct chunk_record *rec;
5097         int num_stripes, i;
5098
5099         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5100         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5101
5102         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5103         if (!rec) {
5104                 fprintf(stderr, "memory allocation failed\n");
5105                 exit(-1);
5106         }
5107
5108         INIT_LIST_HEAD(&rec->list);
5109         INIT_LIST_HEAD(&rec->dextents);
5110         rec->bg_rec = NULL;
5111
5112         rec->cache.start = key->offset;
5113         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5114
5115         rec->generation = btrfs_header_generation(leaf);
5116
5117         rec->objectid = key->objectid;
5118         rec->type = key->type;
5119         rec->offset = key->offset;
5120
5121         rec->length = rec->cache.size;
5122         rec->owner = btrfs_chunk_owner(leaf, ptr);
5123         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5124         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5125         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5126         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5127         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5128         rec->num_stripes = num_stripes;
5129         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5130
5131         for (i = 0; i < rec->num_stripes; ++i) {
5132                 rec->stripes[i].devid =
5133                         btrfs_stripe_devid_nr(leaf, ptr, i);
5134                 rec->stripes[i].offset =
5135                         btrfs_stripe_offset_nr(leaf, ptr, i);
5136                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5137                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5138                                 BTRFS_UUID_SIZE);
5139         }
5140
5141         return rec;
5142 }
5143
5144 static int process_chunk_item(struct cache_tree *chunk_cache,
5145                               struct btrfs_key *key, struct extent_buffer *eb,
5146                               int slot)
5147 {
5148         struct chunk_record *rec;
5149         struct btrfs_chunk *chunk;
5150         int ret = 0;
5151
5152         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5153         /*
5154          * Do extra check for this chunk item,
5155          *
5156          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5157          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5158          * and owner<->key_type check.
5159          */
5160         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5161                                       key->offset);
5162         if (ret < 0) {
5163                 error("chunk(%llu, %llu) is not valid, ignore it",
5164                       key->offset, btrfs_chunk_length(eb, chunk));
5165                 return 0;
5166         }
5167         rec = btrfs_new_chunk_record(eb, key, slot);
5168         ret = insert_cache_extent(chunk_cache, &rec->cache);
5169         if (ret) {
5170                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5171                         rec->offset, rec->length);
5172                 free(rec);
5173         }
5174
5175         return ret;
5176 }
5177
5178 static int process_device_item(struct rb_root *dev_cache,
5179                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5180 {
5181         struct btrfs_dev_item *ptr;
5182         struct device_record *rec;
5183         int ret = 0;
5184
5185         ptr = btrfs_item_ptr(eb,
5186                 slot, struct btrfs_dev_item);
5187
5188         rec = malloc(sizeof(*rec));
5189         if (!rec) {
5190                 fprintf(stderr, "memory allocation failed\n");
5191                 return -ENOMEM;
5192         }
5193
5194         rec->devid = key->offset;
5195         rec->generation = btrfs_header_generation(eb);
5196
5197         rec->objectid = key->objectid;
5198         rec->type = key->type;
5199         rec->offset = key->offset;
5200
5201         rec->devid = btrfs_device_id(eb, ptr);
5202         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5203         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5204
5205         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5206         if (ret) {
5207                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5208                 free(rec);
5209         }
5210
5211         return ret;
5212 }
5213
5214 struct block_group_record *
5215 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5216                              int slot)
5217 {
5218         struct btrfs_block_group_item *ptr;
5219         struct block_group_record *rec;
5220
5221         rec = calloc(1, sizeof(*rec));
5222         if (!rec) {
5223                 fprintf(stderr, "memory allocation failed\n");
5224                 exit(-1);
5225         }
5226
5227         rec->cache.start = key->objectid;
5228         rec->cache.size = key->offset;
5229
5230         rec->generation = btrfs_header_generation(leaf);
5231
5232         rec->objectid = key->objectid;
5233         rec->type = key->type;
5234         rec->offset = key->offset;
5235
5236         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5237         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5238
5239         INIT_LIST_HEAD(&rec->list);
5240
5241         return rec;
5242 }
5243
5244 static int process_block_group_item(struct block_group_tree *block_group_cache,
5245                                     struct btrfs_key *key,
5246                                     struct extent_buffer *eb, int slot)
5247 {
5248         struct block_group_record *rec;
5249         int ret = 0;
5250
5251         rec = btrfs_new_block_group_record(eb, key, slot);
5252         ret = insert_block_group_record(block_group_cache, rec);
5253         if (ret) {
5254                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5255                         rec->objectid, rec->offset);
5256                 free(rec);
5257         }
5258
5259         return ret;
5260 }
5261
5262 struct device_extent_record *
5263 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5264                                struct btrfs_key *key, int slot)
5265 {
5266         struct device_extent_record *rec;
5267         struct btrfs_dev_extent *ptr;
5268
5269         rec = calloc(1, sizeof(*rec));
5270         if (!rec) {
5271                 fprintf(stderr, "memory allocation failed\n");
5272                 exit(-1);
5273         }
5274
5275         rec->cache.objectid = key->objectid;
5276         rec->cache.start = key->offset;
5277
5278         rec->generation = btrfs_header_generation(leaf);
5279
5280         rec->objectid = key->objectid;
5281         rec->type = key->type;
5282         rec->offset = key->offset;
5283
5284         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5285         rec->chunk_objecteid =
5286                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5287         rec->chunk_offset =
5288                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5289         rec->length = btrfs_dev_extent_length(leaf, ptr);
5290         rec->cache.size = rec->length;
5291
5292         INIT_LIST_HEAD(&rec->chunk_list);
5293         INIT_LIST_HEAD(&rec->device_list);
5294
5295         return rec;
5296 }
5297
5298 static int
5299 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5300                            struct btrfs_key *key, struct extent_buffer *eb,
5301                            int slot)
5302 {
5303         struct device_extent_record *rec;
5304         int ret;
5305
5306         rec = btrfs_new_device_extent_record(eb, key, slot);
5307         ret = insert_device_extent_record(dev_extent_cache, rec);
5308         if (ret) {
5309                 fprintf(stderr,
5310                         "Device extent[%llu, %llu, %llu] existed.\n",
5311                         rec->objectid, rec->offset, rec->length);
5312                 free(rec);
5313         }
5314
5315         return ret;
5316 }
5317
5318 static int process_extent_item(struct btrfs_root *root,
5319                                struct cache_tree *extent_cache,
5320                                struct extent_buffer *eb, int slot)
5321 {
5322         struct btrfs_extent_item *ei;
5323         struct btrfs_extent_inline_ref *iref;
5324         struct btrfs_extent_data_ref *dref;
5325         struct btrfs_shared_data_ref *sref;
5326         struct btrfs_key key;
5327         struct extent_record tmpl;
5328         unsigned long end;
5329         unsigned long ptr;
5330         int ret;
5331         int type;
5332         u32 item_size = btrfs_item_size_nr(eb, slot);
5333         u64 refs = 0;
5334         u64 offset;
5335         u64 num_bytes;
5336         int metadata = 0;
5337
5338         btrfs_item_key_to_cpu(eb, &key, slot);
5339
5340         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5341                 metadata = 1;
5342                 num_bytes = root->nodesize;
5343         } else {
5344                 num_bytes = key.offset;
5345         }
5346
5347         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5348                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5349                       key.objectid, root->sectorsize);
5350                 return -EIO;
5351         }
5352         if (item_size < sizeof(*ei)) {
5353 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5354                 struct btrfs_extent_item_v0 *ei0;
5355                 BUG_ON(item_size != sizeof(*ei0));
5356                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5357                 refs = btrfs_extent_refs_v0(eb, ei0);
5358 #else
5359                 BUG();
5360 #endif
5361                 memset(&tmpl, 0, sizeof(tmpl));
5362                 tmpl.start = key.objectid;
5363                 tmpl.nr = num_bytes;
5364                 tmpl.extent_item_refs = refs;
5365                 tmpl.metadata = metadata;
5366                 tmpl.found_rec = 1;
5367                 tmpl.max_size = num_bytes;
5368
5369                 return add_extent_rec(extent_cache, &tmpl);
5370         }
5371
5372         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5373         refs = btrfs_extent_refs(eb, ei);
5374         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5375                 metadata = 1;
5376         else
5377                 metadata = 0;
5378         if (metadata && num_bytes != root->nodesize) {
5379                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5380                       num_bytes, root->nodesize);
5381                 return -EIO;
5382         }
5383         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5384                 error("ignore invalid data extent, length %llu is not aligned to %u",
5385                       num_bytes, root->sectorsize);
5386                 return -EIO;
5387         }
5388
5389         memset(&tmpl, 0, sizeof(tmpl));
5390         tmpl.start = key.objectid;
5391         tmpl.nr = num_bytes;
5392         tmpl.extent_item_refs = refs;
5393         tmpl.metadata = metadata;
5394         tmpl.found_rec = 1;
5395         tmpl.max_size = num_bytes;
5396         add_extent_rec(extent_cache, &tmpl);
5397
5398         ptr = (unsigned long)(ei + 1);
5399         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5400             key.type == BTRFS_EXTENT_ITEM_KEY)
5401                 ptr += sizeof(struct btrfs_tree_block_info);
5402
5403         end = (unsigned long)ei + item_size;
5404         while (ptr < end) {
5405                 iref = (struct btrfs_extent_inline_ref *)ptr;
5406                 type = btrfs_extent_inline_ref_type(eb, iref);
5407                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5408                 switch (type) {
5409                 case BTRFS_TREE_BLOCK_REF_KEY:
5410                         ret = add_tree_backref(extent_cache, key.objectid,
5411                                         0, offset, 0);
5412                         if (ret < 0)
5413                                 error("add_tree_backref failed: %s",
5414                                       strerror(-ret));
5415                         break;
5416                 case BTRFS_SHARED_BLOCK_REF_KEY:
5417                         ret = add_tree_backref(extent_cache, key.objectid,
5418                                         offset, 0, 0);
5419                         if (ret < 0)
5420                                 error("add_tree_backref failed: %s",
5421                                       strerror(-ret));
5422                         break;
5423                 case BTRFS_EXTENT_DATA_REF_KEY:
5424                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5425                         add_data_backref(extent_cache, key.objectid, 0,
5426                                         btrfs_extent_data_ref_root(eb, dref),
5427                                         btrfs_extent_data_ref_objectid(eb,
5428                                                                        dref),
5429                                         btrfs_extent_data_ref_offset(eb, dref),
5430                                         btrfs_extent_data_ref_count(eb, dref),
5431                                         0, num_bytes);
5432                         break;
5433                 case BTRFS_SHARED_DATA_REF_KEY:
5434                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5435                         add_data_backref(extent_cache, key.objectid, offset,
5436                                         0, 0, 0,
5437                                         btrfs_shared_data_ref_count(eb, sref),
5438                                         0, num_bytes);
5439                         break;
5440                 default:
5441                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5442                                 key.objectid, key.type, num_bytes);
5443                         goto out;
5444                 }
5445                 ptr += btrfs_extent_inline_ref_size(type);
5446         }
5447         WARN_ON(ptr > end);
5448 out:
5449         return 0;
5450 }
5451
5452 static int check_cache_range(struct btrfs_root *root,
5453                              struct btrfs_block_group_cache *cache,
5454                              u64 offset, u64 bytes)
5455 {
5456         struct btrfs_free_space *entry;
5457         u64 *logical;
5458         u64 bytenr;
5459         int stripe_len;
5460         int i, nr, ret;
5461
5462         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5463                 bytenr = btrfs_sb_offset(i);
5464                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5465                                        cache->key.objectid, bytenr, 0,
5466                                        &logical, &nr, &stripe_len);
5467                 if (ret)
5468                         return ret;
5469
5470                 while (nr--) {
5471                         if (logical[nr] + stripe_len <= offset)
5472                                 continue;
5473                         if (offset + bytes <= logical[nr])
5474                                 continue;
5475                         if (logical[nr] == offset) {
5476                                 if (stripe_len >= bytes) {
5477                                         kfree(logical);
5478                                         return 0;
5479                                 }
5480                                 bytes -= stripe_len;
5481                                 offset += stripe_len;
5482                         } else if (logical[nr] < offset) {
5483                                 if (logical[nr] + stripe_len >=
5484                                     offset + bytes) {
5485                                         kfree(logical);
5486                                         return 0;
5487                                 }
5488                                 bytes = (offset + bytes) -
5489                                         (logical[nr] + stripe_len);
5490                                 offset = logical[nr] + stripe_len;
5491                         } else {
5492                                 /*
5493                                  * Could be tricky, the super may land in the
5494                                  * middle of the area we're checking.  First
5495                                  * check the easiest case, it's at the end.
5496                                  */
5497                                 if (logical[nr] + stripe_len >=
5498                                     bytes + offset) {
5499                                         bytes = logical[nr] - offset;
5500                                         continue;
5501                                 }
5502
5503                                 /* Check the left side */
5504                                 ret = check_cache_range(root, cache,
5505                                                         offset,
5506                                                         logical[nr] - offset);
5507                                 if (ret) {
5508                                         kfree(logical);
5509                                         return ret;
5510                                 }
5511
5512                                 /* Now we continue with the right side */
5513                                 bytes = (offset + bytes) -
5514                                         (logical[nr] + stripe_len);
5515                                 offset = logical[nr] + stripe_len;
5516                         }
5517                 }
5518
5519                 kfree(logical);
5520         }
5521
5522         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5523         if (!entry) {
5524                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5525                         offset, offset+bytes);
5526                 return -EINVAL;
5527         }
5528
5529         if (entry->offset != offset) {
5530                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5531                         entry->offset);
5532                 return -EINVAL;
5533         }
5534
5535         if (entry->bytes != bytes) {
5536                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5537                         bytes, entry->bytes, offset);
5538                 return -EINVAL;
5539         }
5540
5541         unlink_free_space(cache->free_space_ctl, entry);
5542         free(entry);
5543         return 0;
5544 }
5545
5546 static int verify_space_cache(struct btrfs_root *root,
5547                               struct btrfs_block_group_cache *cache)
5548 {
5549         struct btrfs_path *path;
5550         struct extent_buffer *leaf;
5551         struct btrfs_key key;
5552         u64 last;
5553         int ret = 0;
5554
5555         path = btrfs_alloc_path();
5556         if (!path)
5557                 return -ENOMEM;
5558
5559         root = root->fs_info->extent_root;
5560
5561         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5562
5563         key.objectid = last;
5564         key.offset = 0;
5565         key.type = BTRFS_EXTENT_ITEM_KEY;
5566
5567         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5568         if (ret < 0)
5569                 goto out;
5570         ret = 0;
5571         while (1) {
5572                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5573                         ret = btrfs_next_leaf(root, path);
5574                         if (ret < 0)
5575                                 goto out;
5576                         if (ret > 0) {
5577                                 ret = 0;
5578                                 break;
5579                         }
5580                 }
5581                 leaf = path->nodes[0];
5582                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5583                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5584                         break;
5585                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5586                     key.type != BTRFS_METADATA_ITEM_KEY) {
5587                         path->slots[0]++;
5588                         continue;
5589                 }
5590
5591                 if (last == key.objectid) {
5592                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5593                                 last = key.objectid + key.offset;
5594                         else
5595                                 last = key.objectid + root->nodesize;
5596                         path->slots[0]++;
5597                         continue;
5598                 }
5599
5600                 ret = check_cache_range(root, cache, last,
5601                                         key.objectid - last);
5602                 if (ret)
5603                         break;
5604                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5605                         last = key.objectid + key.offset;
5606                 else
5607                         last = key.objectid + root->nodesize;
5608                 path->slots[0]++;
5609         }
5610
5611         if (last < cache->key.objectid + cache->key.offset)
5612                 ret = check_cache_range(root, cache, last,
5613                                         cache->key.objectid +
5614                                         cache->key.offset - last);
5615
5616 out:
5617         btrfs_free_path(path);
5618
5619         if (!ret &&
5620             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5621                 fprintf(stderr, "There are still entries left in the space "
5622                         "cache\n");
5623                 ret = -EINVAL;
5624         }
5625
5626         return ret;
5627 }
5628
5629 static int check_space_cache(struct btrfs_root *root)
5630 {
5631         struct btrfs_block_group_cache *cache;
5632         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5633         int ret;
5634         int error = 0;
5635
5636         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5637             btrfs_super_generation(root->fs_info->super_copy) !=
5638             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5639                 printf("cache and super generation don't match, space cache "
5640                        "will be invalidated\n");
5641                 return 0;
5642         }
5643
5644         if (ctx.progress_enabled) {
5645                 ctx.tp = TASK_FREE_SPACE;
5646                 task_start(ctx.info);
5647         }
5648
5649         while (1) {
5650                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5651                 if (!cache)
5652                         break;
5653
5654                 start = cache->key.objectid + cache->key.offset;
5655                 if (!cache->free_space_ctl) {
5656                         if (btrfs_init_free_space_ctl(cache,
5657                                                       root->sectorsize)) {
5658                                 ret = -ENOMEM;
5659                                 break;
5660                         }
5661                 } else {
5662                         btrfs_remove_free_space_cache(cache);
5663                 }
5664
5665                 if (btrfs_fs_compat_ro(root->fs_info,
5666                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5667                         ret = exclude_super_stripes(root, cache);
5668                         if (ret) {
5669                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5670                                         strerror(-ret));
5671                                 error++;
5672                                 continue;
5673                         }
5674                         ret = load_free_space_tree(root->fs_info, cache);
5675                         free_excluded_extents(root, cache);
5676                         if (ret < 0) {
5677                                 fprintf(stderr, "could not load free space tree: %s\n",
5678                                         strerror(-ret));
5679                                 error++;
5680                                 continue;
5681                         }
5682                         error += ret;
5683                 } else {
5684                         ret = load_free_space_cache(root->fs_info, cache);
5685                         if (!ret)
5686                                 continue;
5687                 }
5688
5689                 ret = verify_space_cache(root, cache);
5690                 if (ret) {
5691                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5692                                 cache->key.objectid);
5693                         error++;
5694                 }
5695         }
5696
5697         task_stop(ctx.info);
5698
5699         return error ? -EINVAL : 0;
5700 }
5701
5702 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5703                         u64 num_bytes, unsigned long leaf_offset,
5704                         struct extent_buffer *eb) {
5705
5706         u64 offset = 0;
5707         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5708         char *data;
5709         unsigned long csum_offset;
5710         u32 csum;
5711         u32 csum_expected;
5712         u64 read_len;
5713         u64 data_checked = 0;
5714         u64 tmp;
5715         int ret = 0;
5716         int mirror;
5717         int num_copies;
5718
5719         if (num_bytes % root->sectorsize)
5720                 return -EINVAL;
5721
5722         data = malloc(num_bytes);
5723         if (!data)
5724                 return -ENOMEM;
5725
5726         while (offset < num_bytes) {
5727                 mirror = 0;
5728 again:
5729                 read_len = num_bytes - offset;
5730                 /* read as much space once a time */
5731                 ret = read_extent_data(root, data + offset,
5732                                 bytenr + offset, &read_len, mirror);
5733                 if (ret)
5734                         goto out;
5735                 data_checked = 0;
5736                 /* verify every 4k data's checksum */
5737                 while (data_checked < read_len) {
5738                         csum = ~(u32)0;
5739                         tmp = offset + data_checked;
5740
5741                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5742                                                csum, root->sectorsize);
5743                         btrfs_csum_final(csum, (char *)&csum);
5744
5745                         csum_offset = leaf_offset +
5746                                  tmp / root->sectorsize * csum_size;
5747                         read_extent_buffer(eb, (char *)&csum_expected,
5748                                            csum_offset, csum_size);
5749                         /* try another mirror */
5750                         if (csum != csum_expected) {
5751                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5752                                                 mirror, bytenr + tmp,
5753                                                 csum, csum_expected);
5754                                 num_copies = btrfs_num_copies(
5755                                                 &root->fs_info->mapping_tree,
5756                                                 bytenr, num_bytes);
5757                                 if (mirror < num_copies - 1) {
5758                                         mirror += 1;
5759                                         goto again;
5760                                 }
5761                         }
5762                         data_checked += root->sectorsize;
5763                 }
5764                 offset += read_len;
5765         }
5766 out:
5767         free(data);
5768         return ret;
5769 }
5770
5771 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5772                                u64 num_bytes)
5773 {
5774         struct btrfs_path *path;
5775         struct extent_buffer *leaf;
5776         struct btrfs_key key;
5777         int ret;
5778
5779         path = btrfs_alloc_path();
5780         if (!path) {
5781                 fprintf(stderr, "Error allocating path\n");
5782                 return -ENOMEM;
5783         }
5784
5785         key.objectid = bytenr;
5786         key.type = BTRFS_EXTENT_ITEM_KEY;
5787         key.offset = (u64)-1;
5788
5789 again:
5790         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5791                                 0, 0);
5792         if (ret < 0) {
5793                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5794                 btrfs_free_path(path);
5795                 return ret;
5796         } else if (ret) {
5797                 if (path->slots[0] > 0) {
5798                         path->slots[0]--;
5799                 } else {
5800                         ret = btrfs_prev_leaf(root, path);
5801                         if (ret < 0) {
5802                                 goto out;
5803                         } else if (ret > 0) {
5804                                 ret = 0;
5805                                 goto out;
5806                         }
5807                 }
5808         }
5809
5810         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5811
5812         /*
5813          * Block group items come before extent items if they have the same
5814          * bytenr, so walk back one more just in case.  Dear future traveller,
5815          * first congrats on mastering time travel.  Now if it's not too much
5816          * trouble could you go back to 2006 and tell Chris to make the
5817          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5818          * EXTENT_ITEM_KEY please?
5819          */
5820         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5821                 if (path->slots[0] > 0) {
5822                         path->slots[0]--;
5823                 } else {
5824                         ret = btrfs_prev_leaf(root, path);
5825                         if (ret < 0) {
5826                                 goto out;
5827                         } else if (ret > 0) {
5828                                 ret = 0;
5829                                 goto out;
5830                         }
5831                 }
5832                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5833         }
5834
5835         while (num_bytes) {
5836                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5837                         ret = btrfs_next_leaf(root, path);
5838                         if (ret < 0) {
5839                                 fprintf(stderr, "Error going to next leaf "
5840                                         "%d\n", ret);
5841                                 btrfs_free_path(path);
5842                                 return ret;
5843                         } else if (ret) {
5844                                 break;
5845                         }
5846                 }
5847                 leaf = path->nodes[0];
5848                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5849                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5850                         path->slots[0]++;
5851                         continue;
5852                 }
5853                 if (key.objectid + key.offset < bytenr) {
5854                         path->slots[0]++;
5855                         continue;
5856                 }
5857                 if (key.objectid > bytenr + num_bytes)
5858                         break;
5859
5860                 if (key.objectid == bytenr) {
5861                         if (key.offset >= num_bytes) {
5862                                 num_bytes = 0;
5863                                 break;
5864                         }
5865                         num_bytes -= key.offset;
5866                         bytenr += key.offset;
5867                 } else if (key.objectid < bytenr) {
5868                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5869                                 num_bytes = 0;
5870                                 break;
5871                         }
5872                         num_bytes = (bytenr + num_bytes) -
5873                                 (key.objectid + key.offset);
5874                         bytenr = key.objectid + key.offset;
5875                 } else {
5876                         if (key.objectid + key.offset < bytenr + num_bytes) {
5877                                 u64 new_start = key.objectid + key.offset;
5878                                 u64 new_bytes = bytenr + num_bytes - new_start;
5879
5880                                 /*
5881                                  * Weird case, the extent is in the middle of
5882                                  * our range, we'll have to search one side
5883                                  * and then the other.  Not sure if this happens
5884                                  * in real life, but no harm in coding it up
5885                                  * anyway just in case.
5886                                  */
5887                                 btrfs_release_path(path);
5888                                 ret = check_extent_exists(root, new_start,
5889                                                           new_bytes);
5890                                 if (ret) {
5891                                         fprintf(stderr, "Right section didn't "
5892                                                 "have a record\n");
5893                                         break;
5894                                 }
5895                                 num_bytes = key.objectid - bytenr;
5896                                 goto again;
5897                         }
5898                         num_bytes = key.objectid - bytenr;
5899                 }
5900                 path->slots[0]++;
5901         }
5902         ret = 0;
5903
5904 out:
5905         if (num_bytes && !ret) {
5906                 fprintf(stderr, "There are no extents for csum range "
5907                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5908                 ret = 1;
5909         }
5910
5911         btrfs_free_path(path);
5912         return ret;
5913 }
5914
5915 static int check_csums(struct btrfs_root *root)
5916 {
5917         struct btrfs_path *path;
5918         struct extent_buffer *leaf;
5919         struct btrfs_key key;
5920         u64 offset = 0, num_bytes = 0;
5921         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5922         int errors = 0;
5923         int ret;
5924         u64 data_len;
5925         unsigned long leaf_offset;
5926
5927         root = root->fs_info->csum_root;
5928         if (!extent_buffer_uptodate(root->node)) {
5929                 fprintf(stderr, "No valid csum tree found\n");
5930                 return -ENOENT;
5931         }
5932
5933         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5934         key.type = BTRFS_EXTENT_CSUM_KEY;
5935         key.offset = 0;
5936
5937         path = btrfs_alloc_path();
5938         if (!path)
5939                 return -ENOMEM;
5940
5941         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5942         if (ret < 0) {
5943                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5944                 btrfs_free_path(path);
5945                 return ret;
5946         }
5947
5948         if (ret > 0 && path->slots[0])
5949                 path->slots[0]--;
5950         ret = 0;
5951
5952         while (1) {
5953                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5954                         ret = btrfs_next_leaf(root, path);
5955                         if (ret < 0) {
5956                                 fprintf(stderr, "Error going to next leaf "
5957                                         "%d\n", ret);
5958                                 break;
5959                         }
5960                         if (ret)
5961                                 break;
5962                 }
5963                 leaf = path->nodes[0];
5964
5965                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5966                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5967                         path->slots[0]++;
5968                         continue;
5969                 }
5970
5971                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5972                               csum_size) * root->sectorsize;
5973                 if (!check_data_csum)
5974                         goto skip_csum_check;
5975                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5976                 ret = check_extent_csums(root, key.offset, data_len,
5977                                          leaf_offset, leaf);
5978                 if (ret)
5979                         break;
5980 skip_csum_check:
5981                 if (!num_bytes) {
5982                         offset = key.offset;
5983                 } else if (key.offset != offset + num_bytes) {
5984                         ret = check_extent_exists(root, offset, num_bytes);
5985                         if (ret) {
5986                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5987                                         "there is no extent record\n",
5988                                         offset, offset+num_bytes);
5989                                 errors++;
5990                         }
5991                         offset = key.offset;
5992                         num_bytes = 0;
5993                 }
5994                 num_bytes += data_len;
5995                 path->slots[0]++;
5996         }
5997
5998         btrfs_free_path(path);
5999         return errors;
6000 }
6001
6002 static int is_dropped_key(struct btrfs_key *key,
6003                           struct btrfs_key *drop_key) {
6004         if (key->objectid < drop_key->objectid)
6005                 return 1;
6006         else if (key->objectid == drop_key->objectid) {
6007                 if (key->type < drop_key->type)
6008                         return 1;
6009                 else if (key->type == drop_key->type) {
6010                         if (key->offset < drop_key->offset)
6011                                 return 1;
6012                 }
6013         }
6014         return 0;
6015 }
6016
6017 /*
6018  * Here are the rules for FULL_BACKREF.
6019  *
6020  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6021  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6022  *      FULL_BACKREF set.
6023  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6024  *    if it happened after the relocation occurred since we'll have dropped the
6025  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6026  *    have no real way to know for sure.
6027  *
6028  * We process the blocks one root at a time, and we start from the lowest root
6029  * objectid and go to the highest.  So we can just lookup the owner backref for
6030  * the record and if we don't find it then we know it doesn't exist and we have
6031  * a FULL BACKREF.
6032  *
6033  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6034  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6035  * be set or not and then we can check later once we've gathered all the refs.
6036  */
6037 static int calc_extent_flag(struct btrfs_root *root,
6038                            struct cache_tree *extent_cache,
6039                            struct extent_buffer *buf,
6040                            struct root_item_record *ri,
6041                            u64 *flags)
6042 {
6043         struct extent_record *rec;
6044         struct cache_extent *cache;
6045         struct tree_backref *tback;
6046         u64 owner = 0;
6047
6048         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6049         /* we have added this extent before */
6050         BUG_ON(!cache);
6051         rec = container_of(cache, struct extent_record, cache);
6052
6053         /*
6054          * Except file/reloc tree, we can not have
6055          * FULL BACKREF MODE
6056          */
6057         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6058                 goto normal;
6059         /*
6060          * root node
6061          */
6062         if (buf->start == ri->bytenr)
6063                 goto normal;
6064
6065         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6066                 goto full_backref;
6067
6068         owner = btrfs_header_owner(buf);
6069         if (owner == ri->objectid)
6070                 goto normal;
6071
6072         tback = find_tree_backref(rec, 0, owner);
6073         if (!tback)
6074                 goto full_backref;
6075 normal:
6076         *flags = 0;
6077         if (rec->flag_block_full_backref != FLAG_UNSET &&
6078             rec->flag_block_full_backref != 0)
6079                 rec->bad_full_backref = 1;
6080         return 0;
6081 full_backref:
6082         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6083         if (rec->flag_block_full_backref != FLAG_UNSET &&
6084             rec->flag_block_full_backref != 1)
6085                 rec->bad_full_backref = 1;
6086         return 0;
6087 }
6088
6089 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6090 {
6091         fprintf(stderr, "Invalid key type(");
6092         print_key_type(stderr, 0, key_type);
6093         fprintf(stderr, ") found in root(");
6094         print_objectid(stderr, rootid, 0);
6095         fprintf(stderr, ")\n");
6096 }
6097
6098 /*
6099  * Check if the key is valid with its extent buffer.
6100  *
6101  * This is a early check in case invalid key exists in a extent buffer
6102  * This is not comprehensive yet, but should prevent wrong key/item passed
6103  * further
6104  */
6105 static int check_type_with_root(u64 rootid, u8 key_type)
6106 {
6107         switch (key_type) {
6108         /* Only valid in chunk tree */
6109         case BTRFS_DEV_ITEM_KEY:
6110         case BTRFS_CHUNK_ITEM_KEY:
6111                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6112                         goto err;
6113                 break;
6114         /* valid in csum and log tree */
6115         case BTRFS_CSUM_TREE_OBJECTID:
6116                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6117                       is_fstree(rootid)))
6118                         goto err;
6119                 break;
6120         case BTRFS_EXTENT_ITEM_KEY:
6121         case BTRFS_METADATA_ITEM_KEY:
6122         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6123                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6124                         goto err;
6125                 break;
6126         case BTRFS_ROOT_ITEM_KEY:
6127                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6128                         goto err;
6129                 break;
6130         case BTRFS_DEV_EXTENT_KEY:
6131                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6132                         goto err;
6133                 break;
6134         }
6135         return 0;
6136 err:
6137         report_mismatch_key_root(key_type, rootid);
6138         return -EINVAL;
6139 }
6140
6141 static int run_next_block(struct btrfs_root *root,
6142                           struct block_info *bits,
6143                           int bits_nr,
6144                           u64 *last,
6145                           struct cache_tree *pending,
6146                           struct cache_tree *seen,
6147                           struct cache_tree *reada,
6148                           struct cache_tree *nodes,
6149                           struct cache_tree *extent_cache,
6150                           struct cache_tree *chunk_cache,
6151                           struct rb_root *dev_cache,
6152                           struct block_group_tree *block_group_cache,
6153                           struct device_extent_tree *dev_extent_cache,
6154                           struct root_item_record *ri)
6155 {
6156         struct extent_buffer *buf;
6157         struct extent_record *rec = NULL;
6158         u64 bytenr;
6159         u32 size;
6160         u64 parent;
6161         u64 owner;
6162         u64 flags;
6163         u64 ptr;
6164         u64 gen = 0;
6165         int ret = 0;
6166         int i;
6167         int nritems;
6168         struct btrfs_key key;
6169         struct cache_extent *cache;
6170         int reada_bits;
6171
6172         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6173                                     bits_nr, &reada_bits);
6174         if (nritems == 0)
6175                 return 1;
6176
6177         if (!reada_bits) {
6178                 for(i = 0; i < nritems; i++) {
6179                         ret = add_cache_extent(reada, bits[i].start,
6180                                                bits[i].size);
6181                         if (ret == -EEXIST)
6182                                 continue;
6183
6184                         /* fixme, get the parent transid */
6185                         readahead_tree_block(root, bits[i].start,
6186                                              bits[i].size, 0);
6187                 }
6188         }
6189         *last = bits[0].start;
6190         bytenr = bits[0].start;
6191         size = bits[0].size;
6192
6193         cache = lookup_cache_extent(pending, bytenr, size);
6194         if (cache) {
6195                 remove_cache_extent(pending, cache);
6196                 free(cache);
6197         }
6198         cache = lookup_cache_extent(reada, bytenr, size);
6199         if (cache) {
6200                 remove_cache_extent(reada, cache);
6201                 free(cache);
6202         }
6203         cache = lookup_cache_extent(nodes, bytenr, size);
6204         if (cache) {
6205                 remove_cache_extent(nodes, cache);
6206                 free(cache);
6207         }
6208         cache = lookup_cache_extent(extent_cache, bytenr, size);
6209         if (cache) {
6210                 rec = container_of(cache, struct extent_record, cache);
6211                 gen = rec->parent_generation;
6212         }
6213
6214         /* fixme, get the real parent transid */
6215         buf = read_tree_block(root, bytenr, size, gen);
6216         if (!extent_buffer_uptodate(buf)) {
6217                 record_bad_block_io(root->fs_info,
6218                                     extent_cache, bytenr, size);
6219                 goto out;
6220         }
6221
6222         nritems = btrfs_header_nritems(buf);
6223
6224         flags = 0;
6225         if (!init_extent_tree) {
6226                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6227                                        btrfs_header_level(buf), 1, NULL,
6228                                        &flags);
6229                 if (ret < 0) {
6230                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6231                         if (ret < 0) {
6232                                 fprintf(stderr, "Couldn't calc extent flags\n");
6233                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6234                         }
6235                 }
6236         } else {
6237                 flags = 0;
6238                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6239                 if (ret < 0) {
6240                         fprintf(stderr, "Couldn't calc extent flags\n");
6241                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6242                 }
6243         }
6244
6245         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6246                 if (ri != NULL &&
6247                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6248                     ri->objectid == btrfs_header_owner(buf)) {
6249                         /*
6250                          * Ok we got to this block from it's original owner and
6251                          * we have FULL_BACKREF set.  Relocation can leave
6252                          * converted blocks over so this is altogether possible,
6253                          * however it's not possible if the generation > the
6254                          * last snapshot, so check for this case.
6255                          */
6256                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6257                             btrfs_header_generation(buf) > ri->last_snapshot) {
6258                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6259                                 rec->bad_full_backref = 1;
6260                         }
6261                 }
6262         } else {
6263                 if (ri != NULL &&
6264                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6265                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6266                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6267                         rec->bad_full_backref = 1;
6268                 }
6269         }
6270
6271         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6272                 rec->flag_block_full_backref = 1;
6273                 parent = bytenr;
6274                 owner = 0;
6275         } else {
6276                 rec->flag_block_full_backref = 0;
6277                 parent = 0;
6278                 owner = btrfs_header_owner(buf);
6279         }
6280
6281         ret = check_block(root, extent_cache, buf, flags);
6282         if (ret)
6283                 goto out;
6284
6285         if (btrfs_is_leaf(buf)) {
6286                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6287                 for (i = 0; i < nritems; i++) {
6288                         struct btrfs_file_extent_item *fi;
6289                         btrfs_item_key_to_cpu(buf, &key, i);
6290                         /*
6291                          * Check key type against the leaf owner.
6292                          * Could filter quite a lot of early error if
6293                          * owner is correct
6294                          */
6295                         if (check_type_with_root(btrfs_header_owner(buf),
6296                                                  key.type)) {
6297                                 fprintf(stderr, "ignoring invalid key\n");
6298                                 continue;
6299                         }
6300                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6301                                 process_extent_item(root, extent_cache, buf,
6302                                                     i);
6303                                 continue;
6304                         }
6305                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6306                                 process_extent_item(root, extent_cache, buf,
6307                                                     i);
6308                                 continue;
6309                         }
6310                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6311                                 total_csum_bytes +=
6312                                         btrfs_item_size_nr(buf, i);
6313                                 continue;
6314                         }
6315                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6316                                 process_chunk_item(chunk_cache, &key, buf, i);
6317                                 continue;
6318                         }
6319                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6320                                 process_device_item(dev_cache, &key, buf, i);
6321                                 continue;
6322                         }
6323                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6324                                 process_block_group_item(block_group_cache,
6325                                         &key, buf, i);
6326                                 continue;
6327                         }
6328                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6329                                 process_device_extent_item(dev_extent_cache,
6330                                         &key, buf, i);
6331                                 continue;
6332
6333                         }
6334                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6335 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6336                                 process_extent_ref_v0(extent_cache, buf, i);
6337 #else
6338                                 BUG();
6339 #endif
6340                                 continue;
6341                         }
6342
6343                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6344                                 ret = add_tree_backref(extent_cache,
6345                                                 key.objectid, 0, key.offset, 0);
6346                                 if (ret < 0)
6347                                         error("add_tree_backref failed: %s",
6348                                               strerror(-ret));
6349                                 continue;
6350                         }
6351                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6352                                 ret = add_tree_backref(extent_cache,
6353                                                 key.objectid, key.offset, 0, 0);
6354                                 if (ret < 0)
6355                                         error("add_tree_backref failed: %s",
6356                                               strerror(-ret));
6357                                 continue;
6358                         }
6359                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6360                                 struct btrfs_extent_data_ref *ref;
6361                                 ref = btrfs_item_ptr(buf, i,
6362                                                 struct btrfs_extent_data_ref);
6363                                 add_data_backref(extent_cache,
6364                                         key.objectid, 0,
6365                                         btrfs_extent_data_ref_root(buf, ref),
6366                                         btrfs_extent_data_ref_objectid(buf,
6367                                                                        ref),
6368                                         btrfs_extent_data_ref_offset(buf, ref),
6369                                         btrfs_extent_data_ref_count(buf, ref),
6370                                         0, root->sectorsize);
6371                                 continue;
6372                         }
6373                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6374                                 struct btrfs_shared_data_ref *ref;
6375                                 ref = btrfs_item_ptr(buf, i,
6376                                                 struct btrfs_shared_data_ref);
6377                                 add_data_backref(extent_cache,
6378                                         key.objectid, key.offset, 0, 0, 0,
6379                                         btrfs_shared_data_ref_count(buf, ref),
6380                                         0, root->sectorsize);
6381                                 continue;
6382                         }
6383                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6384                                 struct bad_item *bad;
6385
6386                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6387                                         continue;
6388                                 if (!owner)
6389                                         continue;
6390                                 bad = malloc(sizeof(struct bad_item));
6391                                 if (!bad)
6392                                         continue;
6393                                 INIT_LIST_HEAD(&bad->list);
6394                                 memcpy(&bad->key, &key,
6395                                        sizeof(struct btrfs_key));
6396                                 bad->root_id = owner;
6397                                 list_add_tail(&bad->list, &delete_items);
6398                                 continue;
6399                         }
6400                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6401                                 continue;
6402                         fi = btrfs_item_ptr(buf, i,
6403                                             struct btrfs_file_extent_item);
6404                         if (btrfs_file_extent_type(buf, fi) ==
6405                             BTRFS_FILE_EXTENT_INLINE)
6406                                 continue;
6407                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6408                                 continue;
6409
6410                         data_bytes_allocated +=
6411                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6412                         if (data_bytes_allocated < root->sectorsize) {
6413                                 abort();
6414                         }
6415                         data_bytes_referenced +=
6416                                 btrfs_file_extent_num_bytes(buf, fi);
6417                         add_data_backref(extent_cache,
6418                                 btrfs_file_extent_disk_bytenr(buf, fi),
6419                                 parent, owner, key.objectid, key.offset -
6420                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6421                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6422                 }
6423         } else {
6424                 int level;
6425                 struct btrfs_key first_key;
6426
6427                 first_key.objectid = 0;
6428
6429                 if (nritems > 0)
6430                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6431                 level = btrfs_header_level(buf);
6432                 for (i = 0; i < nritems; i++) {
6433                         struct extent_record tmpl;
6434
6435                         ptr = btrfs_node_blockptr(buf, i);
6436                         size = root->nodesize;
6437                         btrfs_node_key_to_cpu(buf, &key, i);
6438                         if (ri != NULL) {
6439                                 if ((level == ri->drop_level)
6440                                     && is_dropped_key(&key, &ri->drop_key)) {
6441                                         continue;
6442                                 }
6443                         }
6444
6445                         memset(&tmpl, 0, sizeof(tmpl));
6446                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6447                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6448                         tmpl.start = ptr;
6449                         tmpl.nr = size;
6450                         tmpl.refs = 1;
6451                         tmpl.metadata = 1;
6452                         tmpl.max_size = size;
6453                         ret = add_extent_rec(extent_cache, &tmpl);
6454                         if (ret < 0)
6455                                 goto out;
6456
6457                         ret = add_tree_backref(extent_cache, ptr, parent,
6458                                         owner, 1);
6459                         if (ret < 0) {
6460                                 error("add_tree_backref failed: %s",
6461                                       strerror(-ret));
6462                                 continue;
6463                         }
6464
6465                         if (level > 1) {
6466                                 add_pending(nodes, seen, ptr, size);
6467                         } else {
6468                                 add_pending(pending, seen, ptr, size);
6469                         }
6470                 }
6471                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6472                                       nritems) * sizeof(struct btrfs_key_ptr);
6473         }
6474         total_btree_bytes += buf->len;
6475         if (fs_root_objectid(btrfs_header_owner(buf)))
6476                 total_fs_tree_bytes += buf->len;
6477         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6478                 total_extent_tree_bytes += buf->len;
6479         if (!found_old_backref &&
6480             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6481             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6482             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6483                 found_old_backref = 1;
6484 out:
6485         free_extent_buffer(buf);
6486         return ret;
6487 }
6488
6489 static int add_root_to_pending(struct extent_buffer *buf,
6490                                struct cache_tree *extent_cache,
6491                                struct cache_tree *pending,
6492                                struct cache_tree *seen,
6493                                struct cache_tree *nodes,
6494                                u64 objectid)
6495 {
6496         struct extent_record tmpl;
6497         int ret;
6498
6499         if (btrfs_header_level(buf) > 0)
6500                 add_pending(nodes, seen, buf->start, buf->len);
6501         else
6502                 add_pending(pending, seen, buf->start, buf->len);
6503
6504         memset(&tmpl, 0, sizeof(tmpl));
6505         tmpl.start = buf->start;
6506         tmpl.nr = buf->len;
6507         tmpl.is_root = 1;
6508         tmpl.refs = 1;
6509         tmpl.metadata = 1;
6510         tmpl.max_size = buf->len;
6511         add_extent_rec(extent_cache, &tmpl);
6512
6513         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6514             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6515                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6516                                 0, 1);
6517         else
6518                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6519                                 1);
6520         return ret;
6521 }
6522
6523 /* as we fix the tree, we might be deleting blocks that
6524  * we're tracking for repair.  This hook makes sure we
6525  * remove any backrefs for blocks as we are fixing them.
6526  */
6527 static int free_extent_hook(struct btrfs_trans_handle *trans,
6528                             struct btrfs_root *root,
6529                             u64 bytenr, u64 num_bytes, u64 parent,
6530                             u64 root_objectid, u64 owner, u64 offset,
6531                             int refs_to_drop)
6532 {
6533         struct extent_record *rec;
6534         struct cache_extent *cache;
6535         int is_data;
6536         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6537
6538         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6539         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6540         if (!cache)
6541                 return 0;
6542
6543         rec = container_of(cache, struct extent_record, cache);
6544         if (is_data) {
6545                 struct data_backref *back;
6546                 back = find_data_backref(rec, parent, root_objectid, owner,
6547                                          offset, 1, bytenr, num_bytes);
6548                 if (!back)
6549                         goto out;
6550                 if (back->node.found_ref) {
6551                         back->found_ref -= refs_to_drop;
6552                         if (rec->refs)
6553                                 rec->refs -= refs_to_drop;
6554                 }
6555                 if (back->node.found_extent_tree) {
6556                         back->num_refs -= refs_to_drop;
6557                         if (rec->extent_item_refs)
6558                                 rec->extent_item_refs -= refs_to_drop;
6559                 }
6560                 if (back->found_ref == 0)
6561                         back->node.found_ref = 0;
6562                 if (back->num_refs == 0)
6563                         back->node.found_extent_tree = 0;
6564
6565                 if (!back->node.found_extent_tree && back->node.found_ref) {
6566                         list_del(&back->node.list);
6567                         free(back);
6568                 }
6569         } else {
6570                 struct tree_backref *back;
6571                 back = find_tree_backref(rec, parent, root_objectid);
6572                 if (!back)
6573                         goto out;
6574                 if (back->node.found_ref) {
6575                         if (rec->refs)
6576                                 rec->refs--;
6577                         back->node.found_ref = 0;
6578                 }
6579                 if (back->node.found_extent_tree) {
6580                         if (rec->extent_item_refs)
6581                                 rec->extent_item_refs--;
6582                         back->node.found_extent_tree = 0;
6583                 }
6584                 if (!back->node.found_extent_tree && back->node.found_ref) {
6585                         list_del(&back->node.list);
6586                         free(back);
6587                 }
6588         }
6589         maybe_free_extent_rec(extent_cache, rec);
6590 out:
6591         return 0;
6592 }
6593
6594 static int delete_extent_records(struct btrfs_trans_handle *trans,
6595                                  struct btrfs_root *root,
6596                                  struct btrfs_path *path,
6597                                  u64 bytenr, u64 new_len)
6598 {
6599         struct btrfs_key key;
6600         struct btrfs_key found_key;
6601         struct extent_buffer *leaf;
6602         int ret;
6603         int slot;
6604
6605
6606         key.objectid = bytenr;
6607         key.type = (u8)-1;
6608         key.offset = (u64)-1;
6609
6610         while(1) {
6611                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6612                                         &key, path, 0, 1);
6613                 if (ret < 0)
6614                         break;
6615
6616                 if (ret > 0) {
6617                         ret = 0;
6618                         if (path->slots[0] == 0)
6619                                 break;
6620                         path->slots[0]--;
6621                 }
6622                 ret = 0;
6623
6624                 leaf = path->nodes[0];
6625                 slot = path->slots[0];
6626
6627                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6628                 if (found_key.objectid != bytenr)
6629                         break;
6630
6631                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6632                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6633                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6634                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6635                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6636                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6637                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6638                         btrfs_release_path(path);
6639                         if (found_key.type == 0) {
6640                                 if (found_key.offset == 0)
6641                                         break;
6642                                 key.offset = found_key.offset - 1;
6643                                 key.type = found_key.type;
6644                         }
6645                         key.type = found_key.type - 1;
6646                         key.offset = (u64)-1;
6647                         continue;
6648                 }
6649
6650                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6651                         found_key.objectid, found_key.type, found_key.offset);
6652
6653                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6654                 if (ret)
6655                         break;
6656                 btrfs_release_path(path);
6657
6658                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6659                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6660                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6661                                 found_key.offset : root->nodesize;
6662
6663                         ret = btrfs_update_block_group(trans, root, bytenr,
6664                                                        bytes, 0, 0);
6665                         if (ret)
6666                                 break;
6667                 }
6668         }
6669
6670         btrfs_release_path(path);
6671         return ret;
6672 }
6673
6674 /*
6675  * for a single backref, this will allocate a new extent
6676  * and add the backref to it.
6677  */
6678 static int record_extent(struct btrfs_trans_handle *trans,
6679                          struct btrfs_fs_info *info,
6680                          struct btrfs_path *path,
6681                          struct extent_record *rec,
6682                          struct extent_backref *back,
6683                          int allocated, u64 flags)
6684 {
6685         int ret;
6686         struct btrfs_root *extent_root = info->extent_root;
6687         struct extent_buffer *leaf;
6688         struct btrfs_key ins_key;
6689         struct btrfs_extent_item *ei;
6690         struct tree_backref *tback;
6691         struct data_backref *dback;
6692         struct btrfs_tree_block_info *bi;
6693
6694         if (!back->is_data)
6695                 rec->max_size = max_t(u64, rec->max_size,
6696                                     info->extent_root->nodesize);
6697
6698         if (!allocated) {
6699                 u32 item_size = sizeof(*ei);
6700
6701                 if (!back->is_data)
6702                         item_size += sizeof(*bi);
6703
6704                 ins_key.objectid = rec->start;
6705                 ins_key.offset = rec->max_size;
6706                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6707
6708                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6709                                         &ins_key, item_size);
6710                 if (ret)
6711                         goto fail;
6712
6713                 leaf = path->nodes[0];
6714                 ei = btrfs_item_ptr(leaf, path->slots[0],
6715                                     struct btrfs_extent_item);
6716
6717                 btrfs_set_extent_refs(leaf, ei, 0);
6718                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6719
6720                 if (back->is_data) {
6721                         btrfs_set_extent_flags(leaf, ei,
6722                                                BTRFS_EXTENT_FLAG_DATA);
6723                 } else {
6724                         struct btrfs_disk_key copy_key;;
6725
6726                         tback = to_tree_backref(back);
6727                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6728                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6729                                              sizeof(*bi));
6730
6731                         btrfs_set_disk_key_objectid(&copy_key,
6732                                                     rec->info_objectid);
6733                         btrfs_set_disk_key_type(&copy_key, 0);
6734                         btrfs_set_disk_key_offset(&copy_key, 0);
6735
6736                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6737                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6738
6739                         btrfs_set_extent_flags(leaf, ei,
6740                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6741                 }
6742
6743                 btrfs_mark_buffer_dirty(leaf);
6744                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6745                                                rec->max_size, 1, 0);
6746                 if (ret)
6747                         goto fail;
6748                 btrfs_release_path(path);
6749         }
6750
6751         if (back->is_data) {
6752                 u64 parent;
6753                 int i;
6754
6755                 dback = to_data_backref(back);
6756                 if (back->full_backref)
6757                         parent = dback->parent;
6758                 else
6759                         parent = 0;
6760
6761                 for (i = 0; i < dback->found_ref; i++) {
6762                         /* if parent != 0, we're doing a full backref
6763                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6764                          * just makes the backref allocator create a data
6765                          * backref
6766                          */
6767                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6768                                                    rec->start, rec->max_size,
6769                                                    parent,
6770                                                    dback->root,
6771                                                    parent ?
6772                                                    BTRFS_FIRST_FREE_OBJECTID :
6773                                                    dback->owner,
6774                                                    dback->offset);
6775                         if (ret)
6776                                 break;
6777                 }
6778                 fprintf(stderr, "adding new data backref"
6779                                 " on %llu %s %llu owner %llu"
6780                                 " offset %llu found %d\n",
6781                                 (unsigned long long)rec->start,
6782                                 back->full_backref ?
6783                                 "parent" : "root",
6784                                 back->full_backref ?
6785                                 (unsigned long long)parent :
6786                                 (unsigned long long)dback->root,
6787                                 (unsigned long long)dback->owner,
6788                                 (unsigned long long)dback->offset,
6789                                 dback->found_ref);
6790         } else {
6791                 u64 parent;
6792
6793                 tback = to_tree_backref(back);
6794                 if (back->full_backref)
6795                         parent = tback->parent;
6796                 else
6797                         parent = 0;
6798
6799                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6800                                            rec->start, rec->max_size,
6801                                            parent, tback->root, 0, 0);
6802                 fprintf(stderr, "adding new tree backref on "
6803                         "start %llu len %llu parent %llu root %llu\n",
6804                         rec->start, rec->max_size, parent, tback->root);
6805         }
6806 fail:
6807         btrfs_release_path(path);
6808         return ret;
6809 }
6810
6811 static struct extent_entry *find_entry(struct list_head *entries,
6812                                        u64 bytenr, u64 bytes)
6813 {
6814         struct extent_entry *entry = NULL;
6815
6816         list_for_each_entry(entry, entries, list) {
6817                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6818                         return entry;
6819         }
6820
6821         return NULL;
6822 }
6823
6824 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6825 {
6826         struct extent_entry *entry, *best = NULL, *prev = NULL;
6827
6828         list_for_each_entry(entry, entries, list) {
6829                 if (!prev) {
6830                         prev = entry;
6831                         continue;
6832                 }
6833
6834                 /*
6835                  * If there are as many broken entries as entries then we know
6836                  * not to trust this particular entry.
6837                  */
6838                 if (entry->broken == entry->count)
6839                         continue;
6840
6841                 /*
6842                  * If our current entry == best then we can't be sure our best
6843                  * is really the best, so we need to keep searching.
6844                  */
6845                 if (best && best->count == entry->count) {
6846                         prev = entry;
6847                         best = NULL;
6848                         continue;
6849                 }
6850
6851                 /* Prev == entry, not good enough, have to keep searching */
6852                 if (!prev->broken && prev->count == entry->count)
6853                         continue;
6854
6855                 if (!best)
6856                         best = (prev->count > entry->count) ? prev : entry;
6857                 else if (best->count < entry->count)
6858                         best = entry;
6859                 prev = entry;
6860         }
6861
6862         return best;
6863 }
6864
6865 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6866                       struct data_backref *dback, struct extent_entry *entry)
6867 {
6868         struct btrfs_trans_handle *trans;
6869         struct btrfs_root *root;
6870         struct btrfs_file_extent_item *fi;
6871         struct extent_buffer *leaf;
6872         struct btrfs_key key;
6873         u64 bytenr, bytes;
6874         int ret, err;
6875
6876         key.objectid = dback->root;
6877         key.type = BTRFS_ROOT_ITEM_KEY;
6878         key.offset = (u64)-1;
6879         root = btrfs_read_fs_root(info, &key);
6880         if (IS_ERR(root)) {
6881                 fprintf(stderr, "Couldn't find root for our ref\n");
6882                 return -EINVAL;
6883         }
6884
6885         /*
6886          * The backref points to the original offset of the extent if it was
6887          * split, so we need to search down to the offset we have and then walk
6888          * forward until we find the backref we're looking for.
6889          */
6890         key.objectid = dback->owner;
6891         key.type = BTRFS_EXTENT_DATA_KEY;
6892         key.offset = dback->offset;
6893         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6894         if (ret < 0) {
6895                 fprintf(stderr, "Error looking up ref %d\n", ret);
6896                 return ret;
6897         }
6898
6899         while (1) {
6900                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6901                         ret = btrfs_next_leaf(root, path);
6902                         if (ret) {
6903                                 fprintf(stderr, "Couldn't find our ref, next\n");
6904                                 return -EINVAL;
6905                         }
6906                 }
6907                 leaf = path->nodes[0];
6908                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6909                 if (key.objectid != dback->owner ||
6910                     key.type != BTRFS_EXTENT_DATA_KEY) {
6911                         fprintf(stderr, "Couldn't find our ref, search\n");
6912                         return -EINVAL;
6913                 }
6914                 fi = btrfs_item_ptr(leaf, path->slots[0],
6915                                     struct btrfs_file_extent_item);
6916                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6917                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6918
6919                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6920                         break;
6921                 path->slots[0]++;
6922         }
6923
6924         btrfs_release_path(path);
6925
6926         trans = btrfs_start_transaction(root, 1);
6927         if (IS_ERR(trans))
6928                 return PTR_ERR(trans);
6929
6930         /*
6931          * Ok we have the key of the file extent we want to fix, now we can cow
6932          * down to the thing and fix it.
6933          */
6934         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6935         if (ret < 0) {
6936                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6937                         key.objectid, key.type, key.offset, ret);
6938                 goto out;
6939         }
6940         if (ret > 0) {
6941                 fprintf(stderr, "Well that's odd, we just found this key "
6942                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6943                         key.offset);
6944                 ret = -EINVAL;
6945                 goto out;
6946         }
6947         leaf = path->nodes[0];
6948         fi = btrfs_item_ptr(leaf, path->slots[0],
6949                             struct btrfs_file_extent_item);
6950
6951         if (btrfs_file_extent_compression(leaf, fi) &&
6952             dback->disk_bytenr != entry->bytenr) {
6953                 fprintf(stderr, "Ref doesn't match the record start and is "
6954                         "compressed, please take a btrfs-image of this file "
6955                         "system and send it to a btrfs developer so they can "
6956                         "complete this functionality for bytenr %Lu\n",
6957                         dback->disk_bytenr);
6958                 ret = -EINVAL;
6959                 goto out;
6960         }
6961
6962         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6963                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6964         } else if (dback->disk_bytenr > entry->bytenr) {
6965                 u64 off_diff, offset;
6966
6967                 off_diff = dback->disk_bytenr - entry->bytenr;
6968                 offset = btrfs_file_extent_offset(leaf, fi);
6969                 if (dback->disk_bytenr + offset +
6970                     btrfs_file_extent_num_bytes(leaf, fi) >
6971                     entry->bytenr + entry->bytes) {
6972                         fprintf(stderr, "Ref is past the entry end, please "
6973                                 "take a btrfs-image of this file system and "
6974                                 "send it to a btrfs developer, ref %Lu\n",
6975                                 dback->disk_bytenr);
6976                         ret = -EINVAL;
6977                         goto out;
6978                 }
6979                 offset += off_diff;
6980                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6981                 btrfs_set_file_extent_offset(leaf, fi, offset);
6982         } else if (dback->disk_bytenr < entry->bytenr) {
6983                 u64 offset;
6984
6985                 offset = btrfs_file_extent_offset(leaf, fi);
6986                 if (dback->disk_bytenr + offset < entry->bytenr) {
6987                         fprintf(stderr, "Ref is before the entry start, please"
6988                                 " take a btrfs-image of this file system and "
6989                                 "send it to a btrfs developer, ref %Lu\n",
6990                                 dback->disk_bytenr);
6991                         ret = -EINVAL;
6992                         goto out;
6993                 }
6994
6995                 offset += dback->disk_bytenr;
6996                 offset -= entry->bytenr;
6997                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6998                 btrfs_set_file_extent_offset(leaf, fi, offset);
6999         }
7000
7001         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7002
7003         /*
7004          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7005          * only do this if we aren't using compression, otherwise it's a
7006          * trickier case.
7007          */
7008         if (!btrfs_file_extent_compression(leaf, fi))
7009                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7010         else
7011                 printf("ram bytes may be wrong?\n");
7012         btrfs_mark_buffer_dirty(leaf);
7013 out:
7014         err = btrfs_commit_transaction(trans, root);
7015         btrfs_release_path(path);
7016         return ret ? ret : err;
7017 }
7018
7019 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7020                            struct extent_record *rec)
7021 {
7022         struct extent_backref *back;
7023         struct data_backref *dback;
7024         struct extent_entry *entry, *best = NULL;
7025         LIST_HEAD(entries);
7026         int nr_entries = 0;
7027         int broken_entries = 0;
7028         int ret = 0;
7029         short mismatch = 0;
7030
7031         /*
7032          * Metadata is easy and the backrefs should always agree on bytenr and
7033          * size, if not we've got bigger issues.
7034          */
7035         if (rec->metadata)
7036                 return 0;
7037
7038         list_for_each_entry(back, &rec->backrefs, list) {
7039                 if (back->full_backref || !back->is_data)
7040                         continue;
7041
7042                 dback = to_data_backref(back);
7043
7044                 /*
7045                  * We only pay attention to backrefs that we found a real
7046                  * backref for.
7047                  */
7048                 if (dback->found_ref == 0)
7049                         continue;
7050
7051                 /*
7052                  * For now we only catch when the bytes don't match, not the
7053                  * bytenr.  We can easily do this at the same time, but I want
7054                  * to have a fs image to test on before we just add repair
7055                  * functionality willy-nilly so we know we won't screw up the
7056                  * repair.
7057                  */
7058
7059                 entry = find_entry(&entries, dback->disk_bytenr,
7060                                    dback->bytes);
7061                 if (!entry) {
7062                         entry = malloc(sizeof(struct extent_entry));
7063                         if (!entry) {
7064                                 ret = -ENOMEM;
7065                                 goto out;
7066                         }
7067                         memset(entry, 0, sizeof(*entry));
7068                         entry->bytenr = dback->disk_bytenr;
7069                         entry->bytes = dback->bytes;
7070                         list_add_tail(&entry->list, &entries);
7071                         nr_entries++;
7072                 }
7073
7074                 /*
7075                  * If we only have on entry we may think the entries agree when
7076                  * in reality they don't so we have to do some extra checking.
7077                  */
7078                 if (dback->disk_bytenr != rec->start ||
7079                     dback->bytes != rec->nr || back->broken)
7080                         mismatch = 1;
7081
7082                 if (back->broken) {
7083                         entry->broken++;
7084                         broken_entries++;
7085                 }
7086
7087                 entry->count++;
7088         }
7089
7090         /* Yay all the backrefs agree, carry on good sir */
7091         if (nr_entries <= 1 && !mismatch)
7092                 goto out;
7093
7094         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7095                 "%Lu\n", rec->start);
7096
7097         /*
7098          * First we want to see if the backrefs can agree amongst themselves who
7099          * is right, so figure out which one of the entries has the highest
7100          * count.
7101          */
7102         best = find_most_right_entry(&entries);
7103
7104         /*
7105          * Ok so we may have an even split between what the backrefs think, so
7106          * this is where we use the extent ref to see what it thinks.
7107          */
7108         if (!best) {
7109                 entry = find_entry(&entries, rec->start, rec->nr);
7110                 if (!entry && (!broken_entries || !rec->found_rec)) {
7111                         fprintf(stderr, "Backrefs don't agree with each other "
7112                                 "and extent record doesn't agree with anybody,"
7113                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7114                                 rec->start, rec->nr);
7115                         ret = -EINVAL;
7116                         goto out;
7117                 } else if (!entry) {
7118                         /*
7119                          * Ok our backrefs were broken, we'll assume this is the
7120                          * correct value and add an entry for this range.
7121                          */
7122                         entry = malloc(sizeof(struct extent_entry));
7123                         if (!entry) {
7124                                 ret = -ENOMEM;
7125                                 goto out;
7126                         }
7127                         memset(entry, 0, sizeof(*entry));
7128                         entry->bytenr = rec->start;
7129                         entry->bytes = rec->nr;
7130                         list_add_tail(&entry->list, &entries);
7131                         nr_entries++;
7132                 }
7133                 entry->count++;
7134                 best = find_most_right_entry(&entries);
7135                 if (!best) {
7136                         fprintf(stderr, "Backrefs and extent record evenly "
7137                                 "split on who is right, this is going to "
7138                                 "require user input to fix bytenr %Lu bytes "
7139                                 "%Lu\n", rec->start, rec->nr);
7140                         ret = -EINVAL;
7141                         goto out;
7142                 }
7143         }
7144
7145         /*
7146          * I don't think this can happen currently as we'll abort() if we catch
7147          * this case higher up, but in case somebody removes that we still can't
7148          * deal with it properly here yet, so just bail out of that's the case.
7149          */
7150         if (best->bytenr != rec->start) {
7151                 fprintf(stderr, "Extent start and backref starts don't match, "
7152                         "please use btrfs-image on this file system and send "
7153                         "it to a btrfs developer so they can make fsck fix "
7154                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7155                         rec->start, rec->nr);
7156                 ret = -EINVAL;
7157                 goto out;
7158         }
7159
7160         /*
7161          * Ok great we all agreed on an extent record, let's go find the real
7162          * references and fix up the ones that don't match.
7163          */
7164         list_for_each_entry(back, &rec->backrefs, list) {
7165                 if (back->full_backref || !back->is_data)
7166                         continue;
7167
7168                 dback = to_data_backref(back);
7169
7170                 /*
7171                  * Still ignoring backrefs that don't have a real ref attached
7172                  * to them.
7173                  */
7174                 if (dback->found_ref == 0)
7175                         continue;
7176
7177                 if (dback->bytes == best->bytes &&
7178                     dback->disk_bytenr == best->bytenr)
7179                         continue;
7180
7181                 ret = repair_ref(info, path, dback, best);
7182                 if (ret)
7183                         goto out;
7184         }
7185
7186         /*
7187          * Ok we messed with the actual refs, which means we need to drop our
7188          * entire cache and go back and rescan.  I know this is a huge pain and
7189          * adds a lot of extra work, but it's the only way to be safe.  Once all
7190          * the backrefs agree we may not need to do anything to the extent
7191          * record itself.
7192          */
7193         ret = -EAGAIN;
7194 out:
7195         while (!list_empty(&entries)) {
7196                 entry = list_entry(entries.next, struct extent_entry, list);
7197                 list_del_init(&entry->list);
7198                 free(entry);
7199         }
7200         return ret;
7201 }
7202
7203 static int process_duplicates(struct btrfs_root *root,
7204                               struct cache_tree *extent_cache,
7205                               struct extent_record *rec)
7206 {
7207         struct extent_record *good, *tmp;
7208         struct cache_extent *cache;
7209         int ret;
7210
7211         /*
7212          * If we found a extent record for this extent then return, or if we
7213          * have more than one duplicate we are likely going to need to delete
7214          * something.
7215          */
7216         if (rec->found_rec || rec->num_duplicates > 1)
7217                 return 0;
7218
7219         /* Shouldn't happen but just in case */
7220         BUG_ON(!rec->num_duplicates);
7221
7222         /*
7223          * So this happens if we end up with a backref that doesn't match the
7224          * actual extent entry.  So either the backref is bad or the extent
7225          * entry is bad.  Either way we want to have the extent_record actually
7226          * reflect what we found in the extent_tree, so we need to take the
7227          * duplicate out and use that as the extent_record since the only way we
7228          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7229          */
7230         remove_cache_extent(extent_cache, &rec->cache);
7231
7232         good = to_extent_record(rec->dups.next);
7233         list_del_init(&good->list);
7234         INIT_LIST_HEAD(&good->backrefs);
7235         INIT_LIST_HEAD(&good->dups);
7236         good->cache.start = good->start;
7237         good->cache.size = good->nr;
7238         good->content_checked = 0;
7239         good->owner_ref_checked = 0;
7240         good->num_duplicates = 0;
7241         good->refs = rec->refs;
7242         list_splice_init(&rec->backrefs, &good->backrefs);
7243         while (1) {
7244                 cache = lookup_cache_extent(extent_cache, good->start,
7245                                             good->nr);
7246                 if (!cache)
7247                         break;
7248                 tmp = container_of(cache, struct extent_record, cache);
7249
7250                 /*
7251                  * If we find another overlapping extent and it's found_rec is
7252                  * set then it's a duplicate and we need to try and delete
7253                  * something.
7254                  */
7255                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7256                         if (list_empty(&good->list))
7257                                 list_add_tail(&good->list,
7258                                               &duplicate_extents);
7259                         good->num_duplicates += tmp->num_duplicates + 1;
7260                         list_splice_init(&tmp->dups, &good->dups);
7261                         list_del_init(&tmp->list);
7262                         list_add_tail(&tmp->list, &good->dups);
7263                         remove_cache_extent(extent_cache, &tmp->cache);
7264                         continue;
7265                 }
7266
7267                 /*
7268                  * Ok we have another non extent item backed extent rec, so lets
7269                  * just add it to this extent and carry on like we did above.
7270                  */
7271                 good->refs += tmp->refs;
7272                 list_splice_init(&tmp->backrefs, &good->backrefs);
7273                 remove_cache_extent(extent_cache, &tmp->cache);
7274                 free(tmp);
7275         }
7276         ret = insert_cache_extent(extent_cache, &good->cache);
7277         BUG_ON(ret);
7278         free(rec);
7279         return good->num_duplicates ? 0 : 1;
7280 }
7281
7282 static int delete_duplicate_records(struct btrfs_root *root,
7283                                     struct extent_record *rec)
7284 {
7285         struct btrfs_trans_handle *trans;
7286         LIST_HEAD(delete_list);
7287         struct btrfs_path *path;
7288         struct extent_record *tmp, *good, *n;
7289         int nr_del = 0;
7290         int ret = 0, err;
7291         struct btrfs_key key;
7292
7293         path = btrfs_alloc_path();
7294         if (!path) {
7295                 ret = -ENOMEM;
7296                 goto out;
7297         }
7298
7299         good = rec;
7300         /* Find the record that covers all of the duplicates. */
7301         list_for_each_entry(tmp, &rec->dups, list) {
7302                 if (good->start < tmp->start)
7303                         continue;
7304                 if (good->nr > tmp->nr)
7305                         continue;
7306
7307                 if (tmp->start + tmp->nr < good->start + good->nr) {
7308                         fprintf(stderr, "Ok we have overlapping extents that "
7309                                 "aren't completely covered by each other, this "
7310                                 "is going to require more careful thought.  "
7311                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7312                                 tmp->start, tmp->nr, good->start, good->nr);
7313                         abort();
7314                 }
7315                 good = tmp;
7316         }
7317
7318         if (good != rec)
7319                 list_add_tail(&rec->list, &delete_list);
7320
7321         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7322                 if (tmp == good)
7323                         continue;
7324                 list_move_tail(&tmp->list, &delete_list);
7325         }
7326
7327         root = root->fs_info->extent_root;
7328         trans = btrfs_start_transaction(root, 1);
7329         if (IS_ERR(trans)) {
7330                 ret = PTR_ERR(trans);
7331                 goto out;
7332         }
7333
7334         list_for_each_entry(tmp, &delete_list, list) {
7335                 if (tmp->found_rec == 0)
7336                         continue;
7337                 key.objectid = tmp->start;
7338                 key.type = BTRFS_EXTENT_ITEM_KEY;
7339                 key.offset = tmp->nr;
7340
7341                 /* Shouldn't happen but just in case */
7342                 if (tmp->metadata) {
7343                         fprintf(stderr, "Well this shouldn't happen, extent "
7344                                 "record overlaps but is metadata? "
7345                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7346                         abort();
7347                 }
7348
7349                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7350                 if (ret) {
7351                         if (ret > 0)
7352                                 ret = -EINVAL;
7353                         break;
7354                 }
7355                 ret = btrfs_del_item(trans, root, path);
7356                 if (ret)
7357                         break;
7358                 btrfs_release_path(path);
7359                 nr_del++;
7360         }
7361         err = btrfs_commit_transaction(trans, root);
7362         if (err && !ret)
7363                 ret = err;
7364 out:
7365         while (!list_empty(&delete_list)) {
7366                 tmp = to_extent_record(delete_list.next);
7367                 list_del_init(&tmp->list);
7368                 if (tmp == rec)
7369                         continue;
7370                 free(tmp);
7371         }
7372
7373         while (!list_empty(&rec->dups)) {
7374                 tmp = to_extent_record(rec->dups.next);
7375                 list_del_init(&tmp->list);
7376                 free(tmp);
7377         }
7378
7379         btrfs_free_path(path);
7380
7381         if (!ret && !nr_del)
7382                 rec->num_duplicates = 0;
7383
7384         return ret ? ret : nr_del;
7385 }
7386
7387 static int find_possible_backrefs(struct btrfs_fs_info *info,
7388                                   struct btrfs_path *path,
7389                                   struct cache_tree *extent_cache,
7390                                   struct extent_record *rec)
7391 {
7392         struct btrfs_root *root;
7393         struct extent_backref *back;
7394         struct data_backref *dback;
7395         struct cache_extent *cache;
7396         struct btrfs_file_extent_item *fi;
7397         struct btrfs_key key;
7398         u64 bytenr, bytes;
7399         int ret;
7400
7401         list_for_each_entry(back, &rec->backrefs, list) {
7402                 /* Don't care about full backrefs (poor unloved backrefs) */
7403                 if (back->full_backref || !back->is_data)
7404                         continue;
7405
7406                 dback = to_data_backref(back);
7407
7408                 /* We found this one, we don't need to do a lookup */
7409                 if (dback->found_ref)
7410                         continue;
7411
7412                 key.objectid = dback->root;
7413                 key.type = BTRFS_ROOT_ITEM_KEY;
7414                 key.offset = (u64)-1;
7415
7416                 root = btrfs_read_fs_root(info, &key);
7417
7418                 /* No root, definitely a bad ref, skip */
7419                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7420                         continue;
7421                 /* Other err, exit */
7422                 if (IS_ERR(root))
7423                         return PTR_ERR(root);
7424
7425                 key.objectid = dback->owner;
7426                 key.type = BTRFS_EXTENT_DATA_KEY;
7427                 key.offset = dback->offset;
7428                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7429                 if (ret) {
7430                         btrfs_release_path(path);
7431                         if (ret < 0)
7432                                 return ret;
7433                         /* Didn't find it, we can carry on */
7434                         ret = 0;
7435                         continue;
7436                 }
7437
7438                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7439                                     struct btrfs_file_extent_item);
7440                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7441                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7442                 btrfs_release_path(path);
7443                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7444                 if (cache) {
7445                         struct extent_record *tmp;
7446                         tmp = container_of(cache, struct extent_record, cache);
7447
7448                         /*
7449                          * If we found an extent record for the bytenr for this
7450                          * particular backref then we can't add it to our
7451                          * current extent record.  We only want to add backrefs
7452                          * that don't have a corresponding extent item in the
7453                          * extent tree since they likely belong to this record
7454                          * and we need to fix it if it doesn't match bytenrs.
7455                          */
7456                         if  (tmp->found_rec)
7457                                 continue;
7458                 }
7459
7460                 dback->found_ref += 1;
7461                 dback->disk_bytenr = bytenr;
7462                 dback->bytes = bytes;
7463
7464                 /*
7465                  * Set this so the verify backref code knows not to trust the
7466                  * values in this backref.
7467                  */
7468                 back->broken = 1;
7469         }
7470
7471         return 0;
7472 }
7473
7474 /*
7475  * Record orphan data ref into corresponding root.
7476  *
7477  * Return 0 if the extent item contains data ref and recorded.
7478  * Return 1 if the extent item contains no useful data ref
7479  *   On that case, it may contains only shared_dataref or metadata backref
7480  *   or the file extent exists(this should be handled by the extent bytenr
7481  *   recovery routine)
7482  * Return <0 if something goes wrong.
7483  */
7484 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7485                                       struct extent_record *rec)
7486 {
7487         struct btrfs_key key;
7488         struct btrfs_root *dest_root;
7489         struct extent_backref *back;
7490         struct data_backref *dback;
7491         struct orphan_data_extent *orphan;
7492         struct btrfs_path *path;
7493         int recorded_data_ref = 0;
7494         int ret = 0;
7495
7496         if (rec->metadata)
7497                 return 1;
7498         path = btrfs_alloc_path();
7499         if (!path)
7500                 return -ENOMEM;
7501         list_for_each_entry(back, &rec->backrefs, list) {
7502                 if (back->full_backref || !back->is_data ||
7503                     !back->found_extent_tree)
7504                         continue;
7505                 dback = to_data_backref(back);
7506                 if (dback->found_ref)
7507                         continue;
7508                 key.objectid = dback->root;
7509                 key.type = BTRFS_ROOT_ITEM_KEY;
7510                 key.offset = (u64)-1;
7511
7512                 dest_root = btrfs_read_fs_root(fs_info, &key);
7513
7514                 /* For non-exist root we just skip it */
7515                 if (IS_ERR(dest_root) || !dest_root)
7516                         continue;
7517
7518                 key.objectid = dback->owner;
7519                 key.type = BTRFS_EXTENT_DATA_KEY;
7520                 key.offset = dback->offset;
7521
7522                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7523                 /*
7524                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7525                  * we need to record it for inode/file extent rebuild.
7526                  * For ret > 0, we record it only for file extent rebuild.
7527                  * For ret == 0, the file extent exists but only bytenr
7528                  * mismatch, let the original bytenr fix routine to handle,
7529                  * don't record it.
7530                  */
7531                 if (ret == 0)
7532                         continue;
7533                 ret = 0;
7534                 orphan = malloc(sizeof(*orphan));
7535                 if (!orphan) {
7536                         ret = -ENOMEM;
7537                         goto out;
7538                 }
7539                 INIT_LIST_HEAD(&orphan->list);
7540                 orphan->root = dback->root;
7541                 orphan->objectid = dback->owner;
7542                 orphan->offset = dback->offset;
7543                 orphan->disk_bytenr = rec->cache.start;
7544                 orphan->disk_len = rec->cache.size;
7545                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7546                 recorded_data_ref = 1;
7547         }
7548 out:
7549         btrfs_free_path(path);
7550         if (!ret)
7551                 return !recorded_data_ref;
7552         else
7553                 return ret;
7554 }
7555
7556 /*
7557  * when an incorrect extent item is found, this will delete
7558  * all of the existing entries for it and recreate them
7559  * based on what the tree scan found.
7560  */
7561 static int fixup_extent_refs(struct btrfs_fs_info *info,
7562                              struct cache_tree *extent_cache,
7563                              struct extent_record *rec)
7564 {
7565         struct btrfs_trans_handle *trans = NULL;
7566         int ret;
7567         struct btrfs_path *path;
7568         struct list_head *cur = rec->backrefs.next;
7569         struct cache_extent *cache;
7570         struct extent_backref *back;
7571         int allocated = 0;
7572         u64 flags = 0;
7573
7574         if (rec->flag_block_full_backref)
7575                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7576
7577         path = btrfs_alloc_path();
7578         if (!path)
7579                 return -ENOMEM;
7580
7581         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7582                 /*
7583                  * Sometimes the backrefs themselves are so broken they don't
7584                  * get attached to any meaningful rec, so first go back and
7585                  * check any of our backrefs that we couldn't find and throw
7586                  * them into the list if we find the backref so that
7587                  * verify_backrefs can figure out what to do.
7588                  */
7589                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7590                 if (ret < 0)
7591                         goto out;
7592         }
7593
7594         /* step one, make sure all of the backrefs agree */
7595         ret = verify_backrefs(info, path, rec);
7596         if (ret < 0)
7597                 goto out;
7598
7599         trans = btrfs_start_transaction(info->extent_root, 1);
7600         if (IS_ERR(trans)) {
7601                 ret = PTR_ERR(trans);
7602                 goto out;
7603         }
7604
7605         /* step two, delete all the existing records */
7606         ret = delete_extent_records(trans, info->extent_root, path,
7607                                     rec->start, rec->max_size);
7608
7609         if (ret < 0)
7610                 goto out;
7611
7612         /* was this block corrupt?  If so, don't add references to it */
7613         cache = lookup_cache_extent(info->corrupt_blocks,
7614                                     rec->start, rec->max_size);
7615         if (cache) {
7616                 ret = 0;
7617                 goto out;
7618         }
7619
7620         /* step three, recreate all the refs we did find */
7621         while(cur != &rec->backrefs) {
7622                 back = to_extent_backref(cur);
7623                 cur = cur->next;
7624
7625                 /*
7626                  * if we didn't find any references, don't create a
7627                  * new extent record
7628                  */
7629                 if (!back->found_ref)
7630                         continue;
7631
7632                 rec->bad_full_backref = 0;
7633                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7634                 allocated = 1;
7635
7636                 if (ret)
7637                         goto out;
7638         }
7639 out:
7640         if (trans) {
7641                 int err = btrfs_commit_transaction(trans, info->extent_root);
7642                 if (!ret)
7643                         ret = err;
7644         }
7645
7646         btrfs_free_path(path);
7647         return ret;
7648 }
7649
7650 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7651                               struct extent_record *rec)
7652 {
7653         struct btrfs_trans_handle *trans;
7654         struct btrfs_root *root = fs_info->extent_root;
7655         struct btrfs_path *path;
7656         struct btrfs_extent_item *ei;
7657         struct btrfs_key key;
7658         u64 flags;
7659         int ret = 0;
7660
7661         key.objectid = rec->start;
7662         if (rec->metadata) {
7663                 key.type = BTRFS_METADATA_ITEM_KEY;
7664                 key.offset = rec->info_level;
7665         } else {
7666                 key.type = BTRFS_EXTENT_ITEM_KEY;
7667                 key.offset = rec->max_size;
7668         }
7669
7670         path = btrfs_alloc_path();
7671         if (!path)
7672                 return -ENOMEM;
7673
7674         trans = btrfs_start_transaction(root, 0);
7675         if (IS_ERR(trans)) {
7676                 btrfs_free_path(path);
7677                 return PTR_ERR(trans);
7678         }
7679
7680         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7681         if (ret < 0) {
7682                 btrfs_free_path(path);
7683                 btrfs_commit_transaction(trans, root);
7684                 return ret;
7685         } else if (ret) {
7686                 fprintf(stderr, "Didn't find extent for %llu\n",
7687                         (unsigned long long)rec->start);
7688                 btrfs_free_path(path);
7689                 btrfs_commit_transaction(trans, root);
7690                 return -ENOENT;
7691         }
7692
7693         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7694                             struct btrfs_extent_item);
7695         flags = btrfs_extent_flags(path->nodes[0], ei);
7696         if (rec->flag_block_full_backref) {
7697                 fprintf(stderr, "setting full backref on %llu\n",
7698                         (unsigned long long)key.objectid);
7699                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7700         } else {
7701                 fprintf(stderr, "clearing full backref on %llu\n",
7702                         (unsigned long long)key.objectid);
7703                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7704         }
7705         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7706         btrfs_mark_buffer_dirty(path->nodes[0]);
7707         btrfs_free_path(path);
7708         return btrfs_commit_transaction(trans, root);
7709 }
7710
7711 /* right now we only prune from the extent allocation tree */
7712 static int prune_one_block(struct btrfs_trans_handle *trans,
7713                            struct btrfs_fs_info *info,
7714                            struct btrfs_corrupt_block *corrupt)
7715 {
7716         int ret;
7717         struct btrfs_path path;
7718         struct extent_buffer *eb;
7719         u64 found;
7720         int slot;
7721         int nritems;
7722         int level = corrupt->level + 1;
7723
7724         btrfs_init_path(&path);
7725 again:
7726         /* we want to stop at the parent to our busted block */
7727         path.lowest_level = level;
7728
7729         ret = btrfs_search_slot(trans, info->extent_root,
7730                                 &corrupt->key, &path, -1, 1);
7731
7732         if (ret < 0)
7733                 goto out;
7734
7735         eb = path.nodes[level];
7736         if (!eb) {
7737                 ret = -ENOENT;
7738                 goto out;
7739         }
7740
7741         /*
7742          * hopefully the search gave us the block we want to prune,
7743          * lets try that first
7744          */
7745         slot = path.slots[level];
7746         found =  btrfs_node_blockptr(eb, slot);
7747         if (found == corrupt->cache.start)
7748                 goto del_ptr;
7749
7750         nritems = btrfs_header_nritems(eb);
7751
7752         /* the search failed, lets scan this node and hope we find it */
7753         for (slot = 0; slot < nritems; slot++) {
7754                 found =  btrfs_node_blockptr(eb, slot);
7755                 if (found == corrupt->cache.start)
7756                         goto del_ptr;
7757         }
7758         /*
7759          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7760          * to this block
7761          */
7762         if (eb == info->extent_root->node) {
7763                 ret = -ENOENT;
7764                 goto out;
7765         } else {
7766                 level++;
7767                 btrfs_release_path(&path);
7768                 goto again;
7769         }
7770
7771 del_ptr:
7772         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7773         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7774
7775 out:
7776         btrfs_release_path(&path);
7777         return ret;
7778 }
7779
7780 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7781 {
7782         struct btrfs_trans_handle *trans = NULL;
7783         struct cache_extent *cache;
7784         struct btrfs_corrupt_block *corrupt;
7785
7786         while (1) {
7787                 cache = search_cache_extent(info->corrupt_blocks, 0);
7788                 if (!cache)
7789                         break;
7790                 if (!trans) {
7791                         trans = btrfs_start_transaction(info->extent_root, 1);
7792                         if (IS_ERR(trans))
7793                                 return PTR_ERR(trans);
7794                 }
7795                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7796                 prune_one_block(trans, info, corrupt);
7797                 remove_cache_extent(info->corrupt_blocks, cache);
7798         }
7799         if (trans)
7800                 return btrfs_commit_transaction(trans, info->extent_root);
7801         return 0;
7802 }
7803
7804 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7805 {
7806         struct btrfs_block_group_cache *cache;
7807         u64 start, end;
7808         int ret;
7809
7810         while (1) {
7811                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7812                                             &start, &end, EXTENT_DIRTY);
7813                 if (ret)
7814                         break;
7815                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7816                                    GFP_NOFS);
7817         }
7818
7819         start = 0;
7820         while (1) {
7821                 cache = btrfs_lookup_first_block_group(fs_info, start);
7822                 if (!cache)
7823                         break;
7824                 if (cache->cached)
7825                         cache->cached = 0;
7826                 start = cache->key.objectid + cache->key.offset;
7827         }
7828 }
7829
7830 static int check_extent_refs(struct btrfs_root *root,
7831                              struct cache_tree *extent_cache)
7832 {
7833         struct extent_record *rec;
7834         struct cache_extent *cache;
7835         int err = 0;
7836         int ret = 0;
7837         int fixed = 0;
7838         int had_dups = 0;
7839         int recorded = 0;
7840
7841         if (repair) {
7842                 /*
7843                  * if we're doing a repair, we have to make sure
7844                  * we don't allocate from the problem extents.
7845                  * In the worst case, this will be all the
7846                  * extents in the FS
7847                  */
7848                 cache = search_cache_extent(extent_cache, 0);
7849                 while(cache) {
7850                         rec = container_of(cache, struct extent_record, cache);
7851                         set_extent_dirty(root->fs_info->excluded_extents,
7852                                          rec->start,
7853                                          rec->start + rec->max_size - 1,
7854                                          GFP_NOFS);
7855                         cache = next_cache_extent(cache);
7856                 }
7857
7858                 /* pin down all the corrupted blocks too */
7859                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7860                 while(cache) {
7861                         set_extent_dirty(root->fs_info->excluded_extents,
7862                                          cache->start,
7863                                          cache->start + cache->size - 1,
7864                                          GFP_NOFS);
7865                         cache = next_cache_extent(cache);
7866                 }
7867                 prune_corrupt_blocks(root->fs_info);
7868                 reset_cached_block_groups(root->fs_info);
7869         }
7870
7871         reset_cached_block_groups(root->fs_info);
7872
7873         /*
7874          * We need to delete any duplicate entries we find first otherwise we
7875          * could mess up the extent tree when we have backrefs that actually
7876          * belong to a different extent item and not the weird duplicate one.
7877          */
7878         while (repair && !list_empty(&duplicate_extents)) {
7879                 rec = to_extent_record(duplicate_extents.next);
7880                 list_del_init(&rec->list);
7881
7882                 /* Sometimes we can find a backref before we find an actual
7883                  * extent, so we need to process it a little bit to see if there
7884                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7885                  * if this is a backref screwup.  If we need to delete stuff
7886                  * process_duplicates() will return 0, otherwise it will return
7887                  * 1 and we
7888                  */
7889                 if (process_duplicates(root, extent_cache, rec))
7890                         continue;
7891                 ret = delete_duplicate_records(root, rec);
7892                 if (ret < 0)
7893                         return ret;
7894                 /*
7895                  * delete_duplicate_records will return the number of entries
7896                  * deleted, so if it's greater than 0 then we know we actually
7897                  * did something and we need to remove.
7898                  */
7899                 if (ret)
7900                         had_dups = 1;
7901         }
7902
7903         if (had_dups)
7904                 return -EAGAIN;
7905
7906         while(1) {
7907                 int cur_err = 0;
7908
7909                 fixed = 0;
7910                 recorded = 0;
7911                 cache = search_cache_extent(extent_cache, 0);
7912                 if (!cache)
7913                         break;
7914                 rec = container_of(cache, struct extent_record, cache);
7915                 if (rec->num_duplicates) {
7916                         fprintf(stderr, "extent item %llu has multiple extent "
7917                                 "items\n", (unsigned long long)rec->start);
7918                         err = 1;
7919                         cur_err = 1;
7920                 }
7921
7922                 if (rec->refs != rec->extent_item_refs) {
7923                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7924                                 (unsigned long long)rec->start,
7925                                 (unsigned long long)rec->nr);
7926                         fprintf(stderr, "extent item %llu, found %llu\n",
7927                                 (unsigned long long)rec->extent_item_refs,
7928                                 (unsigned long long)rec->refs);
7929                         ret = record_orphan_data_extents(root->fs_info, rec);
7930                         if (ret < 0)
7931                                 goto repair_abort;
7932                         if (ret == 0) {
7933                                 recorded = 1;
7934                         } else {
7935                                 /*
7936                                  * we can't use the extent to repair file
7937                                  * extent, let the fallback method handle it.
7938                                  */
7939                                 if (!fixed && repair) {
7940                                         ret = fixup_extent_refs(
7941                                                         root->fs_info,
7942                                                         extent_cache, rec);
7943                                         if (ret)
7944                                                 goto repair_abort;
7945                                         fixed = 1;
7946                                 }
7947                         }
7948                         err = 1;
7949                         cur_err = 1;
7950                 }
7951                 if (all_backpointers_checked(rec, 1)) {
7952                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7953                                 (unsigned long long)rec->start,
7954                                 (unsigned long long)rec->nr);
7955
7956                         if (!fixed && !recorded && repair) {
7957                                 ret = fixup_extent_refs(root->fs_info,
7958                                                         extent_cache, rec);
7959                                 if (ret)
7960                                         goto repair_abort;
7961                                 fixed = 1;
7962                         }
7963                         cur_err = 1;
7964                         err = 1;
7965                 }
7966                 if (!rec->owner_ref_checked) {
7967                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7968                                 (unsigned long long)rec->start,
7969                                 (unsigned long long)rec->nr);
7970                         if (!fixed && !recorded && repair) {
7971                                 ret = fixup_extent_refs(root->fs_info,
7972                                                         extent_cache, rec);
7973                                 if (ret)
7974                                         goto repair_abort;
7975                                 fixed = 1;
7976                         }
7977                         err = 1;
7978                         cur_err = 1;
7979                 }
7980                 if (rec->bad_full_backref) {
7981                         fprintf(stderr, "bad full backref, on [%llu]\n",
7982                                 (unsigned long long)rec->start);
7983                         if (repair) {
7984                                 ret = fixup_extent_flags(root->fs_info, rec);
7985                                 if (ret)
7986                                         goto repair_abort;
7987                                 fixed = 1;
7988                         }
7989                         err = 1;
7990                         cur_err = 1;
7991                 }
7992                 /*
7993                  * Although it's not a extent ref's problem, we reuse this
7994                  * routine for error reporting.
7995                  * No repair function yet.
7996                  */
7997                 if (rec->crossing_stripes) {
7998                         fprintf(stderr,
7999                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8000                                 rec->start, rec->start + rec->max_size);
8001                         err = 1;
8002                         cur_err = 1;
8003                 }
8004
8005                 if (rec->wrong_chunk_type) {
8006                         fprintf(stderr,
8007                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8008                                 rec->start, rec->start + rec->max_size);
8009                         err = 1;
8010                         cur_err = 1;
8011                 }
8012
8013                 remove_cache_extent(extent_cache, cache);
8014                 free_all_extent_backrefs(rec);
8015                 if (!init_extent_tree && repair && (!cur_err || fixed))
8016                         clear_extent_dirty(root->fs_info->excluded_extents,
8017                                            rec->start,
8018                                            rec->start + rec->max_size - 1,
8019                                            GFP_NOFS);
8020                 free(rec);
8021         }
8022 repair_abort:
8023         if (repair) {
8024                 if (ret && ret != -EAGAIN) {
8025                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8026                         exit(1);
8027                 } else if (!ret) {
8028                         struct btrfs_trans_handle *trans;
8029
8030                         root = root->fs_info->extent_root;
8031                         trans = btrfs_start_transaction(root, 1);
8032                         if (IS_ERR(trans)) {
8033                                 ret = PTR_ERR(trans);
8034                                 goto repair_abort;
8035                         }
8036
8037                         btrfs_fix_block_accounting(trans, root);
8038                         ret = btrfs_commit_transaction(trans, root);
8039                         if (ret)
8040                                 goto repair_abort;
8041                 }
8042                 if (err)
8043                         fprintf(stderr, "repaired damaged extent references\n");
8044                 return ret;
8045         }
8046         return err;
8047 }
8048
8049 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8050 {
8051         u64 stripe_size;
8052
8053         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8054                 stripe_size = length;
8055                 stripe_size /= num_stripes;
8056         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8057                 stripe_size = length * 2;
8058                 stripe_size /= num_stripes;
8059         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8060                 stripe_size = length;
8061                 stripe_size /= (num_stripes - 1);
8062         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8063                 stripe_size = length;
8064                 stripe_size /= (num_stripes - 2);
8065         } else {
8066                 stripe_size = length;
8067         }
8068         return stripe_size;
8069 }
8070
8071 /*
8072  * Check the chunk with its block group/dev list ref:
8073  * Return 0 if all refs seems valid.
8074  * Return 1 if part of refs seems valid, need later check for rebuild ref
8075  * like missing block group and needs to search extent tree to rebuild them.
8076  * Return -1 if essential refs are missing and unable to rebuild.
8077  */
8078 static int check_chunk_refs(struct chunk_record *chunk_rec,
8079                             struct block_group_tree *block_group_cache,
8080                             struct device_extent_tree *dev_extent_cache,
8081                             int silent)
8082 {
8083         struct cache_extent *block_group_item;
8084         struct block_group_record *block_group_rec;
8085         struct cache_extent *dev_extent_item;
8086         struct device_extent_record *dev_extent_rec;
8087         u64 devid;
8088         u64 offset;
8089         u64 length;
8090         int metadump_v2 = 0;
8091         int i;
8092         int ret = 0;
8093
8094         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8095                                                chunk_rec->offset,
8096                                                chunk_rec->length);
8097         if (block_group_item) {
8098                 block_group_rec = container_of(block_group_item,
8099                                                struct block_group_record,
8100                                                cache);
8101                 if (chunk_rec->length != block_group_rec->offset ||
8102                     chunk_rec->offset != block_group_rec->objectid ||
8103                     (!metadump_v2 &&
8104                      chunk_rec->type_flags != block_group_rec->flags)) {
8105                         if (!silent)
8106                                 fprintf(stderr,
8107                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8108                                         chunk_rec->objectid,
8109                                         chunk_rec->type,
8110                                         chunk_rec->offset,
8111                                         chunk_rec->length,
8112                                         chunk_rec->offset,
8113                                         chunk_rec->type_flags,
8114                                         block_group_rec->objectid,
8115                                         block_group_rec->type,
8116                                         block_group_rec->offset,
8117                                         block_group_rec->offset,
8118                                         block_group_rec->objectid,
8119                                         block_group_rec->flags);
8120                         ret = -1;
8121                 } else {
8122                         list_del_init(&block_group_rec->list);
8123                         chunk_rec->bg_rec = block_group_rec;
8124                 }
8125         } else {
8126                 if (!silent)
8127                         fprintf(stderr,
8128                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8129                                 chunk_rec->objectid,
8130                                 chunk_rec->type,
8131                                 chunk_rec->offset,
8132                                 chunk_rec->length,
8133                                 chunk_rec->offset,
8134                                 chunk_rec->type_flags);
8135                 ret = 1;
8136         }
8137
8138         if (metadump_v2)
8139                 return ret;
8140
8141         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8142                                     chunk_rec->num_stripes);
8143         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8144                 devid = chunk_rec->stripes[i].devid;
8145                 offset = chunk_rec->stripes[i].offset;
8146                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8147                                                        devid, offset, length);
8148                 if (dev_extent_item) {
8149                         dev_extent_rec = container_of(dev_extent_item,
8150                                                 struct device_extent_record,
8151                                                 cache);
8152                         if (dev_extent_rec->objectid != devid ||
8153                             dev_extent_rec->offset != offset ||
8154                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8155                             dev_extent_rec->length != length) {
8156                                 if (!silent)
8157                                         fprintf(stderr,
8158                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8159                                                 chunk_rec->objectid,
8160                                                 chunk_rec->type,
8161                                                 chunk_rec->offset,
8162                                                 chunk_rec->stripes[i].devid,
8163                                                 chunk_rec->stripes[i].offset,
8164                                                 dev_extent_rec->objectid,
8165                                                 dev_extent_rec->offset,
8166                                                 dev_extent_rec->length);
8167                                 ret = -1;
8168                         } else {
8169                                 list_move(&dev_extent_rec->chunk_list,
8170                                           &chunk_rec->dextents);
8171                         }
8172                 } else {
8173                         if (!silent)
8174                                 fprintf(stderr,
8175                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8176                                         chunk_rec->objectid,
8177                                         chunk_rec->type,
8178                                         chunk_rec->offset,
8179                                         chunk_rec->stripes[i].devid,
8180                                         chunk_rec->stripes[i].offset);
8181                         ret = -1;
8182                 }
8183         }
8184         return ret;
8185 }
8186
8187 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8188 int check_chunks(struct cache_tree *chunk_cache,
8189                  struct block_group_tree *block_group_cache,
8190                  struct device_extent_tree *dev_extent_cache,
8191                  struct list_head *good, struct list_head *bad,
8192                  struct list_head *rebuild, int silent)
8193 {
8194         struct cache_extent *chunk_item;
8195         struct chunk_record *chunk_rec;
8196         struct block_group_record *bg_rec;
8197         struct device_extent_record *dext_rec;
8198         int err;
8199         int ret = 0;
8200
8201         chunk_item = first_cache_extent(chunk_cache);
8202         while (chunk_item) {
8203                 chunk_rec = container_of(chunk_item, struct chunk_record,
8204                                          cache);
8205                 err = check_chunk_refs(chunk_rec, block_group_cache,
8206                                        dev_extent_cache, silent);
8207                 if (err < 0)
8208                         ret = err;
8209                 if (err == 0 && good)
8210                         list_add_tail(&chunk_rec->list, good);
8211                 if (err > 0 && rebuild)
8212                         list_add_tail(&chunk_rec->list, rebuild);
8213                 if (err < 0 && bad)
8214                         list_add_tail(&chunk_rec->list, bad);
8215                 chunk_item = next_cache_extent(chunk_item);
8216         }
8217
8218         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8219                 if (!silent)
8220                         fprintf(stderr,
8221                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8222                                 bg_rec->objectid,
8223                                 bg_rec->offset,
8224                                 bg_rec->flags);
8225                 if (!ret)
8226                         ret = 1;
8227         }
8228
8229         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8230                             chunk_list) {
8231                 if (!silent)
8232                         fprintf(stderr,
8233                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8234                                 dext_rec->objectid,
8235                                 dext_rec->offset,
8236                                 dext_rec->length);
8237                 if (!ret)
8238                         ret = 1;
8239         }
8240         return ret;
8241 }
8242
8243
8244 static int check_device_used(struct device_record *dev_rec,
8245                              struct device_extent_tree *dext_cache)
8246 {
8247         struct cache_extent *cache;
8248         struct device_extent_record *dev_extent_rec;
8249         u64 total_byte = 0;
8250
8251         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8252         while (cache) {
8253                 dev_extent_rec = container_of(cache,
8254                                               struct device_extent_record,
8255                                               cache);
8256                 if (dev_extent_rec->objectid != dev_rec->devid)
8257                         break;
8258
8259                 list_del_init(&dev_extent_rec->device_list);
8260                 total_byte += dev_extent_rec->length;
8261                 cache = next_cache_extent(cache);
8262         }
8263
8264         if (total_byte != dev_rec->byte_used) {
8265                 fprintf(stderr,
8266                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8267                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8268                         dev_rec->type, dev_rec->offset);
8269                 return -1;
8270         } else {
8271                 return 0;
8272         }
8273 }
8274
8275 /* check btrfs_dev_item -> btrfs_dev_extent */
8276 static int check_devices(struct rb_root *dev_cache,
8277                          struct device_extent_tree *dev_extent_cache)
8278 {
8279         struct rb_node *dev_node;
8280         struct device_record *dev_rec;
8281         struct device_extent_record *dext_rec;
8282         int err;
8283         int ret = 0;
8284
8285         dev_node = rb_first(dev_cache);
8286         while (dev_node) {
8287                 dev_rec = container_of(dev_node, struct device_record, node);
8288                 err = check_device_used(dev_rec, dev_extent_cache);
8289                 if (err)
8290                         ret = err;
8291
8292                 dev_node = rb_next(dev_node);
8293         }
8294         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8295                             device_list) {
8296                 fprintf(stderr,
8297                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8298                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8299                 if (!ret)
8300                         ret = 1;
8301         }
8302         return ret;
8303 }
8304
8305 static int add_root_item_to_list(struct list_head *head,
8306                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8307                                   u8 level, u8 drop_level,
8308                                   int level_size, struct btrfs_key *drop_key)
8309 {
8310
8311         struct root_item_record *ri_rec;
8312         ri_rec = malloc(sizeof(*ri_rec));
8313         if (!ri_rec)
8314                 return -ENOMEM;
8315         ri_rec->bytenr = bytenr;
8316         ri_rec->objectid = objectid;
8317         ri_rec->level = level;
8318         ri_rec->level_size = level_size;
8319         ri_rec->drop_level = drop_level;
8320         ri_rec->last_snapshot = last_snapshot;
8321         if (drop_key)
8322                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8323         list_add_tail(&ri_rec->list, head);
8324
8325         return 0;
8326 }
8327
8328 static void free_root_item_list(struct list_head *list)
8329 {
8330         struct root_item_record *ri_rec;
8331
8332         while (!list_empty(list)) {
8333                 ri_rec = list_first_entry(list, struct root_item_record,
8334                                           list);
8335                 list_del_init(&ri_rec->list);
8336                 free(ri_rec);
8337         }
8338 }
8339
8340 static int deal_root_from_list(struct list_head *list,
8341                                struct btrfs_root *root,
8342                                struct block_info *bits,
8343                                int bits_nr,
8344                                struct cache_tree *pending,
8345                                struct cache_tree *seen,
8346                                struct cache_tree *reada,
8347                                struct cache_tree *nodes,
8348                                struct cache_tree *extent_cache,
8349                                struct cache_tree *chunk_cache,
8350                                struct rb_root *dev_cache,
8351                                struct block_group_tree *block_group_cache,
8352                                struct device_extent_tree *dev_extent_cache)
8353 {
8354         int ret = 0;
8355         u64 last;
8356
8357         while (!list_empty(list)) {
8358                 struct root_item_record *rec;
8359                 struct extent_buffer *buf;
8360                 rec = list_entry(list->next,
8361                                  struct root_item_record, list);
8362                 last = 0;
8363                 buf = read_tree_block(root->fs_info->tree_root,
8364                                       rec->bytenr, rec->level_size, 0);
8365                 if (!extent_buffer_uptodate(buf)) {
8366                         free_extent_buffer(buf);
8367                         ret = -EIO;
8368                         break;
8369                 }
8370                 ret = add_root_to_pending(buf, extent_cache, pending,
8371                                     seen, nodes, rec->objectid);
8372                 if (ret < 0)
8373                         break;
8374                 /*
8375                  * To rebuild extent tree, we need deal with snapshot
8376                  * one by one, otherwise we deal with node firstly which
8377                  * can maximize readahead.
8378                  */
8379                 while (1) {
8380                         ret = run_next_block(root, bits, bits_nr, &last,
8381                                              pending, seen, reada, nodes,
8382                                              extent_cache, chunk_cache,
8383                                              dev_cache, block_group_cache,
8384                                              dev_extent_cache, rec);
8385                         if (ret != 0)
8386                                 break;
8387                 }
8388                 free_extent_buffer(buf);
8389                 list_del(&rec->list);
8390                 free(rec);
8391                 if (ret < 0)
8392                         break;
8393         }
8394         while (ret >= 0) {
8395                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8396                                      reada, nodes, extent_cache, chunk_cache,
8397                                      dev_cache, block_group_cache,
8398                                      dev_extent_cache, NULL);
8399                 if (ret != 0) {
8400                         if (ret > 0)
8401                                 ret = 0;
8402                         break;
8403                 }
8404         }
8405         return ret;
8406 }
8407
8408 static int check_chunks_and_extents(struct btrfs_root *root)
8409 {
8410         struct rb_root dev_cache;
8411         struct cache_tree chunk_cache;
8412         struct block_group_tree block_group_cache;
8413         struct device_extent_tree dev_extent_cache;
8414         struct cache_tree extent_cache;
8415         struct cache_tree seen;
8416         struct cache_tree pending;
8417         struct cache_tree reada;
8418         struct cache_tree nodes;
8419         struct extent_io_tree excluded_extents;
8420         struct cache_tree corrupt_blocks;
8421         struct btrfs_path path;
8422         struct btrfs_key key;
8423         struct btrfs_key found_key;
8424         int ret, err = 0;
8425         struct block_info *bits;
8426         int bits_nr;
8427         struct extent_buffer *leaf;
8428         int slot;
8429         struct btrfs_root_item ri;
8430         struct list_head dropping_trees;
8431         struct list_head normal_trees;
8432         struct btrfs_root *root1;
8433         u64 objectid;
8434         u32 level_size;
8435         u8 level;
8436
8437         dev_cache = RB_ROOT;
8438         cache_tree_init(&chunk_cache);
8439         block_group_tree_init(&block_group_cache);
8440         device_extent_tree_init(&dev_extent_cache);
8441
8442         cache_tree_init(&extent_cache);
8443         cache_tree_init(&seen);
8444         cache_tree_init(&pending);
8445         cache_tree_init(&nodes);
8446         cache_tree_init(&reada);
8447         cache_tree_init(&corrupt_blocks);
8448         extent_io_tree_init(&excluded_extents);
8449         INIT_LIST_HEAD(&dropping_trees);
8450         INIT_LIST_HEAD(&normal_trees);
8451
8452         if (repair) {
8453                 root->fs_info->excluded_extents = &excluded_extents;
8454                 root->fs_info->fsck_extent_cache = &extent_cache;
8455                 root->fs_info->free_extent_hook = free_extent_hook;
8456                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8457         }
8458
8459         bits_nr = 1024;
8460         bits = malloc(bits_nr * sizeof(struct block_info));
8461         if (!bits) {
8462                 perror("malloc");
8463                 exit(1);
8464         }
8465
8466         if (ctx.progress_enabled) {
8467                 ctx.tp = TASK_EXTENTS;
8468                 task_start(ctx.info);
8469         }
8470
8471 again:
8472         root1 = root->fs_info->tree_root;
8473         level = btrfs_header_level(root1->node);
8474         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8475                                     root1->node->start, 0, level, 0,
8476                                     root1->nodesize, NULL);
8477         if (ret < 0)
8478                 goto out;
8479         root1 = root->fs_info->chunk_root;
8480         level = btrfs_header_level(root1->node);
8481         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8482                                     root1->node->start, 0, level, 0,
8483                                     root1->nodesize, NULL);
8484         if (ret < 0)
8485                 goto out;
8486         btrfs_init_path(&path);
8487         key.offset = 0;
8488         key.objectid = 0;
8489         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8490         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8491                                         &key, &path, 0, 0);
8492         if (ret < 0)
8493                 goto out;
8494         while(1) {
8495                 leaf = path.nodes[0];
8496                 slot = path.slots[0];
8497                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8498                         ret = btrfs_next_leaf(root, &path);
8499                         if (ret != 0)
8500                                 break;
8501                         leaf = path.nodes[0];
8502                         slot = path.slots[0];
8503                 }
8504                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8505                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8506                         unsigned long offset;
8507                         u64 last_snapshot;
8508
8509                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8510                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8511                         last_snapshot = btrfs_root_last_snapshot(&ri);
8512                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8513                                 level = btrfs_root_level(&ri);
8514                                 level_size = root->nodesize;
8515                                 ret = add_root_item_to_list(&normal_trees,
8516                                                 found_key.objectid,
8517                                                 btrfs_root_bytenr(&ri),
8518                                                 last_snapshot, level,
8519                                                 0, level_size, NULL);
8520                                 if (ret < 0)
8521                                         goto out;
8522                         } else {
8523                                 level = btrfs_root_level(&ri);
8524                                 level_size = root->nodesize;
8525                                 objectid = found_key.objectid;
8526                                 btrfs_disk_key_to_cpu(&found_key,
8527                                                       &ri.drop_progress);
8528                                 ret = add_root_item_to_list(&dropping_trees,
8529                                                 objectid,
8530                                                 btrfs_root_bytenr(&ri),
8531                                                 last_snapshot, level,
8532                                                 ri.drop_level,
8533                                                 level_size, &found_key);
8534                                 if (ret < 0)
8535                                         goto out;
8536                         }
8537                 }
8538                 path.slots[0]++;
8539         }
8540         btrfs_release_path(&path);
8541
8542         /*
8543          * check_block can return -EAGAIN if it fixes something, please keep
8544          * this in mind when dealing with return values from these functions, if
8545          * we get -EAGAIN we want to fall through and restart the loop.
8546          */
8547         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8548                                   &seen, &reada, &nodes, &extent_cache,
8549                                   &chunk_cache, &dev_cache, &block_group_cache,
8550                                   &dev_extent_cache);
8551         if (ret < 0) {
8552                 if (ret == -EAGAIN)
8553                         goto loop;
8554                 goto out;
8555         }
8556         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8557                                   &pending, &seen, &reada, &nodes,
8558                                   &extent_cache, &chunk_cache, &dev_cache,
8559                                   &block_group_cache, &dev_extent_cache);
8560         if (ret < 0) {
8561                 if (ret == -EAGAIN)
8562                         goto loop;
8563                 goto out;
8564         }
8565
8566         ret = check_chunks(&chunk_cache, &block_group_cache,
8567                            &dev_extent_cache, NULL, NULL, NULL, 0);
8568         if (ret) {
8569                 if (ret == -EAGAIN)
8570                         goto loop;
8571                 err = ret;
8572         }
8573
8574         ret = check_extent_refs(root, &extent_cache);
8575         if (ret < 0) {
8576                 if (ret == -EAGAIN)
8577                         goto loop;
8578                 goto out;
8579         }
8580
8581         ret = check_devices(&dev_cache, &dev_extent_cache);
8582         if (ret && err)
8583                 ret = err;
8584
8585 out:
8586         task_stop(ctx.info);
8587         if (repair) {
8588                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8589                 extent_io_tree_cleanup(&excluded_extents);
8590                 root->fs_info->fsck_extent_cache = NULL;
8591                 root->fs_info->free_extent_hook = NULL;
8592                 root->fs_info->corrupt_blocks = NULL;
8593                 root->fs_info->excluded_extents = NULL;
8594         }
8595         free(bits);
8596         free_chunk_cache_tree(&chunk_cache);
8597         free_device_cache_tree(&dev_cache);
8598         free_block_group_tree(&block_group_cache);
8599         free_device_extent_tree(&dev_extent_cache);
8600         free_extent_cache_tree(&seen);
8601         free_extent_cache_tree(&pending);
8602         free_extent_cache_tree(&reada);
8603         free_extent_cache_tree(&nodes);
8604         return ret;
8605 loop:
8606         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8607         free_extent_cache_tree(&seen);
8608         free_extent_cache_tree(&pending);
8609         free_extent_cache_tree(&reada);
8610         free_extent_cache_tree(&nodes);
8611         free_chunk_cache_tree(&chunk_cache);
8612         free_block_group_tree(&block_group_cache);
8613         free_device_cache_tree(&dev_cache);
8614         free_device_extent_tree(&dev_extent_cache);
8615         free_extent_record_cache(root->fs_info, &extent_cache);
8616         free_root_item_list(&normal_trees);
8617         free_root_item_list(&dropping_trees);
8618         extent_io_tree_cleanup(&excluded_extents);
8619         goto again;
8620 }
8621
8622 /*
8623  * Check backrefs of a tree block given by @bytenr or @eb.
8624  *
8625  * @root:       the root containing the @bytenr or @eb
8626  * @eb:         tree block extent buffer, can be NULL
8627  * @bytenr:     bytenr of the tree block to search
8628  * @level:      tree level of the tree block
8629  * @owner:      owner of the tree block
8630  *
8631  * Return >0 for any error found and output error message
8632  * Return 0 for no error found
8633  */
8634 static int check_tree_block_ref(struct btrfs_root *root,
8635                                 struct extent_buffer *eb, u64 bytenr,
8636                                 int level, u64 owner)
8637 {
8638         struct btrfs_key key;
8639         struct btrfs_root *extent_root = root->fs_info->extent_root;
8640         struct btrfs_path path;
8641         struct btrfs_extent_item *ei;
8642         struct btrfs_extent_inline_ref *iref;
8643         struct extent_buffer *leaf;
8644         unsigned long end;
8645         unsigned long ptr;
8646         int slot;
8647         int skinny_level;
8648         int type;
8649         u32 nodesize = root->nodesize;
8650         u32 item_size;
8651         u64 offset;
8652         int found_ref = 0;
8653         int err = 0;
8654         int ret;
8655
8656         btrfs_init_path(&path);
8657         key.objectid = bytenr;
8658         if (btrfs_fs_incompat(root->fs_info,
8659                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8660                 key.type = BTRFS_METADATA_ITEM_KEY;
8661         else
8662                 key.type = BTRFS_EXTENT_ITEM_KEY;
8663         key.offset = (u64)-1;
8664
8665         /* Search for the backref in extent tree */
8666         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8667         if (ret < 0) {
8668                 err |= BACKREF_MISSING;
8669                 goto out;
8670         }
8671         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8672         if (ret) {
8673                 err |= BACKREF_MISSING;
8674                 goto out;
8675         }
8676
8677         leaf = path.nodes[0];
8678         slot = path.slots[0];
8679         btrfs_item_key_to_cpu(leaf, &key, slot);
8680
8681         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8682
8683         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8684                 skinny_level = (int)key.offset;
8685                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8686         } else {
8687                 struct btrfs_tree_block_info *info;
8688
8689                 info = (struct btrfs_tree_block_info *)(ei + 1);
8690                 skinny_level = btrfs_tree_block_level(leaf, info);
8691                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8692         }
8693
8694         if (eb) {
8695                 u64 header_gen;
8696                 u64 extent_gen;
8697
8698                 if (!(btrfs_extent_flags(leaf, ei) &
8699                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8700                         error(
8701                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8702                                 key.objectid, nodesize,
8703                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8704                         err = BACKREF_MISMATCH;
8705                 }
8706                 header_gen = btrfs_header_generation(eb);
8707                 extent_gen = btrfs_extent_generation(leaf, ei);
8708                 if (header_gen != extent_gen) {
8709                         error(
8710         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8711                                 key.objectid, nodesize, header_gen,
8712                                 extent_gen);
8713                         err = BACKREF_MISMATCH;
8714                 }
8715                 if (level != skinny_level) {
8716                         error(
8717                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8718                                 key.objectid, nodesize, level, skinny_level);
8719                         err = BACKREF_MISMATCH;
8720                 }
8721                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8722                         error(
8723                         "extent[%llu %u] is referred by other roots than %llu",
8724                                 key.objectid, nodesize, root->objectid);
8725                         err = BACKREF_MISMATCH;
8726                 }
8727         }
8728
8729         /*
8730          * Iterate the extent/metadata item to find the exact backref
8731          */
8732         item_size = btrfs_item_size_nr(leaf, slot);
8733         ptr = (unsigned long)iref;
8734         end = (unsigned long)ei + item_size;
8735         while (ptr < end) {
8736                 iref = (struct btrfs_extent_inline_ref *)ptr;
8737                 type = btrfs_extent_inline_ref_type(leaf, iref);
8738                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8739
8740                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8741                         (offset == root->objectid || offset == owner)) {
8742                         found_ref = 1;
8743                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8744                         /* Check if the backref points to valid referencer */
8745                         found_ref = !check_tree_block_ref(root, NULL, offset,
8746                                                           level + 1, owner);
8747                 }
8748
8749                 if (found_ref)
8750                         break;
8751                 ptr += btrfs_extent_inline_ref_size(type);
8752         }
8753
8754         /*
8755          * Inlined extent item doesn't have what we need, check
8756          * TREE_BLOCK_REF_KEY
8757          */
8758         if (!found_ref) {
8759                 btrfs_release_path(&path);
8760                 key.objectid = bytenr;
8761                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8762                 key.offset = root->objectid;
8763
8764                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8765                 if (!ret)
8766                         found_ref = 1;
8767         }
8768         if (!found_ref)
8769                 err |= BACKREF_MISSING;
8770 out:
8771         btrfs_release_path(&path);
8772         if (eb && (err & BACKREF_MISSING))
8773                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8774                         bytenr, nodesize, owner, level);
8775         return err;
8776 }
8777
8778 /*
8779  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8780  *
8781  * Return >0 any error found and output error message
8782  * Return 0 for no error found
8783  */
8784 static int check_extent_data_item(struct btrfs_root *root,
8785                                   struct extent_buffer *eb, int slot)
8786 {
8787         struct btrfs_file_extent_item *fi;
8788         struct btrfs_path path;
8789         struct btrfs_root *extent_root = root->fs_info->extent_root;
8790         struct btrfs_key fi_key;
8791         struct btrfs_key dbref_key;
8792         struct extent_buffer *leaf;
8793         struct btrfs_extent_item *ei;
8794         struct btrfs_extent_inline_ref *iref;
8795         struct btrfs_extent_data_ref *dref;
8796         u64 owner;
8797         u64 file_extent_gen;
8798         u64 disk_bytenr;
8799         u64 disk_num_bytes;
8800         u64 extent_num_bytes;
8801         u64 extent_flags;
8802         u64 extent_gen;
8803         u32 item_size;
8804         unsigned long end;
8805         unsigned long ptr;
8806         int type;
8807         u64 ref_root;
8808         int found_dbackref = 0;
8809         int err = 0;
8810         int ret;
8811
8812         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8813         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8814         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8815
8816         /* Nothing to check for hole and inline data extents */
8817         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8818             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8819                 return 0;
8820
8821         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8822         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8823         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8824
8825         /* Check unaligned disk_num_bytes and num_bytes */
8826         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8827                 error(
8828 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8829                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8830                         root->sectorsize);
8831                 err |= BYTES_UNALIGNED;
8832         } else {
8833                 data_bytes_allocated += disk_num_bytes;
8834         }
8835         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8836                 error(
8837 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8838                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8839                         root->sectorsize);
8840                 err |= BYTES_UNALIGNED;
8841         } else {
8842                 data_bytes_referenced += extent_num_bytes;
8843         }
8844         owner = btrfs_header_owner(eb);
8845
8846         /* Check the extent item of the file extent in extent tree */
8847         btrfs_init_path(&path);
8848         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8849         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8850         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8851
8852         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8853         if (ret) {
8854                 err |= BACKREF_MISSING;
8855                 goto error;
8856         }
8857
8858         leaf = path.nodes[0];
8859         slot = path.slots[0];
8860         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8861
8862         extent_flags = btrfs_extent_flags(leaf, ei);
8863         extent_gen = btrfs_extent_generation(leaf, ei);
8864
8865         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8866                 error(
8867                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8868                     disk_bytenr, disk_num_bytes,
8869                     BTRFS_EXTENT_FLAG_DATA);
8870                 err |= BACKREF_MISMATCH;
8871         }
8872
8873         if (file_extent_gen < extent_gen) {
8874                 error(
8875 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8876                         disk_bytenr, disk_num_bytes, file_extent_gen,
8877                         extent_gen);
8878                 err |= BACKREF_MISMATCH;
8879         }
8880
8881         /* Check data backref inside that extent item */
8882         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8883         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8884         ptr = (unsigned long)iref;
8885         end = (unsigned long)ei + item_size;
8886         while (ptr < end) {
8887                 iref = (struct btrfs_extent_inline_ref *)ptr;
8888                 type = btrfs_extent_inline_ref_type(leaf, iref);
8889                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8890
8891                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8892                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8893                         if (ref_root == owner || ref_root == root->objectid)
8894                                 found_dbackref = 1;
8895                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8896                         found_dbackref = !check_tree_block_ref(root, NULL,
8897                                 btrfs_extent_inline_ref_offset(leaf, iref),
8898                                 0, owner);
8899                 }
8900
8901                 if (found_dbackref)
8902                         break;
8903                 ptr += btrfs_extent_inline_ref_size(type);
8904         }
8905
8906         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8907         if (!found_dbackref) {
8908                 btrfs_release_path(&path);
8909
8910                 btrfs_init_path(&path);
8911                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8912                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8913                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8914                                 fi_key.objectid, fi_key.offset);
8915
8916                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8917                                         &dbref_key, &path, 0, 0);
8918                 if (!ret)
8919                         found_dbackref = 1;
8920         }
8921
8922         if (!found_dbackref)
8923                 err |= BACKREF_MISSING;
8924 error:
8925         btrfs_release_path(&path);
8926         if (err & BACKREF_MISSING) {
8927                 error("data extent[%llu %llu] backref lost",
8928                       disk_bytenr, disk_num_bytes);
8929         }
8930         return err;
8931 }
8932
8933 /*
8934  * Get real tree block level for the case like shared block
8935  * Return >= 0 as tree level
8936  * Return <0 for error
8937  */
8938 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8939 {
8940         struct extent_buffer *eb;
8941         struct btrfs_path path;
8942         struct btrfs_key key;
8943         struct btrfs_extent_item *ei;
8944         u64 flags;
8945         u64 transid;
8946         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8947         u8 backref_level;
8948         u8 header_level;
8949         int ret;
8950
8951         /* Search extent tree for extent generation and level */
8952         key.objectid = bytenr;
8953         key.type = BTRFS_METADATA_ITEM_KEY;
8954         key.offset = (u64)-1;
8955
8956         btrfs_init_path(&path);
8957         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8958         if (ret < 0)
8959                 goto release_out;
8960         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8961         if (ret < 0)
8962                 goto release_out;
8963         if (ret > 0) {
8964                 ret = -ENOENT;
8965                 goto release_out;
8966         }
8967
8968         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8969         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8970                             struct btrfs_extent_item);
8971         flags = btrfs_extent_flags(path.nodes[0], ei);
8972         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8973                 ret = -ENOENT;
8974                 goto release_out;
8975         }
8976
8977         /* Get transid for later read_tree_block() check */
8978         transid = btrfs_extent_generation(path.nodes[0], ei);
8979
8980         /* Get backref level as one source */
8981         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8982                 backref_level = key.offset;
8983         } else {
8984                 struct btrfs_tree_block_info *info;
8985
8986                 info = (struct btrfs_tree_block_info *)(ei + 1);
8987                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8988         }
8989         btrfs_release_path(&path);
8990
8991         /* Get level from tree block as an alternative source */
8992         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8993         if (!extent_buffer_uptodate(eb)) {
8994                 free_extent_buffer(eb);
8995                 return -EIO;
8996         }
8997         header_level = btrfs_header_level(eb);
8998         free_extent_buffer(eb);
8999
9000         if (header_level != backref_level)
9001                 return -EIO;
9002         return header_level;
9003
9004 release_out:
9005         btrfs_release_path(&path);
9006         return ret;
9007 }
9008
9009 /*
9010  * Check if a tree block backref is valid (points to a valid tree block)
9011  * if level == -1, level will be resolved
9012  * Return >0 for any error found and print error message
9013  */
9014 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9015                                     u64 bytenr, int level)
9016 {
9017         struct btrfs_root *root;
9018         struct btrfs_key key;
9019         struct btrfs_path path;
9020         struct extent_buffer *eb;
9021         struct extent_buffer *node;
9022         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9023         int err = 0;
9024         int ret;
9025
9026         /* Query level for level == -1 special case */
9027         if (level == -1)
9028                 level = query_tree_block_level(fs_info, bytenr);
9029         if (level < 0) {
9030                 err |= REFERENCER_MISSING;
9031                 goto out;
9032         }
9033
9034         key.objectid = root_id;
9035         key.type = BTRFS_ROOT_ITEM_KEY;
9036         key.offset = (u64)-1;
9037
9038         root = btrfs_read_fs_root(fs_info, &key);
9039         if (IS_ERR(root)) {
9040                 err |= REFERENCER_MISSING;
9041                 goto out;
9042         }
9043
9044         /* Read out the tree block to get item/node key */
9045         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9046         if (!extent_buffer_uptodate(eb)) {
9047                 err |= REFERENCER_MISSING;
9048                 free_extent_buffer(eb);
9049                 goto out;
9050         }
9051
9052         /* Empty tree, no need to check key */
9053         if (!btrfs_header_nritems(eb) && !level) {
9054                 free_extent_buffer(eb);
9055                 goto out;
9056         }
9057
9058         if (level)
9059                 btrfs_node_key_to_cpu(eb, &key, 0);
9060         else
9061                 btrfs_item_key_to_cpu(eb, &key, 0);
9062
9063         free_extent_buffer(eb);
9064
9065         btrfs_init_path(&path);
9066         /* Search with the first key, to ensure we can reach it */
9067         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9068         if (ret) {
9069                 err |= REFERENCER_MISSING;
9070                 goto release_out;
9071         }
9072
9073         node = path.nodes[level];
9074         if (btrfs_header_bytenr(node) != bytenr) {
9075                 error(
9076         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9077                         bytenr, nodesize, bytenr,
9078                         btrfs_header_bytenr(node));
9079                 err |= REFERENCER_MISMATCH;
9080         }
9081         if (btrfs_header_level(node) != level) {
9082                 error(
9083         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9084                         bytenr, nodesize, level,
9085                         btrfs_header_level(node));
9086                 err |= REFERENCER_MISMATCH;
9087         }
9088
9089 release_out:
9090         btrfs_release_path(&path);
9091 out:
9092         if (err & REFERENCER_MISSING) {
9093                 if (level < 0)
9094                         error("extent [%llu %d] lost referencer (owner: %llu)",
9095                                 bytenr, nodesize, root_id);
9096                 else
9097                         error(
9098                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9099                                 bytenr, nodesize, root_id, level);
9100         }
9101
9102         return err;
9103 }
9104
9105 /*
9106  * Check referencer for shared block backref
9107  * If level == -1, this function will resolve the level.
9108  */
9109 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9110                                      u64 parent, u64 bytenr, int level)
9111 {
9112         struct extent_buffer *eb;
9113         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9114         u32 nr;
9115         int found_parent = 0;
9116         int i;
9117
9118         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9119         if (!extent_buffer_uptodate(eb))
9120                 goto out;
9121
9122         if (level == -1)
9123                 level = query_tree_block_level(fs_info, bytenr);
9124         if (level < 0)
9125                 goto out;
9126
9127         if (level + 1 != btrfs_header_level(eb))
9128                 goto out;
9129
9130         nr = btrfs_header_nritems(eb);
9131         for (i = 0; i < nr; i++) {
9132                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9133                         found_parent = 1;
9134                         break;
9135                 }
9136         }
9137 out:
9138         free_extent_buffer(eb);
9139         if (!found_parent) {
9140                 error(
9141         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9142                         bytenr, nodesize, parent, level);
9143                 return REFERENCER_MISSING;
9144         }
9145         return 0;
9146 }
9147
9148 /*
9149  * Check referencer for normal (inlined) data ref
9150  * If len == 0, it will be resolved by searching in extent tree
9151  */
9152 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9153                                      u64 root_id, u64 objectid, u64 offset,
9154                                      u64 bytenr, u64 len, u32 count)
9155 {
9156         struct btrfs_root *root;
9157         struct btrfs_root *extent_root = fs_info->extent_root;
9158         struct btrfs_key key;
9159         struct btrfs_path path;
9160         struct extent_buffer *leaf;
9161         struct btrfs_file_extent_item *fi;
9162         u32 found_count = 0;
9163         int slot;
9164         int ret = 0;
9165
9166         if (!len) {
9167                 key.objectid = bytenr;
9168                 key.type = BTRFS_EXTENT_ITEM_KEY;
9169                 key.offset = (u64)-1;
9170
9171                 btrfs_init_path(&path);
9172                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9173                 if (ret < 0)
9174                         goto out;
9175                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9176                 if (ret)
9177                         goto out;
9178                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9179                 if (key.objectid != bytenr ||
9180                     key.type != BTRFS_EXTENT_ITEM_KEY)
9181                         goto out;
9182                 len = key.offset;
9183                 btrfs_release_path(&path);
9184         }
9185         key.objectid = root_id;
9186         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9187         key.offset = (u64)-1;
9188         btrfs_init_path(&path);
9189
9190         root = btrfs_read_fs_root(fs_info, &key);
9191         if (IS_ERR(root))
9192                 goto out;
9193
9194         key.objectid = objectid;
9195         key.type = BTRFS_EXTENT_DATA_KEY;
9196         /*
9197          * It can be nasty as data backref offset is
9198          * file offset - file extent offset, which is smaller or
9199          * equal to original backref offset.  The only special case is
9200          * overflow.  So we need to special check and do further search.
9201          */
9202         key.offset = offset & (1ULL << 63) ? 0 : offset;
9203
9204         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9205         if (ret < 0)
9206                 goto out;
9207
9208         /*
9209          * Search afterwards to get correct one
9210          * NOTE: As we must do a comprehensive check on the data backref to
9211          * make sure the dref count also matches, we must iterate all file
9212          * extents for that inode.
9213          */
9214         while (1) {
9215                 leaf = path.nodes[0];
9216                 slot = path.slots[0];
9217
9218                 btrfs_item_key_to_cpu(leaf, &key, slot);
9219                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9220                         break;
9221                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9222                 /*
9223                  * Except normal disk bytenr and disk num bytes, we still
9224                  * need to do extra check on dbackref offset as
9225                  * dbackref offset = file_offset - file_extent_offset
9226                  */
9227                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9228                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9229                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9230                     offset)
9231                         found_count++;
9232
9233                 ret = btrfs_next_item(root, &path);
9234                 if (ret)
9235                         break;
9236         }
9237 out:
9238         btrfs_release_path(&path);
9239         if (found_count != count) {
9240                 error(
9241 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9242                         bytenr, len, root_id, objectid, offset, count, found_count);
9243                 return REFERENCER_MISSING;
9244         }
9245         return 0;
9246 }
9247
9248 /*
9249  * Check if the referencer of a shared data backref exists
9250  */
9251 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9252                                      u64 parent, u64 bytenr)
9253 {
9254         struct extent_buffer *eb;
9255         struct btrfs_key key;
9256         struct btrfs_file_extent_item *fi;
9257         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9258         u32 nr;
9259         int found_parent = 0;
9260         int i;
9261
9262         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9263         if (!extent_buffer_uptodate(eb))
9264                 goto out;
9265
9266         nr = btrfs_header_nritems(eb);
9267         for (i = 0; i < nr; i++) {
9268                 btrfs_item_key_to_cpu(eb, &key, i);
9269                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9270                         continue;
9271
9272                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9273                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9274                         continue;
9275
9276                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9277                         found_parent = 1;
9278                         break;
9279                 }
9280         }
9281
9282 out:
9283         free_extent_buffer(eb);
9284         if (!found_parent) {
9285                 error("shared extent %llu referencer lost (parent: %llu)",
9286                         bytenr, parent);
9287                 return REFERENCER_MISSING;
9288         }
9289         return 0;
9290 }
9291
9292 /*
9293  * This function will check a given extent item, including its backref and
9294  * itself (like crossing stripe boundary and type)
9295  *
9296  * Since we don't use extent_record anymore, introduce new error bit
9297  */
9298 static int check_extent_item(struct btrfs_fs_info *fs_info,
9299                              struct extent_buffer *eb, int slot)
9300 {
9301         struct btrfs_extent_item *ei;
9302         struct btrfs_extent_inline_ref *iref;
9303         struct btrfs_extent_data_ref *dref;
9304         unsigned long end;
9305         unsigned long ptr;
9306         int type;
9307         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9308         u32 item_size = btrfs_item_size_nr(eb, slot);
9309         u64 flags;
9310         u64 offset;
9311         int metadata = 0;
9312         int level;
9313         struct btrfs_key key;
9314         int ret;
9315         int err = 0;
9316
9317         btrfs_item_key_to_cpu(eb, &key, slot);
9318         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9319                 bytes_used += key.offset;
9320         else
9321                 bytes_used += nodesize;
9322
9323         if (item_size < sizeof(*ei)) {
9324                 /*
9325                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9326                  * old thing when on disk format is still un-determined.
9327                  * No need to care about it anymore
9328                  */
9329                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9330                 return -ENOTTY;
9331         }
9332
9333         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9334         flags = btrfs_extent_flags(eb, ei);
9335
9336         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9337                 metadata = 1;
9338         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9339                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9340                       key.objectid, key.objectid + nodesize);
9341                 err |= CROSSING_STRIPE_BOUNDARY;
9342         }
9343
9344         ptr = (unsigned long)(ei + 1);
9345
9346         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9347                 /* Old EXTENT_ITEM metadata */
9348                 struct btrfs_tree_block_info *info;
9349
9350                 info = (struct btrfs_tree_block_info *)ptr;
9351                 level = btrfs_tree_block_level(eb, info);
9352                 ptr += sizeof(struct btrfs_tree_block_info);
9353         } else {
9354                 /* New METADATA_ITEM */
9355                 level = key.offset;
9356         }
9357         end = (unsigned long)ei + item_size;
9358
9359         if (ptr >= end) {
9360                 err |= ITEM_SIZE_MISMATCH;
9361                 goto out;
9362         }
9363
9364         /* Now check every backref in this extent item */
9365 next:
9366         iref = (struct btrfs_extent_inline_ref *)ptr;
9367         type = btrfs_extent_inline_ref_type(eb, iref);
9368         offset = btrfs_extent_inline_ref_offset(eb, iref);
9369         switch (type) {
9370         case BTRFS_TREE_BLOCK_REF_KEY:
9371                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9372                                                level);
9373                 err |= ret;
9374                 break;
9375         case BTRFS_SHARED_BLOCK_REF_KEY:
9376                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9377                                                  level);
9378                 err |= ret;
9379                 break;
9380         case BTRFS_EXTENT_DATA_REF_KEY:
9381                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9382                 ret = check_extent_data_backref(fs_info,
9383                                 btrfs_extent_data_ref_root(eb, dref),
9384                                 btrfs_extent_data_ref_objectid(eb, dref),
9385                                 btrfs_extent_data_ref_offset(eb, dref),
9386                                 key.objectid, key.offset,
9387                                 btrfs_extent_data_ref_count(eb, dref));
9388                 err |= ret;
9389                 break;
9390         case BTRFS_SHARED_DATA_REF_KEY:
9391                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9392                 err |= ret;
9393                 break;
9394         default:
9395                 error("extent[%llu %d %llu] has unknown ref type: %d",
9396                         key.objectid, key.type, key.offset, type);
9397                 err |= UNKNOWN_TYPE;
9398                 goto out;
9399         }
9400
9401         ptr += btrfs_extent_inline_ref_size(type);
9402         if (ptr < end)
9403                 goto next;
9404
9405 out:
9406         return err;
9407 }
9408
9409 /*
9410  * Check if a dev extent item is referred correctly by its chunk
9411  */
9412 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9413                                  struct extent_buffer *eb, int slot)
9414 {
9415         struct btrfs_root *chunk_root = fs_info->chunk_root;
9416         struct btrfs_dev_extent *ptr;
9417         struct btrfs_path path;
9418         struct btrfs_key chunk_key;
9419         struct btrfs_key devext_key;
9420         struct btrfs_chunk *chunk;
9421         struct extent_buffer *l;
9422         int num_stripes;
9423         u64 length;
9424         int i;
9425         int found_chunk = 0;
9426         int ret;
9427
9428         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9429         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9430         length = btrfs_dev_extent_length(eb, ptr);
9431
9432         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9433         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9434         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9435
9436         btrfs_init_path(&path);
9437         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9438         if (ret)
9439                 goto out;
9440
9441         l = path.nodes[0];
9442         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9443         if (btrfs_chunk_length(l, chunk) != length)
9444                 goto out;
9445
9446         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9447         for (i = 0; i < num_stripes; i++) {
9448                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9449                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9450
9451                 if (devid == devext_key.objectid &&
9452                     offset == devext_key.offset) {
9453                         found_chunk = 1;
9454                         break;
9455                 }
9456         }
9457 out:
9458         btrfs_release_path(&path);
9459         if (!found_chunk) {
9460                 error(
9461                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9462                         devext_key.objectid, devext_key.offset, length);
9463                 return REFERENCER_MISSING;
9464         }
9465         return 0;
9466 }
9467
9468 /*
9469  * Check if the used space is correct with the dev item
9470  */
9471 static int check_dev_item(struct btrfs_fs_info *fs_info,
9472                           struct extent_buffer *eb, int slot)
9473 {
9474         struct btrfs_root *dev_root = fs_info->dev_root;
9475         struct btrfs_dev_item *dev_item;
9476         struct btrfs_path path;
9477         struct btrfs_key key;
9478         struct btrfs_dev_extent *ptr;
9479         u64 dev_id;
9480         u64 used;
9481         u64 total = 0;
9482         int ret;
9483
9484         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9485         dev_id = btrfs_device_id(eb, dev_item);
9486         used = btrfs_device_bytes_used(eb, dev_item);
9487
9488         key.objectid = dev_id;
9489         key.type = BTRFS_DEV_EXTENT_KEY;
9490         key.offset = 0;
9491
9492         btrfs_init_path(&path);
9493         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9494         if (ret < 0) {
9495                 btrfs_item_key_to_cpu(eb, &key, slot);
9496                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9497                         key.objectid, key.type, key.offset);
9498                 btrfs_release_path(&path);
9499                 return REFERENCER_MISSING;
9500         }
9501
9502         /* Iterate dev_extents to calculate the used space of a device */
9503         while (1) {
9504                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9505
9506                 if (key.objectid > dev_id)
9507                         break;
9508                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9509                         goto next;
9510
9511                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9512                                      struct btrfs_dev_extent);
9513                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9514 next:
9515                 ret = btrfs_next_item(dev_root, &path);
9516                 if (ret)
9517                         break;
9518         }
9519         btrfs_release_path(&path);
9520
9521         if (used != total) {
9522                 btrfs_item_key_to_cpu(eb, &key, slot);
9523                 error(
9524 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9525                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9526                         BTRFS_DEV_EXTENT_KEY, dev_id);
9527                 return ACCOUNTING_MISMATCH;
9528         }
9529         return 0;
9530 }
9531
9532 /*
9533  * Check a block group item with its referener (chunk) and its used space
9534  * with extent/metadata item
9535  */
9536 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9537                                   struct extent_buffer *eb, int slot)
9538 {
9539         struct btrfs_root *extent_root = fs_info->extent_root;
9540         struct btrfs_root *chunk_root = fs_info->chunk_root;
9541         struct btrfs_block_group_item *bi;
9542         struct btrfs_block_group_item bg_item;
9543         struct btrfs_path path;
9544         struct btrfs_key bg_key;
9545         struct btrfs_key chunk_key;
9546         struct btrfs_key extent_key;
9547         struct btrfs_chunk *chunk;
9548         struct extent_buffer *leaf;
9549         struct btrfs_extent_item *ei;
9550         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9551         u64 flags;
9552         u64 bg_flags;
9553         u64 used;
9554         u64 total = 0;
9555         int ret;
9556         int err = 0;
9557
9558         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9559         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9560         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9561         used = btrfs_block_group_used(&bg_item);
9562         bg_flags = btrfs_block_group_flags(&bg_item);
9563
9564         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9565         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9566         chunk_key.offset = bg_key.objectid;
9567
9568         btrfs_init_path(&path);
9569         /* Search for the referencer chunk */
9570         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9571         if (ret) {
9572                 error(
9573                 "block group[%llu %llu] did not find the related chunk item",
9574                         bg_key.objectid, bg_key.offset);
9575                 err |= REFERENCER_MISSING;
9576         } else {
9577                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9578                                         struct btrfs_chunk);
9579                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9580                                                 bg_key.offset) {
9581                         error(
9582         "block group[%llu %llu] related chunk item length does not match",
9583                                 bg_key.objectid, bg_key.offset);
9584                         err |= REFERENCER_MISMATCH;
9585                 }
9586         }
9587         btrfs_release_path(&path);
9588
9589         /* Search from the block group bytenr */
9590         extent_key.objectid = bg_key.objectid;
9591         extent_key.type = 0;
9592         extent_key.offset = 0;
9593
9594         btrfs_init_path(&path);
9595         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9596         if (ret < 0)
9597                 goto out;
9598
9599         /* Iterate extent tree to account used space */
9600         while (1) {
9601                 leaf = path.nodes[0];
9602                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9603                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9604                         break;
9605
9606                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9607                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9608                         goto next;
9609                 if (extent_key.objectid < bg_key.objectid)
9610                         goto next;
9611
9612                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9613                         total += nodesize;
9614                 else
9615                         total += extent_key.offset;
9616
9617                 ei = btrfs_item_ptr(leaf, path.slots[0],
9618                                     struct btrfs_extent_item);
9619                 flags = btrfs_extent_flags(leaf, ei);
9620                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9621                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9622                                 error(
9623                         "bad extent[%llu, %llu) type mismatch with chunk",
9624                                         extent_key.objectid,
9625                                         extent_key.objectid + extent_key.offset);
9626                                 err |= CHUNK_TYPE_MISMATCH;
9627                         }
9628                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9629                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9630                                     BTRFS_BLOCK_GROUP_METADATA))) {
9631                                 error(
9632                         "bad extent[%llu, %llu) type mismatch with chunk",
9633                                         extent_key.objectid,
9634                                         extent_key.objectid + nodesize);
9635                                 err |= CHUNK_TYPE_MISMATCH;
9636                         }
9637                 }
9638 next:
9639                 ret = btrfs_next_item(extent_root, &path);
9640                 if (ret)
9641                         break;
9642         }
9643
9644 out:
9645         btrfs_release_path(&path);
9646
9647         if (total != used) {
9648                 error(
9649                 "block group[%llu %llu] used %llu but extent items used %llu",
9650                         bg_key.objectid, bg_key.offset, used, total);
9651                 err |= ACCOUNTING_MISMATCH;
9652         }
9653         return err;
9654 }
9655
9656 /*
9657  * Check a chunk item.
9658  * Including checking all referred dev_extents and block group
9659  */
9660 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9661                             struct extent_buffer *eb, int slot)
9662 {
9663         struct btrfs_root *extent_root = fs_info->extent_root;
9664         struct btrfs_root *dev_root = fs_info->dev_root;
9665         struct btrfs_path path;
9666         struct btrfs_key chunk_key;
9667         struct btrfs_key bg_key;
9668         struct btrfs_key devext_key;
9669         struct btrfs_chunk *chunk;
9670         struct extent_buffer *leaf;
9671         struct btrfs_block_group_item *bi;
9672         struct btrfs_block_group_item bg_item;
9673         struct btrfs_dev_extent *ptr;
9674         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9675         u64 length;
9676         u64 chunk_end;
9677         u64 type;
9678         u64 profile;
9679         int num_stripes;
9680         u64 offset;
9681         u64 objectid;
9682         int i;
9683         int ret;
9684         int err = 0;
9685
9686         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9687         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9688         length = btrfs_chunk_length(eb, chunk);
9689         chunk_end = chunk_key.offset + length;
9690         if (!IS_ALIGNED(length, sectorsize)) {
9691                 error("chunk[%llu %llu) not aligned to %u",
9692                         chunk_key.offset, chunk_end, sectorsize);
9693                 err |= BYTES_UNALIGNED;
9694                 goto out;
9695         }
9696
9697         type = btrfs_chunk_type(eb, chunk);
9698         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9699         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9700                 error("chunk[%llu %llu) has no chunk type",
9701                         chunk_key.offset, chunk_end);
9702                 err |= UNKNOWN_TYPE;
9703         }
9704         if (profile && (profile & (profile - 1))) {
9705                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9706                         chunk_key.offset, chunk_end, profile);
9707                 err |= UNKNOWN_TYPE;
9708         }
9709
9710         bg_key.objectid = chunk_key.offset;
9711         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9712         bg_key.offset = length;
9713
9714         btrfs_init_path(&path);
9715         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9716         if (ret) {
9717                 error(
9718                 "chunk[%llu %llu) did not find the related block group item",
9719                         chunk_key.offset, chunk_end);
9720                 err |= REFERENCER_MISSING;
9721         } else{
9722                 leaf = path.nodes[0];
9723                 bi = btrfs_item_ptr(leaf, path.slots[0],
9724                                     struct btrfs_block_group_item);
9725                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9726                                    sizeof(bg_item));
9727                 if (btrfs_block_group_flags(&bg_item) != type) {
9728                         error(
9729 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9730                                 chunk_key.offset, chunk_end, type,
9731                                 btrfs_block_group_flags(&bg_item));
9732                         err |= REFERENCER_MISSING;
9733                 }
9734         }
9735
9736         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9737         for (i = 0; i < num_stripes; i++) {
9738                 btrfs_release_path(&path);
9739                 btrfs_init_path(&path);
9740                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9741                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9742                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9743
9744                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9745                                         0, 0);
9746                 if (ret)
9747                         goto not_match_dev;
9748
9749                 leaf = path.nodes[0];
9750                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9751                                      struct btrfs_dev_extent);
9752                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9753                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9754                 if (objectid != chunk_key.objectid ||
9755                     offset != chunk_key.offset ||
9756                     btrfs_dev_extent_length(leaf, ptr) != length)
9757                         goto not_match_dev;
9758                 continue;
9759 not_match_dev:
9760                 err |= BACKREF_MISSING;
9761                 error(
9762                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9763                         chunk_key.objectid, chunk_end, i);
9764                 continue;
9765         }
9766         btrfs_release_path(&path);
9767 out:
9768         return err;
9769 }
9770
9771 /*
9772  * Main entry function to check known items and update related accounting info
9773  */
9774 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9775 {
9776         struct btrfs_fs_info *fs_info = root->fs_info;
9777         struct btrfs_key key;
9778         int slot = 0;
9779         int type;
9780         struct btrfs_extent_data_ref *dref;
9781         int ret;
9782         int err = 0;
9783
9784 next:
9785         btrfs_item_key_to_cpu(eb, &key, slot);
9786         type = btrfs_key_type(&key);
9787
9788         switch (type) {
9789         case BTRFS_EXTENT_DATA_KEY:
9790                 ret = check_extent_data_item(root, eb, slot);
9791                 err |= ret;
9792                 break;
9793         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9794                 ret = check_block_group_item(fs_info, eb, slot);
9795                 err |= ret;
9796                 break;
9797         case BTRFS_DEV_ITEM_KEY:
9798                 ret = check_dev_item(fs_info, eb, slot);
9799                 err |= ret;
9800                 break;
9801         case BTRFS_CHUNK_ITEM_KEY:
9802                 ret = check_chunk_item(fs_info, eb, slot);
9803                 err |= ret;
9804                 break;
9805         case BTRFS_DEV_EXTENT_KEY:
9806                 ret = check_dev_extent_item(fs_info, eb, slot);
9807                 err |= ret;
9808                 break;
9809         case BTRFS_EXTENT_ITEM_KEY:
9810         case BTRFS_METADATA_ITEM_KEY:
9811                 ret = check_extent_item(fs_info, eb, slot);
9812                 err |= ret;
9813                 break;
9814         case BTRFS_EXTENT_CSUM_KEY:
9815                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9816                 break;
9817         case BTRFS_TREE_BLOCK_REF_KEY:
9818                 ret = check_tree_block_backref(fs_info, key.offset,
9819                                                key.objectid, -1);
9820                 err |= ret;
9821                 break;
9822         case BTRFS_EXTENT_DATA_REF_KEY:
9823                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9824                 ret = check_extent_data_backref(fs_info,
9825                                 btrfs_extent_data_ref_root(eb, dref),
9826                                 btrfs_extent_data_ref_objectid(eb, dref),
9827                                 btrfs_extent_data_ref_offset(eb, dref),
9828                                 key.objectid, 0,
9829                                 btrfs_extent_data_ref_count(eb, dref));
9830                 err |= ret;
9831                 break;
9832         case BTRFS_SHARED_BLOCK_REF_KEY:
9833                 ret = check_shared_block_backref(fs_info, key.offset,
9834                                                  key.objectid, -1);
9835                 err |= ret;
9836                 break;
9837         case BTRFS_SHARED_DATA_REF_KEY:
9838                 ret = check_shared_data_backref(fs_info, key.offset,
9839                                                 key.objectid);
9840                 err |= ret;
9841                 break;
9842         default:
9843                 break;
9844         }
9845
9846         if (++slot < btrfs_header_nritems(eb))
9847                 goto next;
9848
9849         return err;
9850 }
9851
9852 /*
9853  * Helper function for later fs/subvol tree check.  To determine if a tree
9854  * block should be checked.
9855  * This function will ensure only the direct referencer with lowest rootid to
9856  * check a fs/subvolume tree block.
9857  *
9858  * Backref check at extent tree would detect errors like missing subvolume
9859  * tree, so we can do aggressive check to reduce duplicated checks.
9860  */
9861 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9862 {
9863         struct btrfs_root *extent_root = root->fs_info->extent_root;
9864         struct btrfs_key key;
9865         struct btrfs_path path;
9866         struct extent_buffer *leaf;
9867         int slot;
9868         struct btrfs_extent_item *ei;
9869         unsigned long ptr;
9870         unsigned long end;
9871         int type;
9872         u32 item_size;
9873         u64 offset;
9874         struct btrfs_extent_inline_ref *iref;
9875         int ret;
9876
9877         btrfs_init_path(&path);
9878         key.objectid = btrfs_header_bytenr(eb);
9879         key.type = BTRFS_METADATA_ITEM_KEY;
9880         key.offset = (u64)-1;
9881
9882         /*
9883          * Any failure in backref resolving means we can't determine
9884          * whom the tree block belongs to.
9885          * So in that case, we need to check that tree block
9886          */
9887         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9888         if (ret < 0)
9889                 goto need_check;
9890
9891         ret = btrfs_previous_extent_item(extent_root, &path,
9892                                          btrfs_header_bytenr(eb));
9893         if (ret)
9894                 goto need_check;
9895
9896         leaf = path.nodes[0];
9897         slot = path.slots[0];
9898         btrfs_item_key_to_cpu(leaf, &key, slot);
9899         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9900
9901         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9902                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9903         } else {
9904                 struct btrfs_tree_block_info *info;
9905
9906                 info = (struct btrfs_tree_block_info *)(ei + 1);
9907                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9908         }
9909
9910         item_size = btrfs_item_size_nr(leaf, slot);
9911         ptr = (unsigned long)iref;
9912         end = (unsigned long)ei + item_size;
9913         while (ptr < end) {
9914                 iref = (struct btrfs_extent_inline_ref *)ptr;
9915                 type = btrfs_extent_inline_ref_type(leaf, iref);
9916                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9917
9918                 /*
9919                  * We only check the tree block if current root is
9920                  * the lowest referencer of it.
9921                  */
9922                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9923                     offset < root->objectid) {
9924                         btrfs_release_path(&path);
9925                         return 0;
9926                 }
9927
9928                 ptr += btrfs_extent_inline_ref_size(type);
9929         }
9930         /*
9931          * Normally we should also check keyed tree block ref, but that may be
9932          * very time consuming.  Inlined ref should already make us skip a lot
9933          * of refs now.  So skip search keyed tree block ref.
9934          */
9935
9936 need_check:
9937         btrfs_release_path(&path);
9938         return 1;
9939 }
9940
9941 /*
9942  * Traversal function for tree block. We will do:
9943  * 1) Skip shared fs/subvolume tree blocks
9944  * 2) Update related bytes accounting
9945  * 3) Pre-order traversal
9946  */
9947 static int traverse_tree_block(struct btrfs_root *root,
9948                                 struct extent_buffer *node)
9949 {
9950         struct extent_buffer *eb;
9951         int level;
9952         u64 nr;
9953         int i;
9954         int err = 0;
9955         int ret;
9956
9957         /*
9958          * Skip shared fs/subvolume tree block, in that case they will
9959          * be checked by referencer with lowest rootid
9960          */
9961         if (is_fstree(root->objectid) && !should_check(root, node))
9962                 return 0;
9963
9964         /* Update bytes accounting */
9965         total_btree_bytes += node->len;
9966         if (fs_root_objectid(btrfs_header_owner(node)))
9967                 total_fs_tree_bytes += node->len;
9968         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9969                 total_extent_tree_bytes += node->len;
9970         if (!found_old_backref &&
9971             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9972             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9973             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9974                 found_old_backref = 1;
9975
9976         /* pre-order tranversal, check itself first */
9977         level = btrfs_header_level(node);
9978         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9979                                    btrfs_header_level(node),
9980                                    btrfs_header_owner(node));
9981         err |= ret;
9982         if (err)
9983                 error(
9984         "check %s failed root %llu bytenr %llu level %d, force continue check",
9985                         level ? "node":"leaf", root->objectid,
9986                         btrfs_header_bytenr(node), btrfs_header_level(node));
9987
9988         if (!level) {
9989                 btree_space_waste += btrfs_leaf_free_space(root, node);
9990                 ret = check_leaf_items(root, node);
9991                 err |= ret;
9992                 return err;
9993         }
9994
9995         nr = btrfs_header_nritems(node);
9996         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
9997                 sizeof(struct btrfs_key_ptr);
9998
9999         /* Then check all its children */
10000         for (i = 0; i < nr; i++) {
10001                 u64 blocknr = btrfs_node_blockptr(node, i);
10002
10003                 /*
10004                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10005                  * to call the function itself.
10006                  */
10007                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10008                 if (extent_buffer_uptodate(eb)) {
10009                         ret = traverse_tree_block(root, eb);
10010                         err |= ret;
10011                 }
10012                 free_extent_buffer(eb);
10013         }
10014
10015         return err;
10016 }
10017
10018 /*
10019  * Low memory usage version check_chunks_and_extents.
10020  */
10021 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10022 {
10023         struct btrfs_path path;
10024         struct btrfs_key key;
10025         struct btrfs_root *root1;
10026         struct btrfs_root *cur_root;
10027         int err = 0;
10028         int ret;
10029
10030         root1 = root->fs_info->chunk_root;
10031         ret = traverse_tree_block(root1, root1->node);
10032         err |= ret;
10033
10034         root1 = root->fs_info->tree_root;
10035         ret = traverse_tree_block(root1, root1->node);
10036         err |= ret;
10037
10038         btrfs_init_path(&path);
10039         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10040         key.offset = 0;
10041         key.type = BTRFS_ROOT_ITEM_KEY;
10042
10043         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10044         if (ret) {
10045                 error("cannot find extent treet in tree_root");
10046                 goto out;
10047         }
10048
10049         while (1) {
10050                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10051                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10052                         goto next;
10053                 key.offset = (u64)-1;
10054
10055                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10056                 if (IS_ERR(cur_root) || !cur_root) {
10057                         error("failed to read tree: %lld", key.objectid);
10058                         goto next;
10059                 }
10060
10061                 ret = traverse_tree_block(cur_root, cur_root->node);
10062                 err |= ret;
10063
10064 next:
10065                 ret = btrfs_next_item(root1, &path);
10066                 if (ret)
10067                         goto out;
10068         }
10069
10070 out:
10071         btrfs_release_path(&path);
10072         return err;
10073 }
10074
10075 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10076                            struct btrfs_root *root, int overwrite)
10077 {
10078         struct extent_buffer *c;
10079         struct extent_buffer *old = root->node;
10080         int level;
10081         int ret;
10082         struct btrfs_disk_key disk_key = {0,0,0};
10083
10084         level = 0;
10085
10086         if (overwrite) {
10087                 c = old;
10088                 extent_buffer_get(c);
10089                 goto init;
10090         }
10091         c = btrfs_alloc_free_block(trans, root,
10092                                    root->nodesize,
10093                                    root->root_key.objectid,
10094                                    &disk_key, level, 0, 0);
10095         if (IS_ERR(c)) {
10096                 c = old;
10097                 extent_buffer_get(c);
10098                 overwrite = 1;
10099         }
10100 init:
10101         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10102         btrfs_set_header_level(c, level);
10103         btrfs_set_header_bytenr(c, c->start);
10104         btrfs_set_header_generation(c, trans->transid);
10105         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10106         btrfs_set_header_owner(c, root->root_key.objectid);
10107
10108         write_extent_buffer(c, root->fs_info->fsid,
10109                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10110
10111         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10112                             btrfs_header_chunk_tree_uuid(c),
10113                             BTRFS_UUID_SIZE);
10114
10115         btrfs_mark_buffer_dirty(c);
10116         /*
10117          * this case can happen in the following case:
10118          *
10119          * 1.overwrite previous root.
10120          *
10121          * 2.reinit reloc data root, this is because we skip pin
10122          * down reloc data tree before which means we can allocate
10123          * same block bytenr here.
10124          */
10125         if (old->start == c->start) {
10126                 btrfs_set_root_generation(&root->root_item,
10127                                           trans->transid);
10128                 root->root_item.level = btrfs_header_level(root->node);
10129                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10130                                         &root->root_key, &root->root_item);
10131                 if (ret) {
10132                         free_extent_buffer(c);
10133                         return ret;
10134                 }
10135         }
10136         free_extent_buffer(old);
10137         root->node = c;
10138         add_root_to_dirty_list(root);
10139         return 0;
10140 }
10141
10142 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10143                                 struct extent_buffer *eb, int tree_root)
10144 {
10145         struct extent_buffer *tmp;
10146         struct btrfs_root_item *ri;
10147         struct btrfs_key key;
10148         u64 bytenr;
10149         u32 nodesize;
10150         int level = btrfs_header_level(eb);
10151         int nritems;
10152         int ret;
10153         int i;
10154
10155         /*
10156          * If we have pinned this block before, don't pin it again.
10157          * This can not only avoid forever loop with broken filesystem
10158          * but also give us some speedups.
10159          */
10160         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10161                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10162                 return 0;
10163
10164         btrfs_pin_extent(fs_info, eb->start, eb->len);
10165
10166         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10167         nritems = btrfs_header_nritems(eb);
10168         for (i = 0; i < nritems; i++) {
10169                 if (level == 0) {
10170                         btrfs_item_key_to_cpu(eb, &key, i);
10171                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10172                                 continue;
10173                         /* Skip the extent root and reloc roots */
10174                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10175                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10176                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10177                                 continue;
10178                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10179                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10180
10181                         /*
10182                          * If at any point we start needing the real root we
10183                          * will have to build a stump root for the root we are
10184                          * in, but for now this doesn't actually use the root so
10185                          * just pass in extent_root.
10186                          */
10187                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10188                                               nodesize, 0);
10189                         if (!extent_buffer_uptodate(tmp)) {
10190                                 fprintf(stderr, "Error reading root block\n");
10191                                 return -EIO;
10192                         }
10193                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10194                         free_extent_buffer(tmp);
10195                         if (ret)
10196                                 return ret;
10197                 } else {
10198                         bytenr = btrfs_node_blockptr(eb, i);
10199
10200                         /* If we aren't the tree root don't read the block */
10201                         if (level == 1 && !tree_root) {
10202                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10203                                 continue;
10204                         }
10205
10206                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10207                                               nodesize, 0);
10208                         if (!extent_buffer_uptodate(tmp)) {
10209                                 fprintf(stderr, "Error reading tree block\n");
10210                                 return -EIO;
10211                         }
10212                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10213                         free_extent_buffer(tmp);
10214                         if (ret)
10215                                 return ret;
10216                 }
10217         }
10218
10219         return 0;
10220 }
10221
10222 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10223 {
10224         int ret;
10225
10226         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10227         if (ret)
10228                 return ret;
10229
10230         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10231 }
10232
10233 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10234 {
10235         struct btrfs_block_group_cache *cache;
10236         struct btrfs_path *path;
10237         struct extent_buffer *leaf;
10238         struct btrfs_chunk *chunk;
10239         struct btrfs_key key;
10240         int ret;
10241         u64 start;
10242
10243         path = btrfs_alloc_path();
10244         if (!path)
10245                 return -ENOMEM;
10246
10247         key.objectid = 0;
10248         key.type = BTRFS_CHUNK_ITEM_KEY;
10249         key.offset = 0;
10250
10251         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10252         if (ret < 0) {
10253                 btrfs_free_path(path);
10254                 return ret;
10255         }
10256
10257         /*
10258          * We do this in case the block groups were screwed up and had alloc
10259          * bits that aren't actually set on the chunks.  This happens with
10260          * restored images every time and could happen in real life I guess.
10261          */
10262         fs_info->avail_data_alloc_bits = 0;
10263         fs_info->avail_metadata_alloc_bits = 0;
10264         fs_info->avail_system_alloc_bits = 0;
10265
10266         /* First we need to create the in-memory block groups */
10267         while (1) {
10268                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10269                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10270                         if (ret < 0) {
10271                                 btrfs_free_path(path);
10272                                 return ret;
10273                         }
10274                         if (ret) {
10275                                 ret = 0;
10276                                 break;
10277                         }
10278                 }
10279                 leaf = path->nodes[0];
10280                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10281                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10282                         path->slots[0]++;
10283                         continue;
10284                 }
10285
10286                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10287                                        struct btrfs_chunk);
10288                 btrfs_add_block_group(fs_info, 0,
10289                                       btrfs_chunk_type(leaf, chunk),
10290                                       key.objectid, key.offset,
10291                                       btrfs_chunk_length(leaf, chunk));
10292                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10293                                  key.offset + btrfs_chunk_length(leaf, chunk),
10294                                  GFP_NOFS);
10295                 path->slots[0]++;
10296         }
10297         start = 0;
10298         while (1) {
10299                 cache = btrfs_lookup_first_block_group(fs_info, start);
10300                 if (!cache)
10301                         break;
10302                 cache->cached = 1;
10303                 start = cache->key.objectid + cache->key.offset;
10304         }
10305
10306         btrfs_free_path(path);
10307         return 0;
10308 }
10309
10310 static int reset_balance(struct btrfs_trans_handle *trans,
10311                          struct btrfs_fs_info *fs_info)
10312 {
10313         struct btrfs_root *root = fs_info->tree_root;
10314         struct btrfs_path *path;
10315         struct extent_buffer *leaf;
10316         struct btrfs_key key;
10317         int del_slot, del_nr = 0;
10318         int ret;
10319         int found = 0;
10320
10321         path = btrfs_alloc_path();
10322         if (!path)
10323                 return -ENOMEM;
10324
10325         key.objectid = BTRFS_BALANCE_OBJECTID;
10326         key.type = BTRFS_BALANCE_ITEM_KEY;
10327         key.offset = 0;
10328
10329         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10330         if (ret) {
10331                 if (ret > 0)
10332                         ret = 0;
10333                 if (!ret)
10334                         goto reinit_data_reloc;
10335                 else
10336                         goto out;
10337         }
10338
10339         ret = btrfs_del_item(trans, root, path);
10340         if (ret)
10341                 goto out;
10342         btrfs_release_path(path);
10343
10344         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10345         key.type = BTRFS_ROOT_ITEM_KEY;
10346         key.offset = 0;
10347
10348         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10349         if (ret < 0)
10350                 goto out;
10351         while (1) {
10352                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10353                         if (!found)
10354                                 break;
10355
10356                         if (del_nr) {
10357                                 ret = btrfs_del_items(trans, root, path,
10358                                                       del_slot, del_nr);
10359                                 del_nr = 0;
10360                                 if (ret)
10361                                         goto out;
10362                         }
10363                         key.offset++;
10364                         btrfs_release_path(path);
10365
10366                         found = 0;
10367                         ret = btrfs_search_slot(trans, root, &key, path,
10368                                                 -1, 1);
10369                         if (ret < 0)
10370                                 goto out;
10371                         continue;
10372                 }
10373                 found = 1;
10374                 leaf = path->nodes[0];
10375                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10376                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10377                         break;
10378                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10379                         path->slots[0]++;
10380                         continue;
10381                 }
10382                 if (!del_nr) {
10383                         del_slot = path->slots[0];
10384                         del_nr = 1;
10385                 } else {
10386                         del_nr++;
10387                 }
10388                 path->slots[0]++;
10389         }
10390
10391         if (del_nr) {
10392                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10393                 if (ret)
10394                         goto out;
10395         }
10396         btrfs_release_path(path);
10397
10398 reinit_data_reloc:
10399         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10400         key.type = BTRFS_ROOT_ITEM_KEY;
10401         key.offset = (u64)-1;
10402         root = btrfs_read_fs_root(fs_info, &key);
10403         if (IS_ERR(root)) {
10404                 fprintf(stderr, "Error reading data reloc tree\n");
10405                 ret = PTR_ERR(root);
10406                 goto out;
10407         }
10408         record_root_in_trans(trans, root);
10409         ret = btrfs_fsck_reinit_root(trans, root, 0);
10410         if (ret)
10411                 goto out;
10412         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10413 out:
10414         btrfs_free_path(path);
10415         return ret;
10416 }
10417
10418 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10419                               struct btrfs_fs_info *fs_info)
10420 {
10421         u64 start = 0;
10422         int ret;
10423
10424         /*
10425          * The only reason we don't do this is because right now we're just
10426          * walking the trees we find and pinning down their bytes, we don't look
10427          * at any of the leaves.  In order to do mixed groups we'd have to check
10428          * the leaves of any fs roots and pin down the bytes for any file
10429          * extents we find.  Not hard but why do it if we don't have to?
10430          */
10431         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10432                 fprintf(stderr, "We don't support re-initing the extent tree "
10433                         "for mixed block groups yet, please notify a btrfs "
10434                         "developer you want to do this so they can add this "
10435                         "functionality.\n");
10436                 return -EINVAL;
10437         }
10438
10439         /*
10440          * first we need to walk all of the trees except the extent tree and pin
10441          * down the bytes that are in use so we don't overwrite any existing
10442          * metadata.
10443          */
10444         ret = pin_metadata_blocks(fs_info);
10445         if (ret) {
10446                 fprintf(stderr, "error pinning down used bytes\n");
10447                 return ret;
10448         }
10449
10450         /*
10451          * Need to drop all the block groups since we're going to recreate all
10452          * of them again.
10453          */
10454         btrfs_free_block_groups(fs_info);
10455         ret = reset_block_groups(fs_info);
10456         if (ret) {
10457                 fprintf(stderr, "error resetting the block groups\n");
10458                 return ret;
10459         }
10460
10461         /* Ok we can allocate now, reinit the extent root */
10462         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10463         if (ret) {
10464                 fprintf(stderr, "extent root initialization failed\n");
10465                 /*
10466                  * When the transaction code is updated we should end the
10467                  * transaction, but for now progs only knows about commit so
10468                  * just return an error.
10469                  */
10470                 return ret;
10471         }
10472
10473         /*
10474          * Now we have all the in-memory block groups setup so we can make
10475          * allocations properly, and the metadata we care about is safe since we
10476          * pinned all of it above.
10477          */
10478         while (1) {
10479                 struct btrfs_block_group_cache *cache;
10480
10481                 cache = btrfs_lookup_first_block_group(fs_info, start);
10482                 if (!cache)
10483                         break;
10484                 start = cache->key.objectid + cache->key.offset;
10485                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10486                                         &cache->key, &cache->item,
10487                                         sizeof(cache->item));
10488                 if (ret) {
10489                         fprintf(stderr, "Error adding block group\n");
10490                         return ret;
10491                 }
10492                 btrfs_extent_post_op(trans, fs_info->extent_root);
10493         }
10494
10495         ret = reset_balance(trans, fs_info);
10496         if (ret)
10497                 fprintf(stderr, "error resetting the pending balance\n");
10498
10499         return ret;
10500 }
10501
10502 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10503 {
10504         struct btrfs_path *path;
10505         struct btrfs_trans_handle *trans;
10506         struct btrfs_key key;
10507         int ret;
10508
10509         printf("Recowing metadata block %llu\n", eb->start);
10510         key.objectid = btrfs_header_owner(eb);
10511         key.type = BTRFS_ROOT_ITEM_KEY;
10512         key.offset = (u64)-1;
10513
10514         root = btrfs_read_fs_root(root->fs_info, &key);
10515         if (IS_ERR(root)) {
10516                 fprintf(stderr, "Couldn't find owner root %llu\n",
10517                         key.objectid);
10518                 return PTR_ERR(root);
10519         }
10520
10521         path = btrfs_alloc_path();
10522         if (!path)
10523                 return -ENOMEM;
10524
10525         trans = btrfs_start_transaction(root, 1);
10526         if (IS_ERR(trans)) {
10527                 btrfs_free_path(path);
10528                 return PTR_ERR(trans);
10529         }
10530
10531         path->lowest_level = btrfs_header_level(eb);
10532         if (path->lowest_level)
10533                 btrfs_node_key_to_cpu(eb, &key, 0);
10534         else
10535                 btrfs_item_key_to_cpu(eb, &key, 0);
10536
10537         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10538         btrfs_commit_transaction(trans, root);
10539         btrfs_free_path(path);
10540         return ret;
10541 }
10542
10543 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10544 {
10545         struct btrfs_path *path;
10546         struct btrfs_trans_handle *trans;
10547         struct btrfs_key key;
10548         int ret;
10549
10550         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10551                bad->key.type, bad->key.offset);
10552         key.objectid = bad->root_id;
10553         key.type = BTRFS_ROOT_ITEM_KEY;
10554         key.offset = (u64)-1;
10555
10556         root = btrfs_read_fs_root(root->fs_info, &key);
10557         if (IS_ERR(root)) {
10558                 fprintf(stderr, "Couldn't find owner root %llu\n",
10559                         key.objectid);
10560                 return PTR_ERR(root);
10561         }
10562
10563         path = btrfs_alloc_path();
10564         if (!path)
10565                 return -ENOMEM;
10566
10567         trans = btrfs_start_transaction(root, 1);
10568         if (IS_ERR(trans)) {
10569                 btrfs_free_path(path);
10570                 return PTR_ERR(trans);
10571         }
10572
10573         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10574         if (ret) {
10575                 if (ret > 0)
10576                         ret = 0;
10577                 goto out;
10578         }
10579         ret = btrfs_del_item(trans, root, path);
10580 out:
10581         btrfs_commit_transaction(trans, root);
10582         btrfs_free_path(path);
10583         return ret;
10584 }
10585
10586 static int zero_log_tree(struct btrfs_root *root)
10587 {
10588         struct btrfs_trans_handle *trans;
10589         int ret;
10590
10591         trans = btrfs_start_transaction(root, 1);
10592         if (IS_ERR(trans)) {
10593                 ret = PTR_ERR(trans);
10594                 return ret;
10595         }
10596         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10597         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10598         ret = btrfs_commit_transaction(trans, root);
10599         return ret;
10600 }
10601
10602 static int populate_csum(struct btrfs_trans_handle *trans,
10603                          struct btrfs_root *csum_root, char *buf, u64 start,
10604                          u64 len)
10605 {
10606         u64 offset = 0;
10607         u64 sectorsize;
10608         int ret = 0;
10609
10610         while (offset < len) {
10611                 sectorsize = csum_root->sectorsize;
10612                 ret = read_extent_data(csum_root, buf, start + offset,
10613                                        &sectorsize, 0);
10614                 if (ret)
10615                         break;
10616                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10617                                             start + offset, buf, sectorsize);
10618                 if (ret)
10619                         break;
10620                 offset += sectorsize;
10621         }
10622         return ret;
10623 }
10624
10625 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10626                                       struct btrfs_root *csum_root,
10627                                       struct btrfs_root *cur_root)
10628 {
10629         struct btrfs_path *path;
10630         struct btrfs_key key;
10631         struct extent_buffer *node;
10632         struct btrfs_file_extent_item *fi;
10633         char *buf = NULL;
10634         u64 start = 0;
10635         u64 len = 0;
10636         int slot = 0;
10637         int ret = 0;
10638
10639         path = btrfs_alloc_path();
10640         if (!path)
10641                 return -ENOMEM;
10642         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10643         if (!buf) {
10644                 ret = -ENOMEM;
10645                 goto out;
10646         }
10647
10648         key.objectid = 0;
10649         key.offset = 0;
10650         key.type = 0;
10651
10652         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10653         if (ret < 0)
10654                 goto out;
10655         /* Iterate all regular file extents and fill its csum */
10656         while (1) {
10657                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10658
10659                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10660                         goto next;
10661                 node = path->nodes[0];
10662                 slot = path->slots[0];
10663                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10664                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10665                         goto next;
10666                 start = btrfs_file_extent_disk_bytenr(node, fi);
10667                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10668
10669                 ret = populate_csum(trans, csum_root, buf, start, len);
10670                 if (ret == -EEXIST)
10671                         ret = 0;
10672                 if (ret < 0)
10673                         goto out;
10674 next:
10675                 /*
10676                  * TODO: if next leaf is corrupted, jump to nearest next valid
10677                  * leaf.
10678                  */
10679                 ret = btrfs_next_item(cur_root, path);
10680                 if (ret < 0)
10681                         goto out;
10682                 if (ret > 0) {
10683                         ret = 0;
10684                         goto out;
10685                 }
10686         }
10687
10688 out:
10689         btrfs_free_path(path);
10690         free(buf);
10691         return ret;
10692 }
10693
10694 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10695                                   struct btrfs_root *csum_root)
10696 {
10697         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10698         struct btrfs_path *path;
10699         struct btrfs_root *tree_root = fs_info->tree_root;
10700         struct btrfs_root *cur_root;
10701         struct extent_buffer *node;
10702         struct btrfs_key key;
10703         int slot = 0;
10704         int ret = 0;
10705
10706         path = btrfs_alloc_path();
10707         if (!path)
10708                 return -ENOMEM;
10709
10710         key.objectid = BTRFS_FS_TREE_OBJECTID;
10711         key.offset = 0;
10712         key.type = BTRFS_ROOT_ITEM_KEY;
10713
10714         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10715         if (ret < 0)
10716                 goto out;
10717         if (ret > 0) {
10718                 ret = -ENOENT;
10719                 goto out;
10720         }
10721
10722         while (1) {
10723                 node = path->nodes[0];
10724                 slot = path->slots[0];
10725                 btrfs_item_key_to_cpu(node, &key, slot);
10726                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10727                         goto out;
10728                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10729                         goto next;
10730                 if (!is_fstree(key.objectid))
10731                         goto next;
10732                 key.offset = (u64)-1;
10733
10734                 cur_root = btrfs_read_fs_root(fs_info, &key);
10735                 if (IS_ERR(cur_root) || !cur_root) {
10736                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10737                                 key.objectid);
10738                         goto out;
10739                 }
10740                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10741                                 cur_root);
10742                 if (ret < 0)
10743                         goto out;
10744 next:
10745                 ret = btrfs_next_item(tree_root, path);
10746                 if (ret > 0) {
10747                         ret = 0;
10748                         goto out;
10749                 }
10750                 if (ret < 0)
10751                         goto out;
10752         }
10753
10754 out:
10755         btrfs_free_path(path);
10756         return ret;
10757 }
10758
10759 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10760                                       struct btrfs_root *csum_root)
10761 {
10762         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10763         struct btrfs_path *path;
10764         struct btrfs_extent_item *ei;
10765         struct extent_buffer *leaf;
10766         char *buf;
10767         struct btrfs_key key;
10768         int ret;
10769
10770         path = btrfs_alloc_path();
10771         if (!path)
10772                 return -ENOMEM;
10773
10774         key.objectid = 0;
10775         key.type = BTRFS_EXTENT_ITEM_KEY;
10776         key.offset = 0;
10777
10778         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10779         if (ret < 0) {
10780                 btrfs_free_path(path);
10781                 return ret;
10782         }
10783
10784         buf = malloc(csum_root->sectorsize);
10785         if (!buf) {
10786                 btrfs_free_path(path);
10787                 return -ENOMEM;
10788         }
10789
10790         while (1) {
10791                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10792                         ret = btrfs_next_leaf(extent_root, path);
10793                         if (ret < 0)
10794                                 break;
10795                         if (ret) {
10796                                 ret = 0;
10797                                 break;
10798                         }
10799                 }
10800                 leaf = path->nodes[0];
10801
10802                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10803                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10804                         path->slots[0]++;
10805                         continue;
10806                 }
10807
10808                 ei = btrfs_item_ptr(leaf, path->slots[0],
10809                                     struct btrfs_extent_item);
10810                 if (!(btrfs_extent_flags(leaf, ei) &
10811                       BTRFS_EXTENT_FLAG_DATA)) {
10812                         path->slots[0]++;
10813                         continue;
10814                 }
10815
10816                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10817                                     key.offset);
10818                 if (ret)
10819                         break;
10820                 path->slots[0]++;
10821         }
10822
10823         btrfs_free_path(path);
10824         free(buf);
10825         return ret;
10826 }
10827
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * After an extent tree init the extent tree no longer holds any extent
 * information, so it cannot be used as the source.  A non-zero
 * @search_fs_tree selects the fs/subvolume trees as the source instead;
 * otherwise the extent tree is walked.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
10844
10845 static void free_roots_info_cache(void)
10846 {
10847         if (!roots_info_cache)
10848                 return;
10849
10850         while (!cache_tree_empty(roots_info_cache)) {
10851                 struct cache_extent *entry;
10852                 struct root_item_info *rii;
10853
10854                 entry = first_cache_extent(roots_info_cache);
10855                 if (!entry)
10856                         break;
10857                 remove_cache_extent(roots_info_cache, entry);
10858                 rii = container_of(entry, struct root_item_info, cache_extent);
10859                 free(rii);
10860         }
10861
10862         free(roots_info_cache);
10863         roots_info_cache = NULL;
10864 }
10865
10866 static int build_roots_info_cache(struct btrfs_fs_info *info)
10867 {
10868         int ret = 0;
10869         struct btrfs_key key;
10870         struct extent_buffer *leaf;
10871         struct btrfs_path *path;
10872
10873         if (!roots_info_cache) {
10874                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10875                 if (!roots_info_cache)
10876                         return -ENOMEM;
10877                 cache_tree_init(roots_info_cache);
10878         }
10879
10880         path = btrfs_alloc_path();
10881         if (!path)
10882                 return -ENOMEM;
10883
10884         key.objectid = 0;
10885         key.type = BTRFS_EXTENT_ITEM_KEY;
10886         key.offset = 0;
10887
10888         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10889         if (ret < 0)
10890                 goto out;
10891         leaf = path->nodes[0];
10892
10893         while (1) {
10894                 struct btrfs_key found_key;
10895                 struct btrfs_extent_item *ei;
10896                 struct btrfs_extent_inline_ref *iref;
10897                 int slot = path->slots[0];
10898                 int type;
10899                 u64 flags;
10900                 u64 root_id;
10901                 u8 level;
10902                 struct cache_extent *entry;
10903                 struct root_item_info *rii;
10904
10905                 if (slot >= btrfs_header_nritems(leaf)) {
10906                         ret = btrfs_next_leaf(info->extent_root, path);
10907                         if (ret < 0) {
10908                                 break;
10909                         } else if (ret) {
10910                                 ret = 0;
10911                                 break;
10912                         }
10913                         leaf = path->nodes[0];
10914                         slot = path->slots[0];
10915                 }
10916
10917                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10918
10919                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10920                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10921                         goto next;
10922
10923                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10924                 flags = btrfs_extent_flags(leaf, ei);
10925
10926                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10927                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10928                         goto next;
10929
10930                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10931                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10932                         level = found_key.offset;
10933                 } else {
10934                         struct btrfs_tree_block_info *binfo;
10935
10936                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10937                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10938                         level = btrfs_tree_block_level(leaf, binfo);
10939                 }
10940
10941                 /*
10942                  * For a root extent, it must be of the following type and the
10943                  * first (and only one) iref in the item.
10944                  */
10945                 type = btrfs_extent_inline_ref_type(leaf, iref);
10946                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10947                         goto next;
10948
10949                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10950                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10951                 if (!entry) {
10952                         rii = malloc(sizeof(struct root_item_info));
10953                         if (!rii) {
10954                                 ret = -ENOMEM;
10955                                 goto out;
10956                         }
10957                         rii->cache_extent.start = root_id;
10958                         rii->cache_extent.size = 1;
10959                         rii->level = (u8)-1;
10960                         entry = &rii->cache_extent;
10961                         ret = insert_cache_extent(roots_info_cache, entry);
10962                         ASSERT(ret == 0);
10963                 } else {
10964                         rii = container_of(entry, struct root_item_info,
10965                                            cache_extent);
10966                 }
10967
10968                 ASSERT(rii->cache_extent.start == root_id);
10969                 ASSERT(rii->cache_extent.size == 1);
10970
10971                 if (level > rii->level || rii->level == (u8)-1) {
10972                         rii->level = level;
10973                         rii->bytenr = found_key.objectid;
10974                         rii->gen = btrfs_extent_generation(leaf, ei);
10975                         rii->node_count = 1;
10976                 } else if (level == rii->level) {
10977                         rii->node_count++;
10978                 }
10979 next:
10980                 path->slots[0]++;
10981         }
10982
10983 out:
10984         btrfs_free_path(path);
10985
10986         return ret;
10987 }
10988
10989 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10990                                   struct btrfs_path *path,
10991                                   const struct btrfs_key *root_key,
10992                                   const int read_only_mode)
10993 {
10994         const u64 root_id = root_key->objectid;
10995         struct cache_extent *entry;
10996         struct root_item_info *rii;
10997         struct btrfs_root_item ri;
10998         unsigned long offset;
10999
11000         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11001         if (!entry) {
11002                 fprintf(stderr,
11003                         "Error: could not find extent items for root %llu\n",
11004                         root_key->objectid);
11005                 return -ENOENT;
11006         }
11007
11008         rii = container_of(entry, struct root_item_info, cache_extent);
11009         ASSERT(rii->cache_extent.start == root_id);
11010         ASSERT(rii->cache_extent.size == 1);
11011
11012         if (rii->node_count != 1) {
11013                 fprintf(stderr,
11014                         "Error: could not find btree root extent for root %llu\n",
11015                         root_id);
11016                 return -ENOENT;
11017         }
11018
11019         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11020         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11021
11022         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11023             btrfs_root_level(&ri) != rii->level ||
11024             btrfs_root_generation(&ri) != rii->gen) {
11025
11026                 /*
11027                  * If we're in repair mode but our caller told us to not update
11028                  * the root item, i.e. just check if it needs to be updated, don't
11029                  * print this message, since the caller will call us again shortly
11030                  * for the same root item without read only mode (the caller will
11031                  * open a transaction first).
11032                  */
11033                 if (!(read_only_mode && repair))
11034                         fprintf(stderr,
11035                                 "%sroot item for root %llu,"
11036                                 " current bytenr %llu, current gen %llu, current level %u,"
11037                                 " new bytenr %llu, new gen %llu, new level %u\n",
11038                                 (read_only_mode ? "" : "fixing "),
11039                                 root_id,
11040                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11041                                 btrfs_root_level(&ri),
11042                                 rii->bytenr, rii->gen, rii->level);
11043
11044                 if (btrfs_root_generation(&ri) > rii->gen) {
11045                         fprintf(stderr,
11046                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11047                                 root_id, btrfs_root_generation(&ri), rii->gen);
11048                         return -EINVAL;
11049                 }
11050
11051                 if (!read_only_mode) {
11052                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11053                         btrfs_set_root_level(&ri, rii->level);
11054                         btrfs_set_root_generation(&ri, rii->gen);
11055                         write_extent_buffer(path->nodes[0], &ri,
11056                                             offset, sizeof(ri));
11057                 }
11058
11059                 return 1;
11060         }
11061
11062         return 0;
11063 }
11064
11065 /*
11066  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11067  * caused read-only snapshots to be corrupted if they were created at a moment
11068  * when the source subvolume/snapshot had orphan items. The issue was that the
11069  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11070  * node instead of the post orphan cleanup root node.
11071  * So this function, and its callees, just detects and fixes those cases. Even
11072  * though the regression was for read-only snapshots, this function applies to
11073  * any snapshot/subvolume root.
11074  * This must be run before any other repair code - not doing it so, makes other
11075  * repair code delete or modify backrefs in the extent tree for example, which
11076  * will result in an inconsistent fs after repairing the root items.
11077  */
11078 static int repair_root_items(struct btrfs_fs_info *info)
11079 {
11080         struct btrfs_path *path = NULL;
11081         struct btrfs_key key;
11082         struct extent_buffer *leaf;
11083         struct btrfs_trans_handle *trans = NULL;
11084         int ret = 0;
11085         int bad_roots = 0;
11086         int need_trans = 0;
11087
11088         ret = build_roots_info_cache(info);
11089         if (ret)
11090                 goto out;
11091
11092         path = btrfs_alloc_path();
11093         if (!path) {
11094                 ret = -ENOMEM;
11095                 goto out;
11096         }
11097
11098         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11099         key.type = BTRFS_ROOT_ITEM_KEY;
11100         key.offset = 0;
11101
11102 again:
11103         /*
11104          * Avoid opening and committing transactions if a leaf doesn't have
11105          * any root items that need to be fixed, so that we avoid rotating
11106          * backup roots unnecessarily.
11107          */
11108         if (need_trans) {
11109                 trans = btrfs_start_transaction(info->tree_root, 1);
11110                 if (IS_ERR(trans)) {
11111                         ret = PTR_ERR(trans);
11112                         goto out;
11113                 }
11114         }
11115
11116         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11117                                 0, trans ? 1 : 0);
11118         if (ret < 0)
11119                 goto out;
11120         leaf = path->nodes[0];
11121
11122         while (1) {
11123                 struct btrfs_key found_key;
11124
11125                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11126                         int no_more_keys = find_next_key(path, &key);
11127
11128                         btrfs_release_path(path);
11129                         if (trans) {
11130                                 ret = btrfs_commit_transaction(trans,
11131                                                                info->tree_root);
11132                                 trans = NULL;
11133                                 if (ret < 0)
11134                                         goto out;
11135                         }
11136                         need_trans = 0;
11137                         if (no_more_keys)
11138                                 break;
11139                         goto again;
11140                 }
11141
11142                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11143
11144                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11145                         goto next;
11146                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11147                         goto next;
11148
11149                 ret = maybe_repair_root_item(info, path, &found_key,
11150                                              trans ? 0 : 1);
11151                 if (ret < 0)
11152                         goto out;
11153                 if (ret) {
11154                         if (!trans && repair) {
11155                                 need_trans = 1;
11156                                 key = found_key;
11157                                 btrfs_release_path(path);
11158                                 goto again;
11159                         }
11160                         bad_roots++;
11161                 }
11162 next:
11163                 path->slots[0]++;
11164         }
11165         ret = 0;
11166 out:
11167         free_roots_info_cache();
11168         btrfs_free_path(path);
11169         if (trans)
11170                 btrfs_commit_transaction(trans, info->tree_root);
11171         if (ret < 0)
11172                 return ret;
11173
11174         return bad_roots;
11175 }
11176
/*
 * Help text for "btrfs check"; cmd_check() passes this array to usage().
 * Entries are, in order: the synopsis, a short description, the long
 * description and, after an empty string, one or more lines per option.
 * The list is terminated by NULL.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               select mode, allows to make some memory/IO",
        "                            trade-offs, where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report           print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        NULL
};
11206
11207 int cmd_check(int argc, char **argv)
11208 {
11209         struct cache_tree root_cache;
11210         struct btrfs_root *root;
11211         struct btrfs_fs_info *info;
11212         u64 bytenr = 0;
11213         u64 subvolid = 0;
11214         u64 tree_root_bytenr = 0;
11215         u64 chunk_root_bytenr = 0;
11216         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11217         int ret;
11218         u64 num;
11219         int init_csum_tree = 0;
11220         int readonly = 0;
11221         int qgroup_report = 0;
11222         int qgroups_repaired = 0;
11223         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11224
11225         while(1) {
11226                 int c;
11227                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11228                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11229                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11230                         GETOPT_VAL_MODE };
11231                 static const struct option long_options[] = {
11232                         { "super", required_argument, NULL, 's' },
11233                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11234                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11235                         { "init-csum-tree", no_argument, NULL,
11236                                 GETOPT_VAL_INIT_CSUM },
11237                         { "init-extent-tree", no_argument, NULL,
11238                                 GETOPT_VAL_INIT_EXTENT },
11239                         { "check-data-csum", no_argument, NULL,
11240                                 GETOPT_VAL_CHECK_CSUM },
11241                         { "backup", no_argument, NULL, 'b' },
11242                         { "subvol-extents", required_argument, NULL, 'E' },
11243                         { "qgroup-report", no_argument, NULL, 'Q' },
11244                         { "tree-root", required_argument, NULL, 'r' },
11245                         { "chunk-root", required_argument, NULL,
11246                                 GETOPT_VAL_CHUNK_TREE },
11247                         { "progress", no_argument, NULL, 'p' },
11248                         { "mode", required_argument, NULL,
11249                                 GETOPT_VAL_MODE },
11250                         { NULL, 0, NULL, 0}
11251                 };
11252
11253                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11254                 if (c < 0)
11255                         break;
11256                 switch(c) {
11257                         case 'a': /* ignored */ break;
11258                         case 'b':
11259                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11260                                 break;
11261                         case 's':
11262                                 num = arg_strtou64(optarg);
11263                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11264                                         fprintf(stderr,
11265                                                 "ERROR: super mirror should be less than: %d\n",
11266                                                 BTRFS_SUPER_MIRROR_MAX);
11267                                         exit(1);
11268                                 }
11269                                 bytenr = btrfs_sb_offset(((int)num));
11270                                 printf("using SB copy %llu, bytenr %llu\n", num,
11271                                        (unsigned long long)bytenr);
11272                                 break;
11273                         case 'Q':
11274                                 qgroup_report = 1;
11275                                 break;
11276                         case 'E':
11277                                 subvolid = arg_strtou64(optarg);
11278                                 break;
11279                         case 'r':
11280                                 tree_root_bytenr = arg_strtou64(optarg);
11281                                 break;
11282                         case GETOPT_VAL_CHUNK_TREE:
11283                                 chunk_root_bytenr = arg_strtou64(optarg);
11284                                 break;
11285                         case 'p':
11286                                 ctx.progress_enabled = true;
11287                                 break;
11288                         case '?':
11289                         case 'h':
11290                                 usage(cmd_check_usage);
11291                         case GETOPT_VAL_REPAIR:
11292                                 printf("enabling repair mode\n");
11293                                 repair = 1;
11294                                 ctree_flags |= OPEN_CTREE_WRITES;
11295                                 break;
11296                         case GETOPT_VAL_READONLY:
11297                                 readonly = 1;
11298                                 break;
11299                         case GETOPT_VAL_INIT_CSUM:
11300                                 printf("Creating a new CRC tree\n");
11301                                 init_csum_tree = 1;
11302                                 repair = 1;
11303                                 ctree_flags |= OPEN_CTREE_WRITES;
11304                                 break;
11305                         case GETOPT_VAL_INIT_EXTENT:
11306                                 init_extent_tree = 1;
11307                                 ctree_flags |= (OPEN_CTREE_WRITES |
11308                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11309                                 repair = 1;
11310                                 break;
11311                         case GETOPT_VAL_CHECK_CSUM:
11312                                 check_data_csum = 1;
11313                                 break;
11314                         case GETOPT_VAL_MODE:
11315                                 check_mode = parse_check_mode(optarg);
11316                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11317                                         error("unknown mode: %s", optarg);
11318                                         exit(1);
11319                                 }
11320                                 break;
11321                 }
11322         }
11323
11324         if (check_argc_exact(argc - optind, 1))
11325                 usage(cmd_check_usage);
11326
11327         if (ctx.progress_enabled) {
11328                 ctx.tp = TASK_NOTHING;
11329                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11330         }
11331
11332         /* This check is the only reason for --readonly to exist */
11333         if (readonly && repair) {
11334                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11335                 exit(1);
11336         }
11337
11338         /*
11339          * Not supported yet
11340          */
11341         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11342                 error("Low memory mode doesn't support repair yet");
11343                 exit(1);
11344         }
11345
11346         radix_tree_init();
11347         cache_tree_init(&root_cache);
11348
11349         if((ret = check_mounted(argv[optind])) < 0) {
11350                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11351                 goto err_out;
11352         } else if(ret) {
11353                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11354                 ret = -EBUSY;
11355                 goto err_out;
11356         }
11357
11358         /* only allow partial opening under repair mode */
11359         if (repair)
11360                 ctree_flags |= OPEN_CTREE_PARTIAL;
11361
11362         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11363                                   chunk_root_bytenr, ctree_flags);
11364         if (!info) {
11365                 fprintf(stderr, "Couldn't open file system\n");
11366                 ret = -EIO;
11367                 goto err_out;
11368         }
11369
11370         global_info = info;
11371         root = info->fs_root;
11372
11373         /*
11374          * repair mode will force us to commit transaction which
11375          * will make us fail to load log tree when mounting.
11376          */
11377         if (repair && btrfs_super_log_root(info->super_copy)) {
11378                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11379                 if (!ret) {
11380                         ret = 1;
11381                         goto close_out;
11382                 }
11383                 ret = zero_log_tree(root);
11384                 if (ret) {
11385                         fprintf(stderr, "fail to zero log tree\n");
11386                         goto close_out;
11387                 }
11388         }
11389
11390         uuid_unparse(info->super_copy->fsid, uuidbuf);
11391         if (qgroup_report) {
11392                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11393                        uuidbuf);
11394                 ret = qgroup_verify_all(info);
11395                 if (ret == 0)
11396                         report_qgroups(1);
11397                 goto close_out;
11398         }
11399         if (subvolid) {
11400                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11401                        subvolid, argv[optind], uuidbuf);
11402                 ret = print_extent_state(info, subvolid);
11403                 goto close_out;
11404         }
11405         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11406
11407         if (!extent_buffer_uptodate(info->tree_root->node) ||
11408             !extent_buffer_uptodate(info->dev_root->node) ||
11409             !extent_buffer_uptodate(info->chunk_root->node)) {
11410                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11411                 ret = -EIO;
11412                 goto close_out;
11413         }
11414
11415         if (init_extent_tree || init_csum_tree) {
11416                 struct btrfs_trans_handle *trans;
11417
11418                 trans = btrfs_start_transaction(info->extent_root, 0);
11419                 if (IS_ERR(trans)) {
11420                         fprintf(stderr, "Error starting transaction\n");
11421                         ret = PTR_ERR(trans);
11422                         goto close_out;
11423                 }
11424
11425                 if (init_extent_tree) {
11426                         printf("Creating a new extent tree\n");
11427                         ret = reinit_extent_tree(trans, info);
11428                         if (ret)
11429                                 goto close_out;
11430                 }
11431
11432                 if (init_csum_tree) {
11433                         fprintf(stderr, "Reinit crc root\n");
11434                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11435                         if (ret) {
11436                                 fprintf(stderr, "crc root initialization failed\n");
11437                                 ret = -EIO;
11438                                 goto close_out;
11439                         }
11440
11441                         ret = fill_csum_tree(trans, info->csum_root,
11442                                              init_extent_tree);
11443                         if (ret) {
11444                                 fprintf(stderr, "crc refilling failed\n");
11445                                 return -EIO;
11446                         }
11447                 }
11448                 /*
11449                  * Ok now we commit and run the normal fsck, which will add
11450                  * extent entries for all of the items it finds.
11451                  */
11452                 ret = btrfs_commit_transaction(trans, info->extent_root);
11453                 if (ret)
11454                         goto close_out;
11455         }
11456         if (!extent_buffer_uptodate(info->extent_root->node)) {
11457                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11458                 ret = -EIO;
11459                 goto close_out;
11460         }
11461         if (!extent_buffer_uptodate(info->csum_root->node)) {
11462                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11463                 ret = -EIO;
11464                 goto close_out;
11465         }
11466
11467         if (!ctx.progress_enabled)
11468                 fprintf(stderr, "checking extents\n");
11469         if (check_mode == CHECK_MODE_LOWMEM)
11470                 ret = check_chunks_and_extents_v2(root);
11471         else
11472                 ret = check_chunks_and_extents(root);
11473         if (ret)
11474                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11475
11476         ret = repair_root_items(info);
11477         if (ret < 0)
11478                 goto close_out;
11479         if (repair) {
11480                 fprintf(stderr, "Fixed %d roots.\n", ret);
11481                 ret = 0;
11482         } else if (ret > 0) {
11483                 fprintf(stderr,
11484                        "Found %d roots with an outdated root item.\n",
11485                        ret);
11486                 fprintf(stderr,
11487                         "Please run a filesystem check with the option --repair to fix them.\n");
11488                 ret = 1;
11489                 goto close_out;
11490         }
11491
11492         if (!ctx.progress_enabled) {
11493                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11494                         fprintf(stderr, "checking free space tree\n");
11495                 else
11496                         fprintf(stderr, "checking free space cache\n");
11497         }
11498         ret = check_space_cache(root);
11499         if (ret)
11500                 goto out;
11501
11502         /*
11503          * We used to have to have these hole extents in between our real
11504          * extents so if we don't have this flag set we need to make sure there
11505          * are no gaps in the file extents for inodes, otherwise we can just
11506          * ignore it when this happens.
11507          */
11508         no_holes = btrfs_fs_incompat(root->fs_info,
11509                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11510         if (!ctx.progress_enabled)
11511                 fprintf(stderr, "checking fs roots\n");
11512         ret = check_fs_roots(root, &root_cache);
11513         if (ret)
11514                 goto out;
11515
11516         fprintf(stderr, "checking csums\n");
11517         ret = check_csums(root);
11518         if (ret)
11519                 goto out;
11520
11521         fprintf(stderr, "checking root refs\n");
11522         ret = check_root_refs(root, &root_cache);
11523         if (ret)
11524                 goto out;
11525
11526         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11527                 struct extent_buffer *eb;
11528
11529                 eb = list_first_entry(&root->fs_info->recow_ebs,
11530                                       struct extent_buffer, recow);
11531                 list_del_init(&eb->recow);
11532                 ret = recow_extent_buffer(root, eb);
11533                 if (ret)
11534                         break;
11535         }
11536
11537         while (!list_empty(&delete_items)) {
11538                 struct bad_item *bad;
11539
11540                 bad = list_first_entry(&delete_items, struct bad_item, list);
11541                 list_del_init(&bad->list);
11542                 if (repair)
11543                         ret = delete_bad_item(root, bad);
11544                 free(bad);
11545         }
11546
11547         if (info->quota_enabled) {
11548                 int err;
11549                 fprintf(stderr, "checking quota groups\n");
11550                 err = qgroup_verify_all(info);
11551                 if (err)
11552                         goto out;
11553                 report_qgroups(0);
11554                 err = repair_qgroups(info, &qgroups_repaired);
11555                 if (err)
11556                         goto out;
11557         }
11558
11559         if (!list_empty(&root->fs_info->recow_ebs)) {
11560                 fprintf(stderr, "Transid errors in file system\n");
11561                 ret = 1;
11562         }
11563 out:
11564         /* Don't override original ret */
11565         if (!ret && qgroups_repaired)
11566                 ret = qgroups_repaired;
11567
11568         if (found_old_backref) { /*
11569                  * there was a disk format change when mixed
11570                  * backref was in testing tree. The old format
11571                  * existed about one week.
11572                  */
11573                 printf("\n * Found old mixed backref format. "
11574                        "The old format is not supported! *"
11575                        "\n * Please mount the FS in readonly mode, "
11576                        "backup data and re-format the FS. *\n\n");
11577                 ret = 1;
11578         }
11579         printf("found %llu bytes used err is %d\n",
11580                (unsigned long long)bytes_used, ret);
11581         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11582         printf("total tree bytes: %llu\n",
11583                (unsigned long long)total_btree_bytes);
11584         printf("total fs tree bytes: %llu\n",
11585                (unsigned long long)total_fs_tree_bytes);
11586         printf("total extent tree bytes: %llu\n",
11587                (unsigned long long)total_extent_tree_bytes);
11588         printf("btree space waste bytes: %llu\n",
11589                (unsigned long long)btree_space_waste);
11590         printf("file data blocks allocated: %llu\n referenced %llu\n",
11591                 (unsigned long long)data_bytes_allocated,
11592                 (unsigned long long)data_bytes_referenced);
11593
11594         free_qgroup_counts();
11595         free_root_recs_tree(&root_cache);
11596 close_out:
11597         close_ctree(root);
11598 err_out:
11599         if (ctx.progress_enabled)
11600                 task_deinit(ctx.info);
11601
11602         return ret;
11603 }