btrfs-progs: check: use on-stack path buffer in delete_dir_index
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/* Phases that the progress indicator (print_status_check) can report. */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel: has to stay the final enumerator. */
	TASK_NOTHING,
};
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/* Checker modes; parse_check_mode() maps a mode name to one of these. */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for an unrecognized name. */
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode currently selected; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
86 struct extent_backref {
87         struct list_head list;
88         unsigned int is_data:1;
89         unsigned int found_extent_tree:1;
90         unsigned int full_backref:1;
91         unsigned int found_ref:1;
92         unsigned int broken:1;
93 };
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
100 struct data_backref {
101         struct extent_backref node;
102         union {
103                 u64 parent;
104                 u64 root;
105         };
106         u64 owner;
107         u64 offset;
108         u64 disk_bytenr;
109         u64 bytes;
110         u64 ram_bytes;
111         u32 num_refs;
112         u32 found_ref;
113 };
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
120 /*
121  * Much like data_backref, just removed the undetermined members
122  * and change it to use list_head.
123  * During extent scan, it is stored in root->orphan_data_extent.
124  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
125  */
126 struct orphan_data_extent {
127         struct list_head list;
128         u64 root;
129         u64 objectid;
130         u64 offset;
131         u64 disk_bytenr;
132         u64 disk_len;
133 };
134
135 struct tree_backref {
136         struct extent_backref node;
137         union {
138                 u64 parent;
139                 u64 root;
140         };
141 };
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
/*
 * Value used to explicitly initialize extent_record::flag_block_full_backref
 * to "not set".
 */
enum {
	FLAG_UNSET = 2
};
150
151 struct extent_record {
152         struct list_head backrefs;
153         struct list_head dups;
154         struct list_head list;
155         struct cache_extent cache;
156         struct btrfs_disk_key parent_key;
157         u64 start;
158         u64 max_size;
159         u64 nr;
160         u64 refs;
161         u64 extent_item_refs;
162         u64 generation;
163         u64 parent_generation;
164         u64 info_objectid;
165         u32 num_duplicates;
166         u8 info_level;
167         unsigned int flag_block_full_backref:2;
168         unsigned int found_rec:1;
169         unsigned int content_checked:1;
170         unsigned int owner_ref_checked:1;
171         unsigned int is_root:1;
172         unsigned int metadata:1;
173         unsigned int bad_full_backref:1;
174         unsigned int crossing_stripes:1;
175         unsigned int wrong_chunk_type:1;
176 };
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
183 struct inode_backref {
184         struct list_head list;
185         unsigned int found_dir_item:1;
186         unsigned int found_dir_index:1;
187         unsigned int found_inode_ref:1;
188         u8 filetype;
189         u8 ref_type;
190         int errors;
191         u64 dir;
192         u64 index;
193         u16 namelen;
194         char name[0];
195 };
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
202 struct root_item_record {
203         struct list_head list;
204         u64 objectid;
205         u64 bytenr;
206         u64 last_snapshot;
207         u8 level;
208         u8 drop_level;
209         int level_size;
210         struct btrfs_key drop_key;
211 };
212
/*
 * Error bits for backref records (for example inode_backref::errors).
 * print_ref_error() turns them into text.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
226
227 struct file_extent_hole {
228         struct rb_node node;
229         u64 start;
230         u64 len;
231 };
232
233 struct inode_record {
234         struct list_head backrefs;
235         unsigned int checked:1;
236         unsigned int merging:1;
237         unsigned int found_inode_item:1;
238         unsigned int found_dir_item:1;
239         unsigned int found_file_extent:1;
240         unsigned int found_csum_item:1;
241         unsigned int some_csum_missing:1;
242         unsigned int nodatasum:1;
243         int errors;
244
245         u64 ino;
246         u32 nlink;
247         u32 imode;
248         u64 isize;
249         u64 nbytes;
250
251         u32 found_link;
252         u64 found_size;
253         u64 extent_start;
254         u64 extent_end;
255         struct rb_root holes;
256         struct list_head orphan_extents;
257
258         u32 refs;
259 };
260
/*
 * Error bits stored in inode_record::errors.
 * print_inode_error() turns them into text.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
276
277 struct root_backref {
278         struct list_head list;
279         unsigned int found_dir_item:1;
280         unsigned int found_dir_index:1;
281         unsigned int found_back_ref:1;
282         unsigned int found_forward_ref:1;
283         unsigned int reachable:1;
284         int errors;
285         u64 ref_root;
286         u64 dir;
287         u64 index;
288         u16 namelen;
289         char name[0];
290 };
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
297 struct root_record {
298         struct list_head backrefs;
299         struct cache_extent cache;
300         unsigned int found_root_item:1;
301         u64 objectid;
302         u32 found_ref;
303 };
304
305 struct ptr_node {
306         struct cache_extent cache;
307         void *data;
308 };
309
310 struct shared_node {
311         struct cache_extent cache;
312         struct cache_tree root_cache;
313         struct cache_tree inode_cache;
314         struct inode_record *current;
315         u32 refs;
316 };
317
318 struct block_info {
319         u64 start;
320         u32 size;
321 };
322
323 struct walk_control {
324         struct cache_tree shared;
325         struct shared_node *nodes[BTRFS_MAX_LEVEL];
326         int active_node;
327         int root_level;
328 };
329
330 struct bad_item {
331         struct btrfs_key key;
332         u64 root_id;
333         struct list_head list;
334 };
335
336 struct extent_entry {
337         u64 bytenr;
338         u64 bytes;
339         int count;
340         int broken;
341         struct list_head list;
342 };
343
344 struct root_item_info {
345         /* level of the root */
346         u8 level;
347         /* number of nodes at this level, must be 1 for a root */
348         int node_count;
349         u64 bytenr;
350         u64 gen;
351         struct cache_extent cache_extent;
352 };
353
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.  Every flag owns a distinct bit:
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with REFERENCER_MISMATCH,
 * which made the two conditions indistinguishable.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);
	return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         struct rb_node *rb;
663         size_t size;
664         int ret;
665
666         rec = malloc(sizeof(*rec));
667         if (!rec)
668                 return ERR_PTR(-ENOMEM);
669         memcpy(rec, orig_rec, sizeof(*rec));
670         rec->refs = 1;
671         INIT_LIST_HEAD(&rec->backrefs);
672         INIT_LIST_HEAD(&rec->orphan_extents);
673         rec->holes = RB_ROOT;
674
675         list_for_each_entry(orig, &orig_rec->backrefs, list) {
676                 size = sizeof(*orig) + orig->namelen + 1;
677                 backref = malloc(size);
678                 if (!backref) {
679                         ret = -ENOMEM;
680                         goto cleanup;
681                 }
682                 memcpy(backref, orig, size);
683                 list_add_tail(&backref->list, &rec->backrefs);
684         }
685         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686                 dst_orphan = malloc(sizeof(*dst_orphan));
687                 if (!dst_orphan) {
688                         ret = -ENOMEM;
689                         goto cleanup;
690                 }
691                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
693         }
694         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
695         if (ret < 0)
696                 goto cleanup_rb;
697
698         return rec;
699
700 cleanup_rb:
701         rb = rb_first(&rec->holes);
702         while (rb) {
703                 struct file_extent_hole *hole;
704
705                 hole = rb_entry(rb, struct file_extent_hole, node);
706                 rb = rb_next(rb);
707                 free(hole);
708         }
709
710 cleanup:
711         if (!list_empty(&rec->backrefs))
712                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713                         list_del(&orig->list);
714                         free(orig);
715                 }
716
717         if (!list_empty(&rec->orphan_extents))
718                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719                         list_del(&orig->list);
720                         free(orig);
721                 }
722
723         free(rec);
724
725         return ERR_PTR(ret);
726 }
727
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
729                                       u64 objectid)
730 {
731         struct orphan_data_extent *orphan;
732
733         if (list_empty(orphan_extents))
734                 return;
735         printf("The following data extent is lost in tree %llu:\n",
736                objectid);
737         list_for_each_entry(orphan, orphan_extents, list) {
738                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
740                        orphan->disk_len);
741         }
742 }
743
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
745 {
746         u64 root_objectid = root->root_key.objectid;
747         int errors = rec->errors;
748
749         if (!errors)
750                 return;
751         /* reloc root errors, we print its corresponding fs root objectid*/
752         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753                 root_objectid = root->root_key.offset;
754                 fprintf(stderr, "reloc");
755         }
756         fprintf(stderr, "root %llu inode %llu errors %x",
757                 (unsigned long long) root_objectid,
758                 (unsigned long long) rec->ino, rec->errors);
759
760         if (errors & I_ERR_NO_INODE_ITEM)
761                 fprintf(stderr, ", no inode item");
762         if (errors & I_ERR_NO_ORPHAN_ITEM)
763                 fprintf(stderr, ", no orphan item");
764         if (errors & I_ERR_DUP_INODE_ITEM)
765                 fprintf(stderr, ", dup inode item");
766         if (errors & I_ERR_DUP_DIR_INDEX)
767                 fprintf(stderr, ", dup dir index");
768         if (errors & I_ERR_ODD_DIR_ITEM)
769                 fprintf(stderr, ", odd dir item");
770         if (errors & I_ERR_ODD_FILE_EXTENT)
771                 fprintf(stderr, ", odd file extent");
772         if (errors & I_ERR_BAD_FILE_EXTENT)
773                 fprintf(stderr, ", bad file extent");
774         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775                 fprintf(stderr, ", file extent overlap");
776         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777                 fprintf(stderr, ", file extent discount");
778         if (errors & I_ERR_DIR_ISIZE_WRONG)
779                 fprintf(stderr, ", dir isize wrong");
780         if (errors & I_ERR_FILE_NBYTES_WRONG)
781                 fprintf(stderr, ", nbytes wrong");
782         if (errors & I_ERR_ODD_CSUM_ITEM)
783                 fprintf(stderr, ", odd csum item");
784         if (errors & I_ERR_SOME_CSUM_MISSING)
785                 fprintf(stderr, ", some csum missing");
786         if (errors & I_ERR_LINK_COUNT_WRONG)
787                 fprintf(stderr, ", link count wrong");
788         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789                 fprintf(stderr, ", orphan file extent");
790         fprintf(stderr, "\n");
791         /* Print the orphan extents if needed */
792         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
794
795         /* Print the holes if needed */
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797                 struct file_extent_hole *hole;
798                 struct rb_node *node;
799                 int found = 0;
800
801                 node = rb_first(&rec->holes);
802                 fprintf(stderr, "Found file extent holes:\n");
803                 while (node) {
804                         found = 1;
805                         hole = rb_entry(node, struct file_extent_hole, node);
806                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
807                                 hole->start, hole->len);
808                         node = rb_next(node);
809                 }
810                 if (!found)
811                         fprintf(stderr, "\tstart: 0, len: %llu\n",
812                                 round_up(rec->isize, root->sectorsize));
813         }
814 }
815
816 static void print_ref_error(int errors)
817 {
818         if (errors & REF_ERR_NO_DIR_ITEM)
819                 fprintf(stderr, ", no dir item");
820         if (errors & REF_ERR_NO_DIR_INDEX)
821                 fprintf(stderr, ", no dir index");
822         if (errors & REF_ERR_NO_INODE_REF)
823                 fprintf(stderr, ", no inode ref");
824         if (errors & REF_ERR_DUP_DIR_ITEM)
825                 fprintf(stderr, ", dup dir item");
826         if (errors & REF_ERR_DUP_DIR_INDEX)
827                 fprintf(stderr, ", dup dir index");
828         if (errors & REF_ERR_DUP_INODE_REF)
829                 fprintf(stderr, ", dup inode ref");
830         if (errors & REF_ERR_INDEX_UNMATCH)
831                 fprintf(stderr, ", index mismatch");
832         if (errors & REF_ERR_FILETYPE_UNMATCH)
833                 fprintf(stderr, ", filetype mismatch");
834         if (errors & REF_ERR_NAME_TOO_LONG)
835                 fprintf(stderr, ", name too long");
836         if (errors & REF_ERR_NO_ROOT_REF)
837                 fprintf(stderr, ", no root ref");
838         if (errors & REF_ERR_NO_ROOT_BACKREF)
839                 fprintf(stderr, ", no root backref");
840         if (errors & REF_ERR_DUP_ROOT_REF)
841                 fprintf(stderr, ", dup root ref");
842         if (errors & REF_ERR_DUP_ROOT_BACKREF)
843                 fprintf(stderr, ", dup root backref");
844         fprintf(stderr, "\n");
845 }
846
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
848                                           u64 ino, int mod)
849 {
850         struct ptr_node *node;
851         struct cache_extent *cache;
852         struct inode_record *rec = NULL;
853         int ret;
854
855         cache = lookup_cache_extent(inode_cache, ino, 1);
856         if (cache) {
857                 node = container_of(cache, struct ptr_node, cache);
858                 rec = node->data;
859                 if (mod && rec->refs > 1) {
860                         node->data = clone_inode_rec(rec);
861                         if (IS_ERR(node->data))
862                                 return node->data;
863                         rec->refs--;
864                         rec = node->data;
865                 }
866         } else if (mod) {
867                 rec = calloc(1, sizeof(*rec));
868                 if (!rec)
869                         return ERR_PTR(-ENOMEM);
870                 rec->ino = ino;
871                 rec->extent_start = (u64)-1;
872                 rec->refs = 1;
873                 INIT_LIST_HEAD(&rec->backrefs);
874                 INIT_LIST_HEAD(&rec->orphan_extents);
875                 rec->holes = RB_ROOT;
876
877                 node = malloc(sizeof(*node));
878                 if (!node) {
879                         free(rec);
880                         return ERR_PTR(-ENOMEM);
881                 }
882                 node->cache.start = ino;
883                 node->cache.size = 1;
884                 node->data = rec;
885
886                 if (ino == BTRFS_FREE_INO_OBJECTID)
887                         rec->found_link = 1;
888
889                 ret = insert_cache_extent(inode_cache, &node->cache);
890                 if (ret)
891                         return ERR_PTR(-EEXIST);
892         }
893         return rec;
894 }
895
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
897 {
898         struct orphan_data_extent *orphan;
899
900         while (!list_empty(orphan_extents)) {
901                 orphan = list_entry(orphan_extents->next,
902                                     struct orphan_data_extent, list);
903                 list_del(&orphan->list);
904                 free(orphan);
905         }
906 }
907
908 static void free_inode_rec(struct inode_record *rec)
909 {
910         struct inode_backref *backref;
911
912         if (--rec->refs > 0)
913                 return;
914
915         while (!list_empty(&rec->backrefs)) {
916                 backref = to_inode_backref(rec->backrefs.next);
917                 list_del(&backref->list);
918                 free(backref);
919         }
920         free_orphan_data_extents(&rec->orphan_extents);
921         free_file_extent_holes(&rec->holes);
922         free(rec);
923 }
924
925 static int can_free_inode_rec(struct inode_record *rec)
926 {
927         if (!rec->errors && rec->checked && rec->found_inode_item &&
928             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
929                 return 1;
930         return 0;
931 }
932
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934                                  struct inode_record *rec)
935 {
936         struct cache_extent *cache;
937         struct inode_backref *tmp, *backref;
938         struct ptr_node *node;
939         u8 filetype;
940
941         if (!rec->found_inode_item)
942                 return;
943
944         filetype = imode_to_type(rec->imode);
945         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946                 if (backref->found_dir_item && backref->found_dir_index) {
947                         if (backref->filetype != filetype)
948                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949                         if (!backref->errors && backref->found_inode_ref &&
950                             rec->nlink == rec->found_link) {
951                                 list_del(&backref->list);
952                                 free(backref);
953                         }
954                 }
955         }
956
957         if (!rec->checked || rec->merging)
958                 return;
959
960         if (S_ISDIR(rec->imode)) {
961                 if (rec->found_size != rec->isize)
962                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963                 if (rec->found_file_extent)
964                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
965         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966                 if (rec->found_dir_item)
967                         rec->errors |= I_ERR_ODD_DIR_ITEM;
968                 if (rec->found_size != rec->nbytes)
969                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
970                 if (rec->nlink > 0 && !no_holes &&
971                     (rec->extent_end < rec->isize ||
972                      first_extent_gap(&rec->holes) < rec->isize))
973                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
974         }
975
976         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977                 if (rec->found_csum_item && rec->nodatasum)
978                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
979                 if (rec->some_csum_missing && !rec->nodatasum)
980                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
981         }
982
983         BUG_ON(rec->refs != 1);
984         if (can_free_inode_rec(rec)) {
985                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986                 node = container_of(cache, struct ptr_node, cache);
987                 BUG_ON(node->data != rec);
988                 remove_cache_extent(inode_cache, &node->cache);
989                 free(node);
990                 free_inode_rec(rec);
991         }
992 }
993
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
995 {
996         struct btrfs_path path;
997         struct btrfs_key key;
998         int ret;
999
1000         key.objectid = BTRFS_ORPHAN_OBJECTID;
1001         key.type = BTRFS_ORPHAN_ITEM_KEY;
1002         key.offset = ino;
1003
1004         btrfs_init_path(&path);
1005         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006         btrfs_release_path(&path);
1007         if (ret > 0)
1008                 ret = -ENOENT;
1009         return ret;
1010 }
1011
1012 static int process_inode_item(struct extent_buffer *eb,
1013                               int slot, struct btrfs_key *key,
1014                               struct shared_node *active_node)
1015 {
1016         struct inode_record *rec;
1017         struct btrfs_inode_item *item;
1018
1019         rec = active_node->current;
1020         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021         if (rec->found_inode_item) {
1022                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1023                 return 1;
1024         }
1025         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026         rec->nlink = btrfs_inode_nlink(eb, item);
1027         rec->isize = btrfs_inode_size(eb, item);
1028         rec->nbytes = btrfs_inode_nbytes(eb, item);
1029         rec->imode = btrfs_inode_mode(eb, item);
1030         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1031                 rec->nodatasum = 1;
1032         rec->found_inode_item = 1;
1033         if (rec->nlink == 0)
1034                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035         maybe_free_inode_rec(&active_node->inode_cache, rec);
1036         return 0;
1037 }
1038
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1040                                                 const char *name,
1041                                                 int namelen, u64 dir)
1042 {
1043         struct inode_backref *backref;
1044
1045         list_for_each_entry(backref, &rec->backrefs, list) {
1046                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1047                         break;
1048                 if (backref->dir != dir || backref->namelen != namelen)
1049                         continue;
1050                 if (memcmp(name, backref->name, namelen))
1051                         continue;
1052                 return backref;
1053         }
1054
1055         backref = malloc(sizeof(*backref) + namelen + 1);
1056         if (!backref)
1057                 return NULL;
1058         memset(backref, 0, sizeof(*backref));
1059         backref->dir = dir;
1060         backref->namelen = namelen;
1061         memcpy(backref->name, name, namelen);
1062         backref->name[namelen] = '\0';
1063         list_add_tail(&backref->list, &rec->backrefs);
1064         return backref;
1065 }
1066
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068                              u64 ino, u64 dir, u64 index,
1069                              const char *name, int namelen,
1070                              u8 filetype, u8 itemtype, int errors)
1071 {
1072         struct inode_record *rec;
1073         struct inode_backref *backref;
1074
1075         rec = get_inode_rec(inode_cache, ino, 1);
1076         BUG_ON(IS_ERR(rec));
1077         backref = get_inode_backref(rec, name, namelen, dir);
1078         BUG_ON(!backref);
1079         if (errors)
1080                 backref->errors |= errors;
1081         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082                 if (backref->found_dir_index)
1083                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084                 if (backref->found_inode_ref && backref->index != index)
1085                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1086                 if (backref->found_dir_item && backref->filetype != filetype)
1087                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1088
1089                 backref->index = index;
1090                 backref->filetype = filetype;
1091                 backref->found_dir_index = 1;
1092         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1093                 rec->found_link++;
1094                 if (backref->found_dir_item)
1095                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096                 if (backref->found_dir_index && backref->filetype != filetype)
1097                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1098
1099                 backref->filetype = filetype;
1100                 backref->found_dir_item = 1;
1101         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103                 if (backref->found_inode_ref)
1104                         backref->errors |= REF_ERR_DUP_INODE_REF;
1105                 if (backref->found_dir_index && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 else
1108                         backref->index = index;
1109
1110                 backref->ref_type = itemtype;
1111                 backref->found_inode_ref = 1;
1112         } else {
1113                 BUG_ON(1);
1114         }
1115
1116         maybe_free_inode_rec(inode_cache, rec);
1117         return 0;
1118 }
1119
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121                             struct cache_tree *dst_cache)
1122 {
1123         struct inode_backref *backref;
1124         u32 dir_count = 0;
1125         int ret = 0;
1126
1127         dst->merging = 1;
1128         list_for_each_entry(backref, &src->backrefs, list) {
1129                 if (backref->found_dir_index) {
1130                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1131                                         backref->index, backref->name,
1132                                         backref->namelen, backref->filetype,
1133                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1134                 }
1135                 if (backref->found_dir_item) {
1136                         dir_count++;
1137                         add_inode_backref(dst_cache, dst->ino,
1138                                         backref->dir, 0, backref->name,
1139                                         backref->namelen, backref->filetype,
1140                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1141                 }
1142                 if (backref->found_inode_ref) {
1143                         add_inode_backref(dst_cache, dst->ino,
1144                                         backref->dir, backref->index,
1145                                         backref->name, backref->namelen, 0,
1146                                         backref->ref_type, backref->errors);
1147                 }
1148         }
1149
1150         if (src->found_dir_item)
1151                 dst->found_dir_item = 1;
1152         if (src->found_file_extent)
1153                 dst->found_file_extent = 1;
1154         if (src->found_csum_item)
1155                 dst->found_csum_item = 1;
1156         if (src->some_csum_missing)
1157                 dst->some_csum_missing = 1;
1158         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1160                 if (ret < 0)
1161                         return ret;
1162         }
1163
1164         BUG_ON(src->found_link < dir_count);
1165         dst->found_link += src->found_link - dir_count;
1166         dst->found_size += src->found_size;
1167         if (src->extent_start != (u64)-1) {
1168                 if (dst->extent_start == (u64)-1) {
1169                         dst->extent_start = src->extent_start;
1170                         dst->extent_end = src->extent_end;
1171                 } else {
1172                         if (dst->extent_end > src->extent_start)
1173                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174                         else if (dst->extent_end < src->extent_start) {
1175                                 ret = add_file_extent_hole(&dst->holes,
1176                                         dst->extent_end,
1177                                         src->extent_start - dst->extent_end);
1178                         }
1179                         if (dst->extent_end < src->extent_end)
1180                                 dst->extent_end = src->extent_end;
1181                 }
1182         }
1183
1184         dst->errors |= src->errors;
1185         if (src->found_inode_item) {
1186                 if (!dst->found_inode_item) {
1187                         dst->nlink = src->nlink;
1188                         dst->isize = src->isize;
1189                         dst->nbytes = src->nbytes;
1190                         dst->imode = src->imode;
1191                         dst->nodatasum = src->nodatasum;
1192                         dst->found_inode_item = 1;
1193                 } else {
1194                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1195                 }
1196         }
1197         dst->merging = 0;
1198
1199         return 0;
1200 }
1201
1202 static int splice_shared_node(struct shared_node *src_node,
1203                               struct shared_node *dst_node)
1204 {
1205         struct cache_extent *cache;
1206         struct ptr_node *node, *ins;
1207         struct cache_tree *src, *dst;
1208         struct inode_record *rec, *conflict;
1209         u64 current_ino = 0;
1210         int splice = 0;
1211         int ret;
1212
1213         if (--src_node->refs == 0)
1214                 splice = 1;
1215         if (src_node->current)
1216                 current_ino = src_node->current->ino;
1217
1218         src = &src_node->root_cache;
1219         dst = &dst_node->root_cache;
1220 again:
1221         cache = search_cache_extent(src, 0);
1222         while (cache) {
1223                 node = container_of(cache, struct ptr_node, cache);
1224                 rec = node->data;
1225                 cache = next_cache_extent(cache);
1226
1227                 if (splice) {
1228                         remove_cache_extent(src, &node->cache);
1229                         ins = node;
1230                 } else {
1231                         ins = malloc(sizeof(*ins));
1232                         BUG_ON(!ins);
1233                         ins->cache.start = node->cache.start;
1234                         ins->cache.size = node->cache.size;
1235                         ins->data = rec;
1236                         rec->refs++;
1237                 }
1238                 ret = insert_cache_extent(dst, &ins->cache);
1239                 if (ret == -EEXIST) {
1240                         conflict = get_inode_rec(dst, rec->ino, 1);
1241                         BUG_ON(IS_ERR(conflict));
1242                         merge_inode_recs(rec, conflict, dst);
1243                         if (rec->checked) {
1244                                 conflict->checked = 1;
1245                                 if (dst_node->current == conflict)
1246                                         dst_node->current = NULL;
1247                         }
1248                         maybe_free_inode_rec(dst, conflict);
1249                         free_inode_rec(rec);
1250                         free(ins);
1251                 } else {
1252                         BUG_ON(ret);
1253                 }
1254         }
1255
1256         if (src == &src_node->root_cache) {
1257                 src = &src_node->inode_cache;
1258                 dst = &dst_node->inode_cache;
1259                 goto again;
1260         }
1261
1262         if (current_ino > 0 && (!dst_node->current ||
1263             current_ino > dst_node->current->ino)) {
1264                 if (dst_node->current) {
1265                         dst_node->current->checked = 1;
1266                         maybe_free_inode_rec(dst, dst_node->current);
1267                 }
1268                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269                 BUG_ON(IS_ERR(dst_node->current));
1270         }
1271         return 0;
1272 }
1273
1274 static void free_inode_ptr(struct cache_extent *cache)
1275 {
1276         struct ptr_node *node;
1277         struct inode_record *rec;
1278
1279         node = container_of(cache, struct ptr_node, cache);
1280         rec = node->data;
1281         free_inode_rec(rec);
1282         free(node);
1283 }
1284
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1286
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1288                                             u64 bytenr)
1289 {
1290         struct cache_extent *cache;
1291         struct shared_node *node;
1292
1293         cache = lookup_cache_extent(shared, bytenr, 1);
1294         if (cache) {
1295                 node = container_of(cache, struct shared_node, cache);
1296                 return node;
1297         }
1298         return NULL;
1299 }
1300
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1302 {
1303         int ret;
1304         struct shared_node *node;
1305
1306         node = calloc(1, sizeof(*node));
1307         if (!node)
1308                 return -ENOMEM;
1309         node->cache.start = bytenr;
1310         node->cache.size = 1;
1311         cache_tree_init(&node->root_cache);
1312         cache_tree_init(&node->inode_cache);
1313         node->refs = refs;
1314
1315         ret = insert_cache_extent(shared, &node->cache);
1316
1317         return ret;
1318 }
1319
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321                              struct walk_control *wc, int level)
1322 {
1323         struct shared_node *node;
1324         struct shared_node *dest;
1325         int ret;
1326
1327         if (level == wc->active_node)
1328                 return 0;
1329
1330         BUG_ON(wc->active_node <= level);
1331         node = find_shared_node(&wc->shared, bytenr);
1332         if (!node) {
1333                 ret = add_shared_node(&wc->shared, bytenr, refs);
1334                 BUG_ON(ret);
1335                 node = find_shared_node(&wc->shared, bytenr);
1336                 wc->nodes[level] = node;
1337                 wc->active_node = level;
1338                 return 0;
1339         }
1340
1341         if (wc->root_level == wc->active_node &&
1342             btrfs_root_refs(&root->root_item) == 0) {
1343                 if (--node->refs == 0) {
1344                         free_inode_recs_tree(&node->root_cache);
1345                         free_inode_recs_tree(&node->inode_cache);
1346                         remove_cache_extent(&wc->shared, &node->cache);
1347                         free(node);
1348                 }
1349                 return 1;
1350         }
1351
1352         dest = wc->nodes[wc->active_node];
1353         splice_shared_node(node, dest);
1354         if (node->refs == 0) {
1355                 remove_cache_extent(&wc->shared, &node->cache);
1356                 free(node);
1357         }
1358         return 1;
1359 }
1360
1361 static int leave_shared_node(struct btrfs_root *root,
1362                              struct walk_control *wc, int level)
1363 {
1364         struct shared_node *node;
1365         struct shared_node *dest;
1366         int i;
1367
1368         if (level == wc->root_level)
1369                 return 0;
1370
1371         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1372                 if (wc->nodes[i])
1373                         break;
1374         }
1375         BUG_ON(i >= BTRFS_MAX_LEVEL);
1376
1377         node = wc->nodes[wc->active_node];
1378         wc->nodes[wc->active_node] = NULL;
1379         wc->active_node = i;
1380
1381         dest = wc->nodes[wc->active_node];
1382         if (wc->active_node < wc->root_level ||
1383             btrfs_root_refs(&root->root_item) > 0) {
1384                 BUG_ON(node->refs <= 1);
1385                 splice_shared_node(node, dest);
1386         } else {
1387                 BUG_ON(node->refs < 2);
1388                 node->refs--;
1389         }
1390         return 0;
1391 }
1392
1393 /*
1394  * Returns:
1395  * < 0 - on error
1396  * 1   - if the root with id child_root_id is a child of root parent_root_id
1397  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1398  *       has other root(s) as parent(s)
1399  * 2   - if the root child_root_id doesn't have any parent roots
1400  */
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1402                          u64 child_root_id)
1403 {
1404         struct btrfs_path path;
1405         struct btrfs_key key;
1406         struct extent_buffer *leaf;
1407         int has_parent = 0;
1408         int ret;
1409
1410         btrfs_init_path(&path);
1411
1412         key.objectid = parent_root_id;
1413         key.type = BTRFS_ROOT_REF_KEY;
1414         key.offset = child_root_id;
1415         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1416                                 0, 0);
1417         if (ret < 0)
1418                 return ret;
1419         btrfs_release_path(&path);
1420         if (!ret)
1421                 return 1;
1422
1423         key.objectid = child_root_id;
1424         key.type = BTRFS_ROOT_BACKREF_KEY;
1425         key.offset = 0;
1426         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1427                                 0, 0);
1428         if (ret < 0)
1429                 goto out;
1430
1431         while (1) {
1432                 leaf = path.nodes[0];
1433                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1435                         if (ret)
1436                                 break;
1437                         leaf = path.nodes[0];
1438                 }
1439
1440                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1441                 if (key.objectid != child_root_id ||
1442                     key.type != BTRFS_ROOT_BACKREF_KEY)
1443                         break;
1444
1445                 has_parent = 1;
1446
1447                 if (key.offset == parent_root_id) {
1448                         btrfs_release_path(&path);
1449                         return 1;
1450                 }
1451
1452                 path.slots[0]++;
1453         }
1454 out:
1455         btrfs_release_path(&path);
1456         if (ret < 0)
1457                 return ret;
1458         return has_parent ? 0 : 2;
1459 }
1460
1461 static int process_dir_item(struct btrfs_root *root,
1462                             struct extent_buffer *eb,
1463                             int slot, struct btrfs_key *key,
1464                             struct shared_node *active_node)
1465 {
1466         u32 total;
1467         u32 cur = 0;
1468         u32 len;
1469         u32 name_len;
1470         u32 data_len;
1471         int error;
1472         int nritems = 0;
1473         u8 filetype;
1474         struct btrfs_dir_item *di;
1475         struct inode_record *rec;
1476         struct cache_tree *root_cache;
1477         struct cache_tree *inode_cache;
1478         struct btrfs_key location;
1479         char namebuf[BTRFS_NAME_LEN];
1480
1481         root_cache = &active_node->root_cache;
1482         inode_cache = &active_node->inode_cache;
1483         rec = active_node->current;
1484         rec->found_dir_item = 1;
1485
1486         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487         total = btrfs_item_size_nr(eb, slot);
1488         while (cur < total) {
1489                 nritems++;
1490                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491                 name_len = btrfs_dir_name_len(eb, di);
1492                 data_len = btrfs_dir_data_len(eb, di);
1493                 filetype = btrfs_dir_type(eb, di);
1494
1495                 rec->found_size += name_len;
1496                 if (name_len <= BTRFS_NAME_LEN) {
1497                         len = name_len;
1498                         error = 0;
1499                 } else {
1500                         len = BTRFS_NAME_LEN;
1501                         error = REF_ERR_NAME_TOO_LONG;
1502                 }
1503                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1504
1505                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506                         add_inode_backref(inode_cache, location.objectid,
1507                                           key->objectid, key->offset, namebuf,
1508                                           len, filetype, key->type, error);
1509                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510                         add_inode_backref(root_cache, location.objectid,
1511                                           key->objectid, key->offset,
1512                                           namebuf, len, filetype,
1513                                           key->type, error);
1514                 } else {
1515                         fprintf(stderr, "invalid location in dir item %u\n",
1516                                 location.type);
1517                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518                                           key->objectid, key->offset, namebuf,
1519                                           len, filetype, key->type, error);
1520                 }
1521
1522                 len = sizeof(*di) + name_len + data_len;
1523                 di = (struct btrfs_dir_item *)((char *)di + len);
1524                 cur += len;
1525         }
1526         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1528
1529         return 0;
1530 }
1531
1532 static int process_inode_ref(struct extent_buffer *eb,
1533                              int slot, struct btrfs_key *key,
1534                              struct shared_node *active_node)
1535 {
1536         u32 total;
1537         u32 cur = 0;
1538         u32 len;
1539         u32 name_len;
1540         u64 index;
1541         int error;
1542         struct cache_tree *inode_cache;
1543         struct btrfs_inode_ref *ref;
1544         char namebuf[BTRFS_NAME_LEN];
1545
1546         inode_cache = &active_node->inode_cache;
1547
1548         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549         total = btrfs_item_size_nr(eb, slot);
1550         while (cur < total) {
1551                 name_len = btrfs_inode_ref_name_len(eb, ref);
1552                 index = btrfs_inode_ref_index(eb, ref);
1553                 if (name_len <= BTRFS_NAME_LEN) {
1554                         len = name_len;
1555                         error = 0;
1556                 } else {
1557                         len = BTRFS_NAME_LEN;
1558                         error = REF_ERR_NAME_TOO_LONG;
1559                 }
1560                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561                 add_inode_backref(inode_cache, key->objectid, key->offset,
1562                                   index, namebuf, len, 0, key->type, error);
1563
1564                 len = sizeof(*ref) + name_len;
1565                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1566                 cur += len;
1567         }
1568         return 0;
1569 }
1570
1571 static int process_inode_extref(struct extent_buffer *eb,
1572                                 int slot, struct btrfs_key *key,
1573                                 struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u64 index;
1580         u64 parent;
1581         int error;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_inode_extref *extref;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         inode_cache = &active_node->inode_cache;
1587
1588         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589         total = btrfs_item_size_nr(eb, slot);
1590         while (cur < total) {
1591                 name_len = btrfs_inode_extref_name_len(eb, extref);
1592                 index = btrfs_inode_extref_index(eb, extref);
1593                 parent = btrfs_inode_extref_parent(eb, extref);
1594                 if (name_len <= BTRFS_NAME_LEN) {
1595                         len = name_len;
1596                         error = 0;
1597                 } else {
1598                         len = BTRFS_NAME_LEN;
1599                         error = REF_ERR_NAME_TOO_LONG;
1600                 }
1601                 read_extent_buffer(eb, namebuf,
1602                                    (unsigned long)(extref + 1), len);
1603                 add_inode_backref(inode_cache, key->objectid, parent,
1604                                   index, namebuf, len, 0, key->type, error);
1605
1606                 len = sizeof(*extref) + name_len;
1607                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1608                 cur += len;
1609         }
1610         return 0;
1611
1612 }
1613
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615                             u64 len, u64 *found)
1616 {
1617         struct btrfs_key key;
1618         struct btrfs_path path;
1619         struct extent_buffer *leaf;
1620         int ret;
1621         size_t size;
1622         *found = 0;
1623         u64 csum_end;
1624         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1625
1626         btrfs_init_path(&path);
1627
1628         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1629         key.offset = start;
1630         key.type = BTRFS_EXTENT_CSUM_KEY;
1631
1632         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1633                                 &key, &path, 0, 0);
1634         if (ret < 0)
1635                 goto out;
1636         if (ret > 0 && path.slots[0] > 0) {
1637                 leaf = path.nodes[0];
1638                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640                     key.type == BTRFS_EXTENT_CSUM_KEY)
1641                         path.slots[0]--;
1642         }
1643
1644         while (len > 0) {
1645                 leaf = path.nodes[0];
1646                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1648                         if (ret > 0)
1649                                 break;
1650                         else if (ret < 0)
1651                                 goto out;
1652                         leaf = path.nodes[0];
1653                 }
1654
1655                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657                     key.type != BTRFS_EXTENT_CSUM_KEY)
1658                         break;
1659
1660                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661                 if (key.offset >= start + len)
1662                         break;
1663
1664                 if (key.offset > start)
1665                         start = key.offset;
1666
1667                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669                 if (csum_end > start) {
1670                         size = min(csum_end - start, len);
1671                         len -= size;
1672                         start += size;
1673                         *found += size;
1674                 }
1675
1676                 path.slots[0]++;
1677         }
1678 out:
1679         btrfs_release_path(&path);
1680         if (ret < 0)
1681                 return ret;
1682         return 0;
1683 }
1684
1685 static int process_file_extent(struct btrfs_root *root,
1686                                 struct extent_buffer *eb,
1687                                 int slot, struct btrfs_key *key,
1688                                 struct shared_node *active_node)
1689 {
1690         struct inode_record *rec;
1691         struct btrfs_file_extent_item *fi;
1692         u64 num_bytes = 0;
1693         u64 disk_bytenr = 0;
1694         u64 extent_offset = 0;
1695         u64 mask = root->sectorsize - 1;
1696         int extent_type;
1697         int ret;
1698
1699         rec = active_node->current;
1700         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701         rec->found_file_extent = 1;
1702
1703         if (rec->extent_start == (u64)-1) {
1704                 rec->extent_start = key->offset;
1705                 rec->extent_end = key->offset;
1706         }
1707
1708         if (rec->extent_end > key->offset)
1709                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710         else if (rec->extent_end < key->offset) {
1711                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712                                            key->offset - rec->extent_end);
1713                 if (ret < 0)
1714                         return ret;
1715         }
1716
1717         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718         extent_type = btrfs_file_extent_type(eb, fi);
1719
1720         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1722                 if (num_bytes == 0)
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 rec->found_size += num_bytes;
1725                 num_bytes = (num_bytes + mask) & ~mask;
1726         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730                 extent_offset = btrfs_file_extent_offset(eb, fi);
1731                 if (num_bytes == 0 || (num_bytes & mask))
1732                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733                 if (num_bytes + extent_offset >
1734                     btrfs_file_extent_ram_bytes(eb, fi))
1735                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1736                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737                     (btrfs_file_extent_compression(eb, fi) ||
1738                      btrfs_file_extent_encryption(eb, fi) ||
1739                      btrfs_file_extent_other_encoding(eb, fi)))
1740                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1741                 if (disk_bytenr > 0)
1742                         rec->found_size += num_bytes;
1743         } else {
1744                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1745         }
1746         rec->extent_end = key->offset + num_bytes;
1747
1748         /*
1749          * The data reloc tree will copy full extents into its inode and then
1750          * copy the corresponding csums.  Because the extent it copied could be
1751          * a preallocated extent that hasn't been written to yet there may be no
1752          * csums to copy, ergo we won't have csums for our file extent.  This is
1753          * ok so just don't bother checking csums if the inode belongs to the
1754          * data reloc tree.
1755          */
1756         if (disk_bytenr > 0 &&
1757             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1758                 u64 found;
1759                 if (btrfs_file_extent_compression(eb, fi))
1760                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1761                 else
1762                         disk_bytenr += extent_offset;
1763
1764                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1765                 if (ret < 0)
1766                         return ret;
1767                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1768                         if (found > 0)
1769                                 rec->found_csum_item = 1;
1770                         if (found < num_bytes)
1771                                 rec->some_csum_missing = 1;
1772                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1773                         if (found > 0)
1774                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1775                 }
1776         }
1777         return 0;
1778 }
1779
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781                             struct walk_control *wc)
1782 {
1783         struct btrfs_key key;
1784         u32 nritems;
1785         int i;
1786         int ret = 0;
1787         struct cache_tree *inode_cache;
1788         struct shared_node *active_node;
1789
1790         if (wc->root_level == wc->active_node &&
1791             btrfs_root_refs(&root->root_item) == 0)
1792                 return 0;
1793
1794         active_node = wc->nodes[wc->active_node];
1795         inode_cache = &active_node->inode_cache;
1796         nritems = btrfs_header_nritems(eb);
1797         for (i = 0; i < nritems; i++) {
1798                 btrfs_item_key_to_cpu(eb, &key, i);
1799
1800                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1801                         continue;
1802                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1803                         continue;
1804
1805                 if (active_node->current == NULL ||
1806                     active_node->current->ino < key.objectid) {
1807                         if (active_node->current) {
1808                                 active_node->current->checked = 1;
1809                                 maybe_free_inode_rec(inode_cache,
1810                                                      active_node->current);
1811                         }
1812                         active_node->current = get_inode_rec(inode_cache,
1813                                                              key.objectid, 1);
1814                         BUG_ON(IS_ERR(active_node->current));
1815                 }
1816                 switch (key.type) {
1817                 case BTRFS_DIR_ITEM_KEY:
1818                 case BTRFS_DIR_INDEX_KEY:
1819                         ret = process_dir_item(root, eb, i, &key, active_node);
1820                         break;
1821                 case BTRFS_INODE_REF_KEY:
1822                         ret = process_inode_ref(eb, i, &key, active_node);
1823                         break;
1824                 case BTRFS_INODE_EXTREF_KEY:
1825                         ret = process_inode_extref(eb, i, &key, active_node);
1826                         break;
1827                 case BTRFS_INODE_ITEM_KEY:
1828                         ret = process_inode_item(eb, i, &key, active_node);
1829                         break;
1830                 case BTRFS_EXTENT_DATA_KEY:
1831                         ret = process_file_extent(root, eb, i, &key,
1832                                                   active_node);
1833                         break;
1834                 default:
1835                         break;
1836                 };
1837         }
1838         return ret;
1839 }
1840
1841 static void reada_walk_down(struct btrfs_root *root,
1842                             struct extent_buffer *node, int slot)
1843 {
1844         u64 bytenr;
1845         u64 ptr_gen;
1846         u32 nritems;
1847         u32 blocksize;
1848         int i;
1849         int level;
1850
1851         level = btrfs_header_level(node);
1852         if (level != 1)
1853                 return;
1854
1855         nritems = btrfs_header_nritems(node);
1856         blocksize = root->nodesize;
1857         for (i = slot; i < nritems; i++) {
1858                 bytenr = btrfs_node_blockptr(node, i);
1859                 ptr_gen = btrfs_node_ptr_generation(node, i);
1860                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1861         }
1862 }
1863
1864 /*
1865  * Check the child node/leaf by the following condition:
1866  * 1. the first item key of the node/leaf should be the same with the one
1867  *    in parent.
1868  * 2. block in parent node should match the child node/leaf.
1869  * 3. generation of parent node and child's header should be consistent.
1870  *
1871  * Or the child node/leaf pointed by the key in parent is not valid.
1872  *
1873  * We hope to check leaf owner too, but since subvol may share leaves,
1874  * which makes leaf owner check not so strong, key check should be
1875  * sufficient enough for that case.
1876  */
1877 static int check_child_node(struct btrfs_root *root,
1878                             struct extent_buffer *parent, int slot,
1879                             struct extent_buffer *child)
1880 {
1881         struct btrfs_key parent_key;
1882         struct btrfs_key child_key;
1883         int ret = 0;
1884
1885         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1886         if (btrfs_header_level(child) == 0)
1887                 btrfs_item_key_to_cpu(child, &child_key, 0);
1888         else
1889                 btrfs_node_key_to_cpu(child, &child_key, 0);
1890
1891         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1892                 ret = -EINVAL;
1893                 fprintf(stderr,
1894                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895                         parent_key.objectid, parent_key.type, parent_key.offset,
1896                         child_key.objectid, child_key.type, child_key.offset);
1897         }
1898         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1899                 ret = -EINVAL;
1900                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901                         btrfs_node_blockptr(parent, slot),
1902                         btrfs_header_bytenr(child));
1903         }
1904         if (btrfs_node_ptr_generation(parent, slot) !=
1905             btrfs_header_generation(child)) {
1906                 ret = -EINVAL;
1907                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908                         btrfs_header_generation(child),
1909                         btrfs_node_ptr_generation(parent, slot));
1910         }
1911         return ret;
1912 }
1913
/*
 * Per-level cache of the most recent extent reference count lookup, indexed
 * by tree level.  walk_down_tree() compares a block's start address with
 * bytenr[level] and, on a match, reuses refs[level] instead of calling
 * btrfs_lookup_extent_info() again.
 */
1914 struct node_refs {
        /* Start address of the block last looked up at each level. */
1915         u64 bytenr[BTRFS_MAX_LEVEL];
        /* Reference count that lookup returned for the block. */
1916         u64 refs[BTRFS_MAX_LEVEL];
1917 };
1918
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920                           struct walk_control *wc, int *level,
1921                           struct node_refs *nrefs)
1922 {
1923         enum btrfs_tree_block_status status;
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         struct extent_buffer *next;
1927         struct extent_buffer *cur;
1928         u32 blocksize;
1929         int ret, err = 0;
1930         u64 refs;
1931
1932         WARN_ON(*level < 0);
1933         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1934
1935         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936                 refs = nrefs->refs[*level];
1937                 ret = 0;
1938         } else {
1939                 ret = btrfs_lookup_extent_info(NULL, root,
1940                                        path->nodes[*level]->start,
1941                                        *level, 1, &refs, NULL);
1942                 if (ret < 0) {
1943                         err = ret;
1944                         goto out;
1945                 }
1946                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947                 nrefs->refs[*level] = refs;
1948         }
1949
1950         if (refs > 1) {
1951                 ret = enter_shared_node(root, path->nodes[*level]->start,
1952                                         refs, wc, *level);
1953                 if (ret > 0) {
1954                         err = ret;
1955                         goto out;
1956                 }
1957         }
1958
1959         while (*level >= 0) {
1960                 WARN_ON(*level < 0);
1961                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962                 cur = path->nodes[*level];
1963
1964                 if (btrfs_header_level(cur) != *level)
1965                         WARN_ON(1);
1966
1967                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1968                         break;
1969                 if (*level == 0) {
1970                         ret = process_one_leaf(root, cur, wc);
1971                         if (ret < 0)
1972                                 err = ret;
1973                         break;
1974                 }
1975                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977                 blocksize = root->nodesize;
1978
1979                 if (bytenr == nrefs->bytenr[*level - 1]) {
1980                         refs = nrefs->refs[*level - 1];
1981                 } else {
1982                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983                                         *level - 1, 1, &refs, NULL);
1984                         if (ret < 0) {
1985                                 refs = 0;
1986                         } else {
1987                                 nrefs->bytenr[*level - 1] = bytenr;
1988                                 nrefs->refs[*level - 1] = refs;
1989                         }
1990                 }
1991
1992                 if (refs > 1) {
1993                         ret = enter_shared_node(root, bytenr, refs,
1994                                                 wc, *level - 1);
1995                         if (ret > 0) {
1996                                 path->slots[*level]++;
1997                                 continue;
1998                         }
1999                 }
2000
2001                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003                         free_extent_buffer(next);
2004                         reada_walk_down(root, cur, path->slots[*level]);
2005                         next = read_tree_block(root, bytenr, blocksize,
2006                                                ptr_gen);
2007                         if (!extent_buffer_uptodate(next)) {
2008                                 struct btrfs_key node_key;
2009
2010                                 btrfs_node_key_to_cpu(path->nodes[*level],
2011                                                       &node_key,
2012                                                       path->slots[*level]);
2013                                 btrfs_add_corrupt_extent_record(root->fs_info,
2014                                                 &node_key,
2015                                                 path->nodes[*level]->start,
2016                                                 root->nodesize, *level);
2017                                 err = -EIO;
2018                                 goto out;
2019                         }
2020                 }
2021
2022                 ret = check_child_node(root, cur, path->slots[*level], next);
2023                 if (ret) {
2024                         err = ret;
2025                         goto out;
2026                 }
2027
2028                 if (btrfs_is_leaf(next))
2029                         status = btrfs_check_leaf(root, NULL, next);
2030                 else
2031                         status = btrfs_check_node(root, NULL, next);
2032                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033                         free_extent_buffer(next);
2034                         err = -EIO;
2035                         goto out;
2036                 }
2037
2038                 *level = *level - 1;
2039                 free_extent_buffer(path->nodes[*level]);
2040                 path->nodes[*level] = next;
2041                 path->slots[*level] = 0;
2042         }
2043 out:
2044         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2045         return err;
2046 }
2047
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049                         struct walk_control *wc, int *level)
2050 {
2051         int i;
2052         struct extent_buffer *leaf;
2053
2054         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055                 leaf = path->nodes[i];
2056                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2057                         path->slots[i]++;
2058                         *level = i;
2059                         return 0;
2060                 } else {
2061                         free_extent_buffer(path->nodes[*level]);
2062                         path->nodes[*level] = NULL;
2063                         BUG_ON(*level > wc->active_node);
2064                         if (*level == wc->active_node)
2065                                 leave_shared_node(root, wc, *level);
2066                         *level = i + 1;
2067                 }
2068         }
2069         return 1;
2070 }
2071
2072 static int check_root_dir(struct inode_record *rec)
2073 {
2074         struct inode_backref *backref;
2075         int ret = -1;
2076
2077         if (!rec->found_inode_item || rec->errors)
2078                 goto out;
2079         if (rec->nlink != 1 || rec->found_link != 0)
2080                 goto out;
2081         if (list_empty(&rec->backrefs))
2082                 goto out;
2083         backref = to_inode_backref(rec->backrefs.next);
2084         if (!backref->found_inode_ref)
2085                 goto out;
2086         if (backref->index != 0 || backref->namelen != 2 ||
2087             memcmp(backref->name, "..", 2))
2088                 goto out;
2089         if (backref->found_dir_index || backref->found_dir_item)
2090                 goto out;
2091         ret = 0;
2092 out:
2093         return ret;
2094 }
2095
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097                               struct btrfs_root *root, struct btrfs_path *path,
2098                               struct inode_record *rec)
2099 {
2100         struct btrfs_inode_item *ei;
2101         struct btrfs_key key;
2102         int ret;
2103
2104         key.objectid = rec->ino;
2105         key.type = BTRFS_INODE_ITEM_KEY;
2106         key.offset = (u64)-1;
2107
2108         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2109         if (ret < 0)
2110                 goto out;
2111         if (ret) {
2112                 if (!path->slots[0]) {
2113                         ret = -ENOENT;
2114                         goto out;
2115                 }
2116                 path->slots[0]--;
2117                 ret = 0;
2118         }
2119         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120         if (key.objectid != rec->ino) {
2121                 ret = -ENOENT;
2122                 goto out;
2123         }
2124
2125         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126                             struct btrfs_inode_item);
2127         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128         btrfs_mark_buffer_dirty(path->nodes[0]);
2129         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131                root->root_key.objectid);
2132 out:
2133         btrfs_release_path(path);
2134         return ret;
2135 }
2136
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138                                     struct btrfs_root *root,
2139                                     struct btrfs_path *path,
2140                                     struct inode_record *rec)
2141 {
2142         int ret;
2143
2144         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145         btrfs_release_path(path);
2146         if (!ret)
2147                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2148         return ret;
2149 }
2150
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152                                struct btrfs_root *root,
2153                                struct btrfs_path *path,
2154                                struct inode_record *rec)
2155 {
2156         struct btrfs_inode_item *ei;
2157         struct btrfs_key key;
2158         int ret = 0;
2159
2160         key.objectid = rec->ino;
2161         key.type = BTRFS_INODE_ITEM_KEY;
2162         key.offset = 0;
2163
2164         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2165         if (ret) {
2166                 if (ret > 0)
2167                         ret = -ENOENT;
2168                 goto out;
2169         }
2170
2171         /* Since ret == 0, no need to check anything */
2172         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173                             struct btrfs_inode_item);
2174         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175         btrfs_mark_buffer_dirty(path->nodes[0]);
2176         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177         printf("reset nbytes for ino %llu root %llu\n",
2178                rec->ino, root->root_key.objectid);
2179 out:
2180         btrfs_release_path(path);
2181         return ret;
2182 }
2183
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185                                  struct cache_tree *inode_cache,
2186                                  struct inode_record *rec,
2187                                  struct inode_backref *backref)
2188 {
2189         struct btrfs_path path;
2190         struct btrfs_trans_handle *trans;
2191         struct btrfs_dir_item *dir_item;
2192         struct extent_buffer *leaf;
2193         struct btrfs_key key;
2194         struct btrfs_disk_key disk_key;
2195         struct inode_record *dir_rec;
2196         unsigned long name_ptr;
2197         u32 data_size = sizeof(*dir_item) + backref->namelen;
2198         int ret;
2199
2200         trans = btrfs_start_transaction(root, 1);
2201         if (IS_ERR(trans))
2202                 return PTR_ERR(trans);
2203
2204         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2205                 (unsigned long long)rec->ino);
2206
2207         btrfs_init_path(&path);
2208         key.objectid = backref->dir;
2209         key.type = BTRFS_DIR_INDEX_KEY;
2210         key.offset = backref->index;
2211         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2212         BUG_ON(ret);
2213
2214         leaf = path.nodes[0];
2215         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2216
2217         disk_key.objectid = cpu_to_le64(rec->ino);
2218         disk_key.type = BTRFS_INODE_ITEM_KEY;
2219         disk_key.offset = 0;
2220
2221         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2222         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2223         btrfs_set_dir_data_len(leaf, dir_item, 0);
2224         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2225         name_ptr = (unsigned long)(dir_item + 1);
2226         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2227         btrfs_mark_buffer_dirty(leaf);
2228         btrfs_release_path(&path);
2229         btrfs_commit_transaction(trans, root);
2230
2231         backref->found_dir_index = 1;
2232         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2233         BUG_ON(IS_ERR(dir_rec));
2234         if (!dir_rec)
2235                 return 0;
2236         dir_rec->found_size += backref->namelen;
2237         if (dir_rec->found_size == dir_rec->isize &&
2238             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2239                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2240         if (dir_rec->found_size != dir_rec->isize)
2241                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2242
2243         return 0;
2244 }
2245
2246 static int delete_dir_index(struct btrfs_root *root,
2247                             struct cache_tree *inode_cache,
2248                             struct inode_record *rec,
2249                             struct inode_backref *backref)
2250 {
2251         struct btrfs_trans_handle *trans;
2252         struct btrfs_dir_item *di;
2253         struct btrfs_path path;
2254         int ret = 0;
2255
2256         trans = btrfs_start_transaction(root, 1);
2257         if (IS_ERR(trans))
2258                 return PTR_ERR(trans);
2259
2260         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261                 (unsigned long long)backref->dir,
2262                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263                 (unsigned long long)root->objectid);
2264
2265         btrfs_init_path(&path);
2266         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2267                                     backref->name, backref->namelen,
2268                                     backref->index, -1);
2269         if (IS_ERR(di)) {
2270                 ret = PTR_ERR(di);
2271                 btrfs_release_path(&path);
2272                 btrfs_commit_transaction(trans, root);
2273                 if (ret == -ENOENT)
2274                         return 0;
2275                 return ret;
2276         }
2277
2278         if (!di)
2279                 ret = btrfs_del_item(trans, root, &path);
2280         else
2281                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2282         BUG_ON(ret);
2283         btrfs_release_path(&path);
2284         btrfs_commit_transaction(trans, root);
2285         return ret;
2286 }
2287
2288 static int create_inode_item(struct btrfs_root *root,
2289                              struct inode_record *rec,
2290                              struct inode_backref *backref, int root_dir)
2291 {
2292         struct btrfs_trans_handle *trans;
2293         struct btrfs_inode_item inode_item;
2294         time_t now = time(NULL);
2295         int ret;
2296
2297         trans = btrfs_start_transaction(root, 1);
2298         if (IS_ERR(trans)) {
2299                 ret = PTR_ERR(trans);
2300                 return ret;
2301         }
2302
2303         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2304                 "be incomplete, please check permissions and content after "
2305                 "the fsck completes.\n", (unsigned long long)root->objectid,
2306                 (unsigned long long)rec->ino);
2307
2308         memset(&inode_item, 0, sizeof(inode_item));
2309         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2310         if (root_dir)
2311                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2312         else
2313                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2314         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2315         if (rec->found_dir_item) {
2316                 if (rec->found_file_extent)
2317                         fprintf(stderr, "root %llu inode %llu has both a dir "
2318                                 "item and extents, unsure if it is a dir or a "
2319                                 "regular file so setting it as a directory\n",
2320                                 (unsigned long long)root->objectid,
2321                                 (unsigned long long)rec->ino);
2322                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2323                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2324         } else if (!rec->found_dir_item) {
2325                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2326                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2327         }
2328         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2329         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2330         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2331         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2332         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2333         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2334         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2335         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2336
2337         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2338         BUG_ON(ret);
2339         btrfs_commit_transaction(trans, root);
2340         return 0;
2341 }
2342
2343 static int repair_inode_backrefs(struct btrfs_root *root,
2344                                  struct inode_record *rec,
2345                                  struct cache_tree *inode_cache,
2346                                  int delete)
2347 {
2348         struct inode_backref *tmp, *backref;
2349         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2350         int ret = 0;
2351         int repaired = 0;
2352
2353         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354                 if (!delete && rec->ino == root_dirid) {
2355                         if (!rec->found_inode_item) {
2356                                 ret = create_inode_item(root, rec, backref, 1);
2357                                 if (ret)
2358                                         break;
2359                                 repaired++;
2360                         }
2361                 }
2362
2363                 /* Index 0 for root dir's are special, don't mess with it */
2364                 if (rec->ino == root_dirid && backref->index == 0)
2365                         continue;
2366
2367                 if (delete &&
2368                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2369                      (backref->found_dir_index && backref->found_inode_ref &&
2370                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2371                         ret = delete_dir_index(root, inode_cache, rec, backref);
2372                         if (ret)
2373                                 break;
2374                         repaired++;
2375                         list_del(&backref->list);
2376                         free(backref);
2377                 }
2378
2379                 if (!delete && !backref->found_dir_index &&
2380                     backref->found_dir_item && backref->found_inode_ref) {
2381                         ret = add_missing_dir_index(root, inode_cache, rec,
2382                                                     backref);
2383                         if (ret)
2384                                 break;
2385                         repaired++;
2386                         if (backref->found_dir_item &&
2387                             backref->found_dir_index &&
2388                             backref->found_dir_index) {
2389                                 if (!backref->errors &&
2390                                     backref->found_inode_ref) {
2391                                         list_del(&backref->list);
2392                                         free(backref);
2393                                 }
2394                         }
2395                 }
2396
2397                 if (!delete && (!backref->found_dir_index &&
2398                                 !backref->found_dir_item &&
2399                                 backref->found_inode_ref)) {
2400                         struct btrfs_trans_handle *trans;
2401                         struct btrfs_key location;
2402
2403                         ret = check_dir_conflict(root, backref->name,
2404                                                  backref->namelen,
2405                                                  backref->dir,
2406                                                  backref->index);
2407                         if (ret) {
2408                                 /*
2409                                  * let nlink fixing routine to handle it,
2410                                  * which can do it better.
2411                                  */
2412                                 ret = 0;
2413                                 break;
2414                         }
2415                         location.objectid = rec->ino;
2416                         location.type = BTRFS_INODE_ITEM_KEY;
2417                         location.offset = 0;
2418
2419                         trans = btrfs_start_transaction(root, 1);
2420                         if (IS_ERR(trans)) {
2421                                 ret = PTR_ERR(trans);
2422                                 break;
2423                         }
2424                         fprintf(stderr, "adding missing dir index/item pair "
2425                                 "for inode %llu\n",
2426                                 (unsigned long long)rec->ino);
2427                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2428                                                     backref->namelen,
2429                                                     backref->dir, &location,
2430                                                     imode_to_type(rec->imode),
2431                                                     backref->index);
2432                         BUG_ON(ret);
2433                         btrfs_commit_transaction(trans, root);
2434                         repaired++;
2435                 }
2436
2437                 if (!delete && (backref->found_inode_ref &&
2438                                 backref->found_dir_index &&
2439                                 backref->found_dir_item &&
2440                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2441                                 !rec->found_inode_item)) {
2442                         ret = create_inode_item(root, rec, backref, 0);
2443                         if (ret)
2444                                 break;
2445                         repaired++;
2446                 }
2447
2448         }
2449         return ret ? ret : repaired;
2450 }
2451
2452 /*
2453  * To determine the file type for nlink/inode_item repair
2454  *
2455  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2456  * Return -ENOENT if file type is not found.
2457  */
2458 static int find_file_type(struct inode_record *rec, u8 *type)
2459 {
2460         struct inode_backref *backref;
2461
2462         /* For inode item recovered case */
2463         if (rec->found_inode_item) {
2464                 *type = imode_to_type(rec->imode);
2465                 return 0;
2466         }
2467
2468         list_for_each_entry(backref, &rec->backrefs, list) {
2469                 if (backref->found_dir_index || backref->found_dir_item) {
2470                         *type = backref->filetype;
2471                         return 0;
2472                 }
2473         }
2474         return -ENOENT;
2475 }
2476
2477 /*
2478  * To determine the file name for nlink repair
2479  *
2480  * Return 0 if file name is found, set name and namelen.
2481  * Return -ENOENT if file name is not found.
2482  */
2483 static int find_file_name(struct inode_record *rec,
2484                           char *name, int *namelen)
2485 {
2486         struct inode_backref *backref;
2487
2488         list_for_each_entry(backref, &rec->backrefs, list) {
2489                 if (backref->found_dir_index || backref->found_dir_item ||
2490                     backref->found_inode_ref) {
2491                         memcpy(name, backref->name, backref->namelen);
2492                         *namelen = backref->namelen;
2493                         return 0;
2494                 }
2495         }
2496         return -ENOENT;
2497 }
2498
2499 /* Reset the nlink of the inode to the correct one */
2500 static int reset_nlink(struct btrfs_trans_handle *trans,
2501                        struct btrfs_root *root,
2502                        struct btrfs_path *path,
2503                        struct inode_record *rec)
2504 {
2505         struct inode_backref *backref;
2506         struct inode_backref *tmp;
2507         struct btrfs_key key;
2508         struct btrfs_inode_item *inode_item;
2509         int ret = 0;
2510
2511         /* We don't believe this either, reset it and iterate backref */
2512         rec->found_link = 0;
2513
2514         /* Remove all backref including the valid ones */
2515         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2516                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2517                                    backref->index, backref->name,
2518                                    backref->namelen, 0);
2519                 if (ret < 0)
2520                         goto out;
2521
2522                 /* remove invalid backref, so it won't be added back */
2523                 if (!(backref->found_dir_index &&
2524                       backref->found_dir_item &&
2525                       backref->found_inode_ref)) {
2526                         list_del(&backref->list);
2527                         free(backref);
2528                 } else {
2529                         rec->found_link++;
2530                 }
2531         }
2532
2533         /* Set nlink to 0 */
2534         key.objectid = rec->ino;
2535         key.type = BTRFS_INODE_ITEM_KEY;
2536         key.offset = 0;
2537         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2538         if (ret < 0)
2539                 goto out;
2540         if (ret > 0) {
2541                 ret = -ENOENT;
2542                 goto out;
2543         }
2544         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2545                                     struct btrfs_inode_item);
2546         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2547         btrfs_mark_buffer_dirty(path->nodes[0]);
2548         btrfs_release_path(path);
2549
2550         /*
2551          * Add back valid inode_ref/dir_item/dir_index,
2552          * add_link() will handle the nlink inc, so new nlink must be correct
2553          */
2554         list_for_each_entry(backref, &rec->backrefs, list) {
2555                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2556                                      backref->name, backref->namelen,
2557                                      backref->filetype, &backref->index, 1);
2558                 if (ret < 0)
2559                         goto out;
2560         }
2561 out:
2562         btrfs_release_path(path);
2563         return ret;
2564 }
2565
2566 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2567                                struct btrfs_root *root,
2568                                struct btrfs_path *path,
2569                                struct inode_record *rec)
2570 {
2571         char *dir_name = "lost+found";
2572         char namebuf[BTRFS_NAME_LEN] = {0};
2573         u64 lost_found_ino;
2574         u32 mode = 0700;
2575         u8 type = 0;
2576         int namelen = 0;
2577         int name_recovered = 0;
2578         int type_recovered = 0;
2579         int ret = 0;
2580
2581         /*
2582          * Get file name and type first before these invalid inode ref
2583          * are deleted by remove_all_invalid_backref()
2584          */
2585         name_recovered = !find_file_name(rec, namebuf, &namelen);
2586         type_recovered = !find_file_type(rec, &type);
2587
2588         if (!name_recovered) {
2589                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2590                        rec->ino, rec->ino);
2591                 namelen = count_digits(rec->ino);
2592                 sprintf(namebuf, "%llu", rec->ino);
2593                 name_recovered = 1;
2594         }
2595         if (!type_recovered) {
2596                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2597                        rec->ino);
2598                 type = BTRFS_FT_REG_FILE;
2599                 type_recovered = 1;
2600         }
2601
2602         ret = reset_nlink(trans, root, path, rec);
2603         if (ret < 0) {
2604                 fprintf(stderr,
2605                         "Failed to reset nlink for inode %llu: %s\n",
2606                         rec->ino, strerror(-ret));
2607                 goto out;
2608         }
2609
2610         if (rec->found_link == 0) {
2611                 lost_found_ino = root->highest_inode;
2612                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2613                         ret = -EOVERFLOW;
2614                         goto out;
2615                 }
2616                 lost_found_ino++;
2617                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2618                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2619                                   mode);
2620                 if (ret < 0) {
2621                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2622                                 dir_name, strerror(-ret));
2623                         goto out;
2624                 }
2625                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2626                                      namebuf, namelen, type, NULL, 1);
2627                 /*
2628                  * Add ".INO" suffix several times to handle case where
2629                  * "FILENAME.INO" is already taken by another file.
2630                  */
2631                 while (ret == -EEXIST) {
2632                         /*
2633                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2634                          */
2635                         if (namelen + count_digits(rec->ino) + 1 >
2636                             BTRFS_NAME_LEN) {
2637                                 ret = -EFBIG;
2638                                 goto out;
2639                         }
2640                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2641                                  ".%llu", rec->ino);
2642                         namelen += count_digits(rec->ino) + 1;
2643                         ret = btrfs_add_link(trans, root, rec->ino,
2644                                              lost_found_ino, namebuf,
2645                                              namelen, type, NULL, 1);
2646                 }
2647                 if (ret < 0) {
2648                         fprintf(stderr,
2649                                 "Failed to link the inode %llu to %s dir: %s\n",
2650                                 rec->ino, dir_name, strerror(-ret));
2651                         goto out;
2652                 }
2653                 /*
2654                  * Just increase the found_link, don't actually add the
2655                  * backref. This will make things easier and this inode
2656                  * record will be freed after the repair is done.
2657                  * So fsck will not report problem about this inode.
2658                  */
2659                 rec->found_link++;
2660                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2661                        namelen, namebuf, dir_name);
2662         }
2663         printf("Fixed the nlink of inode %llu\n", rec->ino);
2664 out:
2665         /*
2666          * Clear the flag anyway, or we will loop forever for the same inode
2667          * as it will not be removed from the bad inode list and the dead loop
2668          * happens.
2669          */
2670         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2671         btrfs_release_path(path);
2672         return ret;
2673 }
2674
2675 /*
2676  * Check if there is any normal(reg or prealloc) file extent for given
2677  * ino.
2678  * This is used to determine the file type when neither its dir_index/item or
2679  * inode_item exists.
2680  *
2681  * This will *NOT* report error, if any error happens, just consider it does
2682  * not have any normal file extent.
2683  */
2684 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2685 {
2686         struct btrfs_path *path;
2687         struct btrfs_key key;
2688         struct btrfs_key found_key;
2689         struct btrfs_file_extent_item *fi;
2690         u8 type;
2691         int ret = 0;
2692
2693         path = btrfs_alloc_path();
2694         if (!path)
2695                 goto out;
2696         key.objectid = ino;
2697         key.type = BTRFS_EXTENT_DATA_KEY;
2698         key.offset = 0;
2699
2700         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2701         if (ret < 0) {
2702                 ret = 0;
2703                 goto out;
2704         }
2705         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2706                 ret = btrfs_next_leaf(root, path);
2707                 if (ret) {
2708                         ret = 0;
2709                         goto out;
2710                 }
2711         }
2712         while (1) {
2713                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2714                                       path->slots[0]);
2715                 if (found_key.objectid != ino ||
2716                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2717                         break;
2718                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2719                                     struct btrfs_file_extent_item);
2720                 type = btrfs_file_extent_type(path->nodes[0], fi);
2721                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2722                         ret = 1;
2723                         goto out;
2724                 }
2725         }
2726 out:
2727         btrfs_free_path(path);
2728         return ret;
2729 }
2730
2731 static u32 btrfs_type_to_imode(u8 type)
2732 {
2733         static u32 imode_by_btrfs_type[] = {
2734                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2735                 [BTRFS_FT_DIR]          = S_IFDIR,
2736                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2737                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2738                 [BTRFS_FT_FIFO]         = S_IFIFO,
2739                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2740                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2741         };
2742
2743         return imode_by_btrfs_type[(type)];
2744 }
2745
2746 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2747                                 struct btrfs_root *root,
2748                                 struct btrfs_path *path,
2749                                 struct inode_record *rec)
2750 {
2751         u8 filetype;
2752         u32 mode = 0700;
2753         int type_recovered = 0;
2754         int ret = 0;
2755
2756         printf("Trying to rebuild inode:%llu\n", rec->ino);
2757
2758         type_recovered = !find_file_type(rec, &filetype);
2759
2760         /*
2761          * Try to determine inode type if type not found.
2762          *
2763          * For found regular file extent, it must be FILE.
2764          * For found dir_item/index, it must be DIR.
2765          *
2766          * For undetermined one, use FILE as fallback.
2767          *
2768          * TODO:
2769          * 1. If found backref(inode_index/item is already handled) to it,
2770          *    it must be DIR.
2771          *    Need new inode-inode ref structure to allow search for that.
2772          */
2773         if (!type_recovered) {
2774                 if (rec->found_file_extent &&
2775                     find_normal_file_extent(root, rec->ino)) {
2776                         type_recovered = 1;
2777                         filetype = BTRFS_FT_REG_FILE;
2778                 } else if (rec->found_dir_item) {
2779                         type_recovered = 1;
2780                         filetype = BTRFS_FT_DIR;
2781                 } else if (!list_empty(&rec->orphan_extents)) {
2782                         type_recovered = 1;
2783                         filetype = BTRFS_FT_REG_FILE;
2784                 } else{
2785                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2786                                rec->ino);
2787                         type_recovered = 1;
2788                         filetype = BTRFS_FT_REG_FILE;
2789                 }
2790         }
2791
2792         ret = btrfs_new_inode(trans, root, rec->ino,
2793                               mode | btrfs_type_to_imode(filetype));
2794         if (ret < 0)
2795                 goto out;
2796
2797         /*
2798          * Here inode rebuild is done, we only rebuild the inode item,
2799          * don't repair the nlink(like move to lost+found).
2800          * That is the job of nlink repair.
2801          *
2802          * We just fill the record and return
2803          */
2804         rec->found_dir_item = 1;
2805         rec->imode = mode | btrfs_type_to_imode(filetype);
2806         rec->nlink = 0;
2807         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2808         /* Ensure the inode_nlinks repair function will be called */
2809         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2810 out:
2811         return ret;
2812 }
2813
2814 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2815                                       struct btrfs_root *root,
2816                                       struct btrfs_path *path,
2817                                       struct inode_record *rec)
2818 {
2819         struct orphan_data_extent *orphan;
2820         struct orphan_data_extent *tmp;
2821         int ret = 0;
2822
2823         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2824                 /*
2825                  * Check for conflicting file extents
2826                  *
2827                  * Here we don't know whether the extents is compressed or not,
2828                  * so we can only assume it not compressed nor data offset,
2829                  * and use its disk_len as extent length.
2830                  */
2831                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2832                                        orphan->offset, orphan->disk_len, 0);
2833                 btrfs_release_path(path);
2834                 if (ret < 0)
2835                         goto out;
2836                 if (!ret) {
2837                         fprintf(stderr,
2838                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2839                                 orphan->disk_bytenr, orphan->disk_len);
2840                         ret = btrfs_free_extent(trans,
2841                                         root->fs_info->extent_root,
2842                                         orphan->disk_bytenr, orphan->disk_len,
2843                                         0, root->objectid, orphan->objectid,
2844                                         orphan->offset);
2845                         if (ret < 0)
2846                                 goto out;
2847                 }
2848                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2849                                 orphan->offset, orphan->disk_bytenr,
2850                                 orphan->disk_len, orphan->disk_len);
2851                 if (ret < 0)
2852                         goto out;
2853
2854                 /* Update file size info */
2855                 rec->found_size += orphan->disk_len;
2856                 if (rec->found_size == rec->nbytes)
2857                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2858
2859                 /* Update the file extent hole info too */
2860                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2861                                            orphan->disk_len);
2862                 if (ret < 0)
2863                         goto out;
2864                 if (RB_EMPTY_ROOT(&rec->holes))
2865                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2866
2867                 list_del(&orphan->list);
2868                 free(orphan);
2869         }
2870         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2871 out:
2872         return ret;
2873 }
2874
2875 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2876                                         struct btrfs_root *root,
2877                                         struct btrfs_path *path,
2878                                         struct inode_record *rec)
2879 {
2880         struct rb_node *node;
2881         struct file_extent_hole *hole;
2882         int found = 0;
2883         int ret = 0;
2884
2885         node = rb_first(&rec->holes);
2886
2887         while (node) {
2888                 found = 1;
2889                 hole = rb_entry(node, struct file_extent_hole, node);
2890                 ret = btrfs_punch_hole(trans, root, rec->ino,
2891                                        hole->start, hole->len);
2892                 if (ret < 0)
2893                         goto out;
2894                 ret = del_file_extent_hole(&rec->holes, hole->start,
2895                                            hole->len);
2896                 if (ret < 0)
2897                         goto out;
2898                 if (RB_EMPTY_ROOT(&rec->holes))
2899                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2900                 node = rb_first(&rec->holes);
2901         }
2902         /* special case for a file losing all its file extent */
2903         if (!found) {
2904                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2905                                        round_up(rec->isize, root->sectorsize));
2906                 if (ret < 0)
2907                         goto out;
2908         }
2909         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2910                rec->ino, root->objectid);
2911 out:
2912         return ret;
2913 }
2914
2915 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2916 {
2917         struct btrfs_trans_handle *trans;
2918         struct btrfs_path *path;
2919         int ret = 0;
2920
2921         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2922                              I_ERR_NO_ORPHAN_ITEM |
2923                              I_ERR_LINK_COUNT_WRONG |
2924                              I_ERR_NO_INODE_ITEM |
2925                              I_ERR_FILE_EXTENT_ORPHAN |
2926                              I_ERR_FILE_EXTENT_DISCOUNT|
2927                              I_ERR_FILE_NBYTES_WRONG)))
2928                 return rec->errors;
2929
2930         path = btrfs_alloc_path();
2931         if (!path)
2932                 return -ENOMEM;
2933
2934         /*
2935          * For nlink repair, it may create a dir and add link, so
2936          * 2 for parent(256)'s dir_index and dir_item
2937          * 2 for lost+found dir's inode_item and inode_ref
2938          * 1 for the new inode_ref of the file
2939          * 2 for lost+found dir's dir_index and dir_item for the file
2940          */
2941         trans = btrfs_start_transaction(root, 7);
2942         if (IS_ERR(trans)) {
2943                 btrfs_free_path(path);
2944                 return PTR_ERR(trans);
2945         }
2946
2947         if (rec->errors & I_ERR_NO_INODE_ITEM)
2948                 ret = repair_inode_no_item(trans, root, path, rec);
2949         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2950                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2951         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2952                 ret = repair_inode_discount_extent(trans, root, path, rec);
2953         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2954                 ret = repair_inode_isize(trans, root, path, rec);
2955         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2956                 ret = repair_inode_orphan_item(trans, root, path, rec);
2957         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2958                 ret = repair_inode_nlinks(trans, root, path, rec);
2959         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2960                 ret = repair_inode_nbytes(trans, root, path, rec);
2961         btrfs_commit_transaction(trans, root);
2962         btrfs_free_path(path);
2963         return ret;
2964 }
2965
/*
 * Check all inode records cached for @root and, when 'repair' (declared
 * outside this function) is set, try to fix them.
 *
 * Steps:
 *  1. Skip roots whose root item has zero refs.
 *  2. Remember the highest inode number for a later 'lost+found' creation.
 *  3. In repair mode, fix backrefs in stages (delete invalid ones first,
 *     add missing ones second). If anything was repaired or failed, all
 *     records are freed and an error is returned.
 *  4. Check the root directory record, recreating it in repair mode when it
 *     is missing.
 *  5. Drain @inode_cache: every record is removed from the cache, checked,
 *     possibly repaired, reported if still bad, and freed.
 *
 * Return 0 if no unresolved error was counted, -1 if at least one was,
 * -EAGAIN after backref repairs or after recreating the root dir, or another
 * negative value if backref repair or starting the transaction failed.
 */
static int check_inode_recs(struct btrfs_root *root,
                            struct cache_tree *inode_cache)
{
        struct cache_extent *cache;
        struct ptr_node *node;
        struct inode_record *rec;
        struct inode_backref *backref;
        int stage = 0;
        int ret = 0;
        int err = 0;
        u64 error = 0;
        u64 root_dirid = btrfs_root_dirid(&root->root_item);

        /* Nothing is checked for a root with no refs; only warn on leftovers */
        if (btrfs_root_refs(&root->root_item) == 0) {
                if (!cache_tree_empty(inode_cache))
                        fprintf(stderr, "warning line %d\n", __LINE__);
                return 0;
        }

        /*
         * We need to record the highest inode number for later 'lost+found'
         * dir creation.
         * We must select an ino not used/referred by any existing inode, or
         * 'lost+found' ino may be a missing ino in a corrupted leaf,
         * this may cause 'lost+found' dir has wrong nlinks.
         */
        cache = last_cache_extent(inode_cache);
        if (cache) {
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                if (rec->ino > root->highest_inode)
                        root->highest_inode = rec->ino;
        }

        /*
         * We need to repair backrefs first because we could change some of the
         * errors in the inode recs.
         *
         * We also need to go through and delete invalid backrefs first and then
         * add the correct ones second.  We do this because we may get EEXIST
         * when adding back the correct index because we hadn't yet deleted the
         * invalid index.
         *
         * For example, if we were missing a dir index then the directory's
         * isize would be wrong, so if we fixed the isize to what we thought it
         * would be and then fixed the backref we'd still have an invalid fs, so
         * we need to add back the dir index and then check to see if the isize
         * is still wrong.
         *
         * Stage 1 passes delete=1 to repair_inode_backrefs(), stage 2 passes
         * delete=0. Stage 3 only runs when err is set and frees every record.
         */
        while (stage < 3) {
                stage++;
                if (stage == 3 && !err)
                        break;

                cache = search_cache_extent(inode_cache, 0);
                /* Without 'repair' this inner loop never runs */
                while (repair && cache) {
                        node = container_of(cache, struct ptr_node, cache);
                        rec = node->data;
                        /* Advance first: the current node may be freed below */
                        cache = next_cache_extent(cache);

                        /* Need to free everything up and rescan */
                        if (stage == 3) {
                                remove_cache_extent(inode_cache, &node->cache);
                                free(node);
                                free_inode_rec(rec);
                                continue;
                        }

                        if (list_empty(&rec->backrefs))
                                continue;

                        ret = repair_inode_backrefs(root, rec, inode_cache,
                                                    stage == 1);
                        if (ret < 0) {
                                /* Hard failure: jump straight to the cleanup stage */
                                err = ret;
                                stage = 2;
                                break;
                        } if (ret > 0) {
                                /* Something was repaired, a rescan is required */
                                err = -EAGAIN;
                        }
                }
        }
        if (err)
                return err;

        /* Look up the root directory's record; a missing one yields NULL here */
        rec = get_inode_rec(inode_cache, root_dirid, 0);
        BUG_ON(IS_ERR(rec));
        if (rec) {
                ret = check_root_dir(rec);
                if (ret) {
                        fprintf(stderr, "root %llu root dir %llu error\n",
                                (unsigned long long)root->root_key.objectid,
                                (unsigned long long)root_dirid);
                        print_inode_error(root, rec);
                        error++;
                }
        } else {
                if (repair) {
                        struct btrfs_trans_handle *trans;

                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                err = PTR_ERR(trans);
                                return err;
                        }

                        fprintf(stderr,
                                "root %llu missing its root dir, recreating\n",
                                (unsigned long long)root->objectid);

                        ret = btrfs_make_root_dir(trans, root, root_dirid);
                        BUG_ON(ret);

                        btrfs_commit_transaction(trans, root);
                        /* The root dir was recreated: ask the caller to rescan */
                        return -EAGAIN;
                }

                /* NOTE(review): 'error' is not incremented on this path -- confirm */
                fprintf(stderr, "root %llu root dir %llu not found\n",
                        (unsigned long long)root->root_key.objectid,
                        (unsigned long long)root_dirid);
        }

        /* Drain the cache: each record is removed, examined and freed */
        while (1) {
                cache = search_cache_extent(inode_cache, 0);
                if (!cache)
                        break;
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                remove_cache_extent(inode_cache, &node->cache);
                free(node);
                /* The root dir was handled above; the orphan objectid is skipped */
                if (rec->ino == root_dirid ||
                    rec->ino == BTRFS_ORPHAN_OBJECTID) {
                        free_inode_rec(rec);
                        continue;
                }

                /* Drop the "no orphan item" error if the item does exist */
                if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
                        ret = check_orphan_item(root, rec->ino);
                        if (ret == 0)
                                rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
                        if (can_free_inode_rec(rec)) {
                                free_inode_rec(rec);
                                continue;
                        }
                }

                if (!rec->found_inode_item)
                        rec->errors |= I_ERR_NO_INODE_ITEM;
                if (rec->found_link != rec->nlink)
                        rec->errors |= I_ERR_LINK_COUNT_WRONG;
                if (repair) {
                        ret = try_repair_inode(root, rec);
                        if (ret == 0 && can_free_inode_rec(rec)) {
                                free_inode_rec(rec);
                                continue;
                        }
                        ret = 0;
                }

                /*
                 * In repair mode ret was just reset to 0, so only non-repair
                 * runs count the record as an error here.
                 */
                if (!(repair && ret == 0))
                        error++;
                print_inode_error(root, rec);
                /* Flag what each remaining backref is missing and report it */
                list_for_each_entry(backref, &rec->backrefs, list) {
                        if (!backref->found_dir_item)
                                backref->errors |= REF_ERR_NO_DIR_ITEM;
                        if (!backref->found_dir_index)
                                backref->errors |= REF_ERR_NO_DIR_INDEX;
                        if (!backref->found_inode_ref)
                                backref->errors |= REF_ERR_NO_INODE_REF;
                        fprintf(stderr, "\tunresolved ref dir %llu index %llu"
                                " namelen %u name %s filetype %d errors %x",
                                (unsigned long long)backref->dir,
                                (unsigned long long)backref->index,
                                backref->namelen, backref->name,
                                backref->filetype, backref->errors);
                        print_ref_error(backref->errors);
                }
                free_inode_rec(rec);
        }
        return (error > 0) ? -1 : 0;
}
3147
3148 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3149                                         u64 objectid)
3150 {
3151         struct cache_extent *cache;
3152         struct root_record *rec = NULL;
3153         int ret;
3154
3155         cache = lookup_cache_extent(root_cache, objectid, 1);
3156         if (cache) {
3157                 rec = container_of(cache, struct root_record, cache);
3158         } else {
3159                 rec = calloc(1, sizeof(*rec));
3160                 if (!rec)
3161                         return ERR_PTR(-ENOMEM);
3162                 rec->objectid = objectid;
3163                 INIT_LIST_HEAD(&rec->backrefs);
3164                 rec->cache.start = objectid;
3165                 rec->cache.size = 1;
3166
3167                 ret = insert_cache_extent(root_cache, &rec->cache);
3168                 if (ret)
3169                         return ERR_PTR(-EEXIST);
3170         }
3171         return rec;
3172 }
3173
3174 static struct root_backref *get_root_backref(struct root_record *rec,
3175                                              u64 ref_root, u64 dir, u64 index,
3176                                              const char *name, int namelen)
3177 {
3178         struct root_backref *backref;
3179
3180         list_for_each_entry(backref, &rec->backrefs, list) {
3181                 if (backref->ref_root != ref_root || backref->dir != dir ||
3182                     backref->namelen != namelen)
3183                         continue;
3184                 if (memcmp(name, backref->name, namelen))
3185                         continue;
3186                 return backref;
3187         }
3188
3189         backref = calloc(1, sizeof(*backref) + namelen + 1);
3190         if (!backref)
3191                 return NULL;
3192         backref->ref_root = ref_root;
3193         backref->dir = dir;
3194         backref->index = index;
3195         backref->namelen = namelen;
3196         memcpy(backref->name, name, namelen);
3197         backref->name[namelen] = '\0';
3198         list_add_tail(&backref->list, &rec->backrefs);
3199         return backref;
3200 }
3201
3202 static void free_root_record(struct cache_extent *cache)
3203 {
3204         struct root_record *rec;
3205         struct root_backref *backref;
3206
3207         rec = container_of(cache, struct root_record, cache);
3208         while (!list_empty(&rec->backrefs)) {
3209                 backref = to_root_backref(rec->backrefs.next);
3210                 list_del(&backref->list);
3211                 free(backref);
3212         }
3213
3214         free(rec);
3215 }
3216
3217 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3218
3219 static int add_root_backref(struct cache_tree *root_cache,
3220                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3221                             const char *name, int namelen,
3222                             int item_type, int errors)
3223 {
3224         struct root_record *rec;
3225         struct root_backref *backref;
3226
3227         rec = get_root_rec(root_cache, root_id);
3228         BUG_ON(IS_ERR(rec));
3229         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3230         BUG_ON(!backref);
3231
3232         backref->errors |= errors;
3233
3234         if (item_type != BTRFS_DIR_ITEM_KEY) {
3235                 if (backref->found_dir_index || backref->found_back_ref ||
3236                     backref->found_forward_ref) {
3237                         if (backref->index != index)
3238                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3239                 } else {
3240                         backref->index = index;
3241                 }
3242         }
3243
3244         if (item_type == BTRFS_DIR_ITEM_KEY) {
3245                 if (backref->found_forward_ref)
3246                         rec->found_ref++;
3247                 backref->found_dir_item = 1;
3248         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3249                 backref->found_dir_index = 1;
3250         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3251                 if (backref->found_forward_ref)
3252                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3253                 else if (backref->found_dir_item)
3254                         rec->found_ref++;
3255                 backref->found_forward_ref = 1;
3256         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3257                 if (backref->found_back_ref)
3258                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3259                 backref->found_back_ref = 1;
3260         } else {
3261                 BUG_ON(1);
3262         }
3263
3264         if (backref->found_forward_ref && backref->found_dir_item)
3265                 backref->reachable = 1;
3266         return 0;
3267 }
3268
3269 static int merge_root_recs(struct btrfs_root *root,
3270                            struct cache_tree *src_cache,
3271                            struct cache_tree *dst_cache)
3272 {
3273         struct cache_extent *cache;
3274         struct ptr_node *node;
3275         struct inode_record *rec;
3276         struct inode_backref *backref;
3277         int ret = 0;
3278
3279         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3280                 free_inode_recs_tree(src_cache);
3281                 return 0;
3282         }
3283
3284         while (1) {
3285                 cache = search_cache_extent(src_cache, 0);
3286                 if (!cache)
3287                         break;
3288                 node = container_of(cache, struct ptr_node, cache);
3289                 rec = node->data;
3290                 remove_cache_extent(src_cache, &node->cache);
3291                 free(node);
3292
3293                 ret = is_child_root(root, root->objectid, rec->ino);
3294                 if (ret < 0)
3295                         break;
3296                 else if (ret == 0)
3297                         goto skip;
3298
3299                 list_for_each_entry(backref, &rec->backrefs, list) {
3300                         BUG_ON(backref->found_inode_ref);
3301                         if (backref->found_dir_item)
3302                                 add_root_backref(dst_cache, rec->ino,
3303                                         root->root_key.objectid, backref->dir,
3304                                         backref->index, backref->name,
3305                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3306                                         backref->errors);
3307                         if (backref->found_dir_index)
3308                                 add_root_backref(dst_cache, rec->ino,
3309                                         root->root_key.objectid, backref->dir,
3310                                         backref->index, backref->name,
3311                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3312                                         backref->errors);
3313                 }
3314 skip:
3315                 free_inode_rec(rec);
3316         }
3317         if (ret < 0)
3318                 return ret;
3319         return 0;
3320 }
3321
3322 static int check_root_refs(struct btrfs_root *root,
3323                            struct cache_tree *root_cache)
3324 {
3325         struct root_record *rec;
3326         struct root_record *ref_root;
3327         struct root_backref *backref;
3328         struct cache_extent *cache;
3329         int loop = 1;
3330         int ret;
3331         int error;
3332         int errors = 0;
3333
3334         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3335         BUG_ON(IS_ERR(rec));
3336         rec->found_ref = 1;
3337
3338         /* fixme: this can not detect circular references */
3339         while (loop) {
3340                 loop = 0;
3341                 cache = search_cache_extent(root_cache, 0);
3342                 while (1) {
3343                         if (!cache)
3344                                 break;
3345                         rec = container_of(cache, struct root_record, cache);
3346                         cache = next_cache_extent(cache);
3347
3348                         if (rec->found_ref == 0)
3349                                 continue;
3350
3351                         list_for_each_entry(backref, &rec->backrefs, list) {
3352                                 if (!backref->reachable)
3353                                         continue;
3354
3355                                 ref_root = get_root_rec(root_cache,
3356                                                         backref->ref_root);
3357                                 BUG_ON(IS_ERR(ref_root));
3358                                 if (ref_root->found_ref > 0)
3359                                         continue;
3360
3361                                 backref->reachable = 0;
3362                                 rec->found_ref--;
3363                                 if (rec->found_ref == 0)
3364                                         loop = 1;
3365                         }
3366                 }
3367         }
3368
3369         cache = search_cache_extent(root_cache, 0);
3370         while (1) {
3371                 if (!cache)
3372                         break;
3373                 rec = container_of(cache, struct root_record, cache);
3374                 cache = next_cache_extent(cache);
3375
3376                 if (rec->found_ref == 0 &&
3377                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3378                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3379                         ret = check_orphan_item(root->fs_info->tree_root,
3380                                                 rec->objectid);
3381                         if (ret == 0)
3382                                 continue;
3383
3384                         /*
3385                          * If we don't have a root item then we likely just have
3386                          * a dir item in a snapshot for this root but no actual
3387                          * ref key or anything so it's meaningless.
3388                          */
3389                         if (!rec->found_root_item)
3390                                 continue;
3391                         errors++;
3392                         fprintf(stderr, "fs tree %llu not referenced\n",
3393                                 (unsigned long long)rec->objectid);
3394                 }
3395
3396                 error = 0;
3397                 if (rec->found_ref > 0 && !rec->found_root_item)
3398                         error = 1;
3399                 list_for_each_entry(backref, &rec->backrefs, list) {
3400                         if (!backref->found_dir_item)
3401                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3402                         if (!backref->found_dir_index)
3403                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3404                         if (!backref->found_back_ref)
3405                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3406                         if (!backref->found_forward_ref)
3407                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3408                         if (backref->reachable && backref->errors)
3409                                 error = 1;
3410                 }
3411                 if (!error)
3412                         continue;
3413
3414                 errors++;
3415                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3416                         (unsigned long long)rec->objectid, rec->found_ref,
3417                          rec->found_root_item ? "" : "not found");
3418
3419                 list_for_each_entry(backref, &rec->backrefs, list) {
3420                         if (!backref->reachable)
3421                                 continue;
3422                         if (!backref->errors && rec->found_root_item)
3423                                 continue;
3424                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3425                                 " index %llu namelen %u name %s errors %x\n",
3426                                 (unsigned long long)backref->ref_root,
3427                                 (unsigned long long)backref->dir,
3428                                 (unsigned long long)backref->index,
3429                                 backref->namelen, backref->name,
3430                                 backref->errors);
3431                         print_ref_error(backref->errors);
3432                 }
3433         }
3434         return errors > 0 ? 1 : 0;
3435 }
3436
3437 static int process_root_ref(struct extent_buffer *eb, int slot,
3438                             struct btrfs_key *key,
3439                             struct cache_tree *root_cache)
3440 {
3441         u64 dirid;
3442         u64 index;
3443         u32 len;
3444         u32 name_len;
3445         struct btrfs_root_ref *ref;
3446         char namebuf[BTRFS_NAME_LEN];
3447         int error;
3448
3449         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3450
3451         dirid = btrfs_root_ref_dirid(eb, ref);
3452         index = btrfs_root_ref_sequence(eb, ref);
3453         name_len = btrfs_root_ref_name_len(eb, ref);
3454
3455         if (name_len <= BTRFS_NAME_LEN) {
3456                 len = name_len;
3457                 error = 0;
3458         } else {
3459                 len = BTRFS_NAME_LEN;
3460                 error = REF_ERR_NAME_TOO_LONG;
3461         }
3462         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3463
3464         if (key->type == BTRFS_ROOT_REF_KEY) {
3465                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3466                                  index, namebuf, len, key->type, error);
3467         } else {
3468                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3469                                  index, namebuf, len, key->type, error);
3470         }
3471         return 0;
3472 }
3473
3474 static void free_corrupt_block(struct cache_extent *cache)
3475 {
3476         struct btrfs_corrupt_block *corrupt;
3477
3478         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3479         free(corrupt);
3480 }
3481
3482 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3483
3484 /*
3485  * Repair the btree of the given root.
3486  *
3487  * The fix is to remove the node key in corrupt_blocks cache_tree.
3488  * and rebalance the tree.
3489  * After the fix, the btree should be writeable.
3490  */
3491 static int repair_btree(struct btrfs_root *root,
3492                         struct cache_tree *corrupt_blocks)
3493 {
3494         struct btrfs_trans_handle *trans;
3495         struct btrfs_path *path;
3496         struct btrfs_corrupt_block *corrupt;
3497         struct cache_extent *cache;
3498         struct btrfs_key key;
3499         u64 offset;
3500         int level;
3501         int ret = 0;
3502
3503         if (cache_tree_empty(corrupt_blocks))
3504                 return 0;
3505
3506         path = btrfs_alloc_path();
3507         if (!path)
3508                 return -ENOMEM;
3509
3510         trans = btrfs_start_transaction(root, 1);
3511         if (IS_ERR(trans)) {
3512                 ret = PTR_ERR(trans);
3513                 fprintf(stderr, "Error starting transaction: %s\n",
3514                         strerror(-ret));
3515                 goto out_free_path;
3516         }
3517         cache = first_cache_extent(corrupt_blocks);
3518         while (cache) {
3519                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3520                                        cache);
3521                 level = corrupt->level;
3522                 path->lowest_level = level;
3523                 key.objectid = corrupt->key.objectid;
3524                 key.type = corrupt->key.type;
3525                 key.offset = corrupt->key.offset;
3526
3527                 /*
3528                  * Here we don't want to do any tree balance, since it may
3529                  * cause a balance with corrupted brother leaf/node,
3530                  * so ins_len set to 0 here.
3531                  * Balance will be done after all corrupt node/leaf is deleted.
3532                  */
3533                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3534                 if (ret < 0)
3535                         goto out;
3536                 offset = btrfs_node_blockptr(path->nodes[level],
3537                                              path->slots[level]);
3538
3539                 /* Remove the ptr */
3540                 ret = btrfs_del_ptr(trans, root, path, level,
3541                                     path->slots[level]);
3542                 if (ret < 0)
3543                         goto out;
3544                 /*
3545                  * Remove the corresponding extent
3546                  * return value is not concerned.
3547                  */
3548                 btrfs_release_path(path);
3549                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3550                                         0, root->root_key.objectid,
3551                                         level - 1, 0);
3552                 cache = next_cache_extent(cache);
3553         }
3554
3555         /* Balance the btree using btrfs_search_slot() */
3556         cache = first_cache_extent(corrupt_blocks);
3557         while (cache) {
3558                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3559                                        cache);
3560                 memcpy(&key, &corrupt->key, sizeof(key));
3561                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3562                 if (ret < 0)
3563                         goto out;
3564                 /* return will always >0 since it won't find the item */
3565                 ret = 0;
3566                 btrfs_release_path(path);
3567                 cache = next_cache_extent(cache);
3568         }
3569 out:
3570         btrfs_commit_transaction(trans, root);
3571 out_free_path:
3572         btrfs_free_path(path);
3573         return ret;
3574 }
3575
3576 static int check_fs_root(struct btrfs_root *root,
3577                          struct cache_tree *root_cache,
3578                          struct walk_control *wc)
3579 {
3580         int ret = 0;
3581         int err = 0;
3582         int wret;
3583         int level;
3584         struct btrfs_path path;
3585         struct shared_node root_node;
3586         struct root_record *rec;
3587         struct btrfs_root_item *root_item = &root->root_item;
3588         struct cache_tree corrupt_blocks;
3589         struct orphan_data_extent *orphan;
3590         struct orphan_data_extent *tmp;
3591         enum btrfs_tree_block_status status;
3592         struct node_refs nrefs;
3593
3594         /*
3595          * Reuse the corrupt_block cache tree to record corrupted tree block
3596          *
3597          * Unlike the usage in extent tree check, here we do it in a per
3598          * fs/subvol tree base.
3599          */
3600         cache_tree_init(&corrupt_blocks);
3601         root->fs_info->corrupt_blocks = &corrupt_blocks;
3602
3603         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3604                 rec = get_root_rec(root_cache, root->root_key.objectid);
3605                 BUG_ON(IS_ERR(rec));
3606                 if (btrfs_root_refs(root_item) > 0)
3607                         rec->found_root_item = 1;
3608         }
3609
3610         btrfs_init_path(&path);
3611         memset(&root_node, 0, sizeof(root_node));
3612         cache_tree_init(&root_node.root_cache);
3613         cache_tree_init(&root_node.inode_cache);
3614         memset(&nrefs, 0, sizeof(nrefs));
3615
3616         /* Move the orphan extent record to corresponding inode_record */
3617         list_for_each_entry_safe(orphan, tmp,
3618                                  &root->orphan_data_extents, list) {
3619                 struct inode_record *inode;
3620
3621                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3622                                       1);
3623                 BUG_ON(IS_ERR(inode));
3624                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3625                 list_move(&orphan->list, &inode->orphan_extents);
3626         }
3627
3628         level = btrfs_header_level(root->node);
3629         memset(wc->nodes, 0, sizeof(wc->nodes));
3630         wc->nodes[level] = &root_node;
3631         wc->active_node = level;
3632         wc->root_level = level;
3633
3634         /* We may not have checked the root block, lets do that now */
3635         if (btrfs_is_leaf(root->node))
3636                 status = btrfs_check_leaf(root, NULL, root->node);
3637         else
3638                 status = btrfs_check_node(root, NULL, root->node);
3639         if (status != BTRFS_TREE_BLOCK_CLEAN)
3640                 return -EIO;
3641
3642         if (btrfs_root_refs(root_item) > 0 ||
3643             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3644                 path.nodes[level] = root->node;
3645                 extent_buffer_get(root->node);
3646                 path.slots[level] = 0;
3647         } else {
3648                 struct btrfs_key key;
3649                 struct btrfs_disk_key found_key;
3650
3651                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3652                 level = root_item->drop_level;
3653                 path.lowest_level = level;
3654                 if (level > btrfs_header_level(root->node) ||
3655                     level >= BTRFS_MAX_LEVEL) {
3656                         error("ignoring invalid drop level: %u", level);
3657                         goto skip_walking;
3658                 }
3659                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3660                 if (wret < 0)
3661                         goto skip_walking;
3662                 btrfs_node_key(path.nodes[level], &found_key,
3663                                 path.slots[level]);
3664                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3665                                         sizeof(found_key)));
3666         }
3667
3668         while (1) {
3669                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3670                 if (wret < 0)
3671                         ret = wret;
3672                 if (wret != 0)
3673                         break;
3674
3675                 wret = walk_up_tree(root, &path, wc, &level);
3676                 if (wret < 0)
3677                         ret = wret;
3678                 if (wret != 0)
3679                         break;
3680         }
3681 skip_walking:
3682         btrfs_release_path(&path);
3683
3684         if (!cache_tree_empty(&corrupt_blocks)) {
3685                 struct cache_extent *cache;
3686                 struct btrfs_corrupt_block *corrupt;
3687
3688                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3689                        root->root_key.objectid);
3690                 cache = first_cache_extent(&corrupt_blocks);
3691                 while (cache) {
3692                         corrupt = container_of(cache,
3693                                                struct btrfs_corrupt_block,
3694                                                cache);
3695                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3696                                cache->start, corrupt->level,
3697                                corrupt->key.objectid, corrupt->key.type,
3698                                corrupt->key.offset);
3699                         cache = next_cache_extent(cache);
3700                 }
3701                 if (repair) {
3702                         printf("Try to repair the btree for root %llu\n",
3703                                root->root_key.objectid);
3704                         ret = repair_btree(root, &corrupt_blocks);
3705                         if (ret < 0)
3706                                 fprintf(stderr, "Failed to repair btree: %s\n",
3707                                         strerror(-ret));
3708                         if (!ret)
3709                                 printf("Btree for root %llu is fixed\n",
3710                                        root->root_key.objectid);
3711                 }
3712         }
3713
3714         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3715         if (err < 0)
3716                 ret = err;
3717
3718         if (root_node.current) {
3719                 root_node.current->checked = 1;
3720                 maybe_free_inode_rec(&root_node.inode_cache,
3721                                 root_node.current);
3722         }
3723
3724         err = check_inode_recs(root, &root_node.inode_cache);
3725         if (!ret)
3726                 ret = err;
3727
3728         free_corrupt_blocks_tree(&corrupt_blocks);
3729         root->fs_info->corrupt_blocks = NULL;
3730         free_orphan_data_extents(&root->orphan_data_extents);
3731         return ret;
3732 }
3733
3734 static int fs_root_objectid(u64 objectid)
3735 {
3736         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3737             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3738                 return 1;
3739         return is_fstree(objectid);
3740 }
3741
3742 static int check_fs_roots(struct btrfs_root *root,
3743                           struct cache_tree *root_cache)
3744 {
3745         struct btrfs_path path;
3746         struct btrfs_key key;
3747         struct walk_control wc;
3748         struct extent_buffer *leaf, *tree_node;
3749         struct btrfs_root *tmp_root;
3750         struct btrfs_root *tree_root = root->fs_info->tree_root;
3751         int ret;
3752         int err = 0;
3753
3754         if (ctx.progress_enabled) {
3755                 ctx.tp = TASK_FS_ROOTS;
3756                 task_start(ctx.info);
3757         }
3758
3759         /*
3760          * Just in case we made any changes to the extent tree that weren't
3761          * reflected into the free space cache yet.
3762          */
3763         if (repair)
3764                 reset_cached_block_groups(root->fs_info);
3765         memset(&wc, 0, sizeof(wc));
3766         cache_tree_init(&wc.shared);
3767         btrfs_init_path(&path);
3768
3769 again:
3770         key.offset = 0;
3771         key.objectid = 0;
3772         key.type = BTRFS_ROOT_ITEM_KEY;
3773         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3774         if (ret < 0) {
3775                 err = 1;
3776                 goto out;
3777         }
3778         tree_node = tree_root->node;
3779         while (1) {
3780                 if (tree_node != tree_root->node) {
3781                         free_root_recs_tree(root_cache);
3782                         btrfs_release_path(&path);
3783                         goto again;
3784                 }
3785                 leaf = path.nodes[0];
3786                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3787                         ret = btrfs_next_leaf(tree_root, &path);
3788                         if (ret) {
3789                                 if (ret < 0)
3790                                         err = 1;
3791                                 break;
3792                         }
3793                         leaf = path.nodes[0];
3794                 }
3795                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3796                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3797                     fs_root_objectid(key.objectid)) {
3798                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3799                                 tmp_root = btrfs_read_fs_root_no_cache(
3800                                                 root->fs_info, &key);
3801                         } else {
3802                                 key.offset = (u64)-1;
3803                                 tmp_root = btrfs_read_fs_root(
3804                                                 root->fs_info, &key);
3805                         }
3806                         if (IS_ERR(tmp_root)) {
3807                                 err = 1;
3808                                 goto next;
3809                         }
3810                         ret = check_fs_root(tmp_root, root_cache, &wc);
3811                         if (ret == -EAGAIN) {
3812                                 free_root_recs_tree(root_cache);
3813                                 btrfs_release_path(&path);
3814                                 goto again;
3815                         }
3816                         if (ret)
3817                                 err = 1;
3818                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3819                                 btrfs_free_fs_root(tmp_root);
3820                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3821                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3822                         process_root_ref(leaf, path.slots[0], &key,
3823                                          root_cache);
3824                 }
3825 next:
3826                 path.slots[0]++;
3827         }
3828 out:
3829         btrfs_release_path(&path);
3830         if (err)
3831                 free_extent_cache_tree(&wc.shared);
3832         if (!cache_tree_empty(&wc.shared))
3833                 fprintf(stderr, "warning line %d\n", __LINE__);
3834
3835         task_stop(ctx.info);
3836
3837         return err;
3838 }
3839
3840 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3841 {
3842         struct list_head *cur = rec->backrefs.next;
3843         struct extent_backref *back;
3844         struct tree_backref *tback;
3845         struct data_backref *dback;
3846         u64 found = 0;
3847         int err = 0;
3848
3849         while(cur != &rec->backrefs) {
3850                 back = to_extent_backref(cur);
3851                 cur = cur->next;
3852                 if (!back->found_extent_tree) {
3853                         err = 1;
3854                         if (!print_errs)
3855                                 goto out;
3856                         if (back->is_data) {
3857                                 dback = to_data_backref(back);
3858                                 fprintf(stderr, "Backref %llu %s %llu"
3859                                         " owner %llu offset %llu num_refs %lu"
3860                                         " not found in extent tree\n",
3861                                         (unsigned long long)rec->start,
3862                                         back->full_backref ?
3863                                         "parent" : "root",
3864                                         back->full_backref ?
3865                                         (unsigned long long)dback->parent:
3866                                         (unsigned long long)dback->root,
3867                                         (unsigned long long)dback->owner,
3868                                         (unsigned long long)dback->offset,
3869                                         (unsigned long)dback->num_refs);
3870                         } else {
3871                                 tback = to_tree_backref(back);
3872                                 fprintf(stderr, "Backref %llu parent %llu"
3873                                         " root %llu not found in extent tree\n",
3874                                         (unsigned long long)rec->start,
3875                                         (unsigned long long)tback->parent,
3876                                         (unsigned long long)tback->root);
3877                         }
3878                 }
3879                 if (!back->is_data && !back->found_ref) {
3880                         err = 1;
3881                         if (!print_errs)
3882                                 goto out;
3883                         tback = to_tree_backref(back);
3884                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3885                                 (unsigned long long)rec->start,
3886                                 back->full_backref ? "parent" : "root",
3887                                 back->full_backref ?
3888                                 (unsigned long long)tback->parent :
3889                                 (unsigned long long)tback->root, back);
3890                 }
3891                 if (back->is_data) {
3892                         dback = to_data_backref(back);
3893                         if (dback->found_ref != dback->num_refs) {
3894                                 err = 1;
3895                                 if (!print_errs)
3896                                         goto out;
3897                                 fprintf(stderr, "Incorrect local backref count"
3898                                         " on %llu %s %llu owner %llu"
3899                                         " offset %llu found %u wanted %u back %p\n",
3900                                         (unsigned long long)rec->start,
3901                                         back->full_backref ?
3902                                         "parent" : "root",
3903                                         back->full_backref ?
3904                                         (unsigned long long)dback->parent:
3905                                         (unsigned long long)dback->root,
3906                                         (unsigned long long)dback->owner,
3907                                         (unsigned long long)dback->offset,
3908                                         dback->found_ref, dback->num_refs, back);
3909                         }
3910                         if (dback->disk_bytenr != rec->start) {
3911                                 err = 1;
3912                                 if (!print_errs)
3913                                         goto out;
3914                                 fprintf(stderr, "Backref disk bytenr does not"
3915                                         " match extent record, bytenr=%llu, "
3916                                         "ref bytenr=%llu\n",
3917                                         (unsigned long long)rec->start,
3918                                         (unsigned long long)dback->disk_bytenr);
3919                         }
3920
3921                         if (dback->bytes != rec->nr) {
3922                                 err = 1;
3923                                 if (!print_errs)
3924                                         goto out;
3925                                 fprintf(stderr, "Backref bytes do not match "
3926                                         "extent backref, bytenr=%llu, ref "
3927                                         "bytes=%llu, backref bytes=%llu\n",
3928                                         (unsigned long long)rec->start,
3929                                         (unsigned long long)rec->nr,
3930                                         (unsigned long long)dback->bytes);
3931                         }
3932                 }
3933                 if (!back->is_data) {
3934                         found += 1;
3935                 } else {
3936                         dback = to_data_backref(back);
3937                         found += dback->found_ref;
3938                 }
3939         }
3940         if (found != rec->refs) {
3941                 err = 1;
3942                 if (!print_errs)
3943                         goto out;
3944                 fprintf(stderr, "Incorrect global backref count "
3945                         "on %llu found %llu wanted %llu\n",
3946                         (unsigned long long)rec->start,
3947                         (unsigned long long)found,
3948                         (unsigned long long)rec->refs);
3949         }
3950 out:
3951         return err;
3952 }
3953
3954 static int free_all_extent_backrefs(struct extent_record *rec)
3955 {
3956         struct extent_backref *back;
3957         struct list_head *cur;
3958         while (!list_empty(&rec->backrefs)) {
3959                 cur = rec->backrefs.next;
3960                 back = to_extent_backref(cur);
3961                 list_del(cur);
3962                 free(back);
3963         }
3964         return 0;
3965 }
3966
3967 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3968                                      struct cache_tree *extent_cache)
3969 {
3970         struct cache_extent *cache;
3971         struct extent_record *rec;
3972
3973         while (1) {
3974                 cache = first_cache_extent(extent_cache);
3975                 if (!cache)
3976                         break;
3977                 rec = container_of(cache, struct extent_record, cache);
3978                 remove_cache_extent(extent_cache, cache);
3979                 free_all_extent_backrefs(rec);
3980                 free(rec);
3981         }
3982 }
3983
3984 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3985                                  struct extent_record *rec)
3986 {
3987         if (rec->content_checked && rec->owner_ref_checked &&
3988             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3989             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3990             !rec->bad_full_backref && !rec->crossing_stripes &&
3991             !rec->wrong_chunk_type) {
3992                 remove_cache_extent(extent_cache, &rec->cache);
3993                 free_all_extent_backrefs(rec);
3994                 list_del_init(&rec->list);
3995                 free(rec);
3996         }
3997         return 0;
3998 }
3999
4000 static int check_owner_ref(struct btrfs_root *root,
4001                             struct extent_record *rec,
4002                             struct extent_buffer *buf)
4003 {
4004         struct extent_backref *node;
4005         struct tree_backref *back;
4006         struct btrfs_root *ref_root;
4007         struct btrfs_key key;
4008         struct btrfs_path path;
4009         struct extent_buffer *parent;
4010         int level;
4011         int found = 0;
4012         int ret;
4013
4014         list_for_each_entry(node, &rec->backrefs, list) {
4015                 if (node->is_data)
4016                         continue;
4017                 if (!node->found_ref)
4018                         continue;
4019                 if (node->full_backref)
4020                         continue;
4021                 back = to_tree_backref(node);
4022                 if (btrfs_header_owner(buf) == back->root)
4023                         return 0;
4024         }
4025         BUG_ON(rec->is_root);
4026
4027         /* try to find the block by search corresponding fs tree */
4028         key.objectid = btrfs_header_owner(buf);
4029         key.type = BTRFS_ROOT_ITEM_KEY;
4030         key.offset = (u64)-1;
4031
4032         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4033         if (IS_ERR(ref_root))
4034                 return 1;
4035
4036         level = btrfs_header_level(buf);
4037         if (level == 0)
4038                 btrfs_item_key_to_cpu(buf, &key, 0);
4039         else
4040                 btrfs_node_key_to_cpu(buf, &key, 0);
4041
4042         btrfs_init_path(&path);
4043         path.lowest_level = level + 1;
4044         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4045         if (ret < 0)
4046                 return 0;
4047
4048         parent = path.nodes[level + 1];
4049         if (parent && buf->start == btrfs_node_blockptr(parent,
4050                                                         path.slots[level + 1]))
4051                 found = 1;
4052
4053         btrfs_release_path(&path);
4054         return found ? 0 : 1;
4055 }
4056
4057 static int is_extent_tree_record(struct extent_record *rec)
4058 {
4059         struct list_head *cur = rec->backrefs.next;
4060         struct extent_backref *node;
4061         struct tree_backref *back;
4062         int is_extent = 0;
4063
4064         while(cur != &rec->backrefs) {
4065                 node = to_extent_backref(cur);
4066                 cur = cur->next;
4067                 if (node->is_data)
4068                         return 0;
4069                 back = to_tree_backref(node);
4070                 if (node->full_backref)
4071                         return 0;
4072                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4073                         is_extent = 1;
4074         }
4075         return is_extent;
4076 }
4077
4078
4079 static int record_bad_block_io(struct btrfs_fs_info *info,
4080                                struct cache_tree *extent_cache,
4081                                u64 start, u64 len)
4082 {
4083         struct extent_record *rec;
4084         struct cache_extent *cache;
4085         struct btrfs_key key;
4086
4087         cache = lookup_cache_extent(extent_cache, start, len);
4088         if (!cache)
4089                 return 0;
4090
4091         rec = container_of(cache, struct extent_record, cache);
4092         if (!is_extent_tree_record(rec))
4093                 return 0;
4094
4095         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4096         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4097 }
4098
4099 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4100                        struct extent_buffer *buf, int slot)
4101 {
4102         if (btrfs_header_level(buf)) {
4103                 struct btrfs_key_ptr ptr1, ptr2;
4104
4105                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4106                                    sizeof(struct btrfs_key_ptr));
4107                 read_extent_buffer(buf, &ptr2,
4108                                    btrfs_node_key_ptr_offset(slot + 1),
4109                                    sizeof(struct btrfs_key_ptr));
4110                 write_extent_buffer(buf, &ptr1,
4111                                     btrfs_node_key_ptr_offset(slot + 1),
4112                                     sizeof(struct btrfs_key_ptr));
4113                 write_extent_buffer(buf, &ptr2,
4114                                     btrfs_node_key_ptr_offset(slot),
4115                                     sizeof(struct btrfs_key_ptr));
4116                 if (slot == 0) {
4117                         struct btrfs_disk_key key;
4118                         btrfs_node_key(buf, &key, 0);
4119                         btrfs_fixup_low_keys(root, path, &key,
4120                                              btrfs_header_level(buf) + 1);
4121                 }
4122         } else {
4123                 struct btrfs_item *item1, *item2;
4124                 struct btrfs_key k1, k2;
4125                 char *item1_data, *item2_data;
4126                 u32 item1_offset, item2_offset, item1_size, item2_size;
4127
4128                 item1 = btrfs_item_nr(slot);
4129                 item2 = btrfs_item_nr(slot + 1);
4130                 btrfs_item_key_to_cpu(buf, &k1, slot);
4131                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4132                 item1_offset = btrfs_item_offset(buf, item1);
4133                 item2_offset = btrfs_item_offset(buf, item2);
4134                 item1_size = btrfs_item_size(buf, item1);
4135                 item2_size = btrfs_item_size(buf, item2);
4136
4137                 item1_data = malloc(item1_size);
4138                 if (!item1_data)
4139                         return -ENOMEM;
4140                 item2_data = malloc(item2_size);
4141                 if (!item2_data) {
4142                         free(item1_data);
4143                         return -ENOMEM;
4144                 }
4145
4146                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4147                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4148
4149                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4150                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4151                 free(item1_data);
4152                 free(item2_data);
4153
4154                 btrfs_set_item_offset(buf, item1, item2_offset);
4155                 btrfs_set_item_offset(buf, item2, item1_offset);
4156                 btrfs_set_item_size(buf, item1, item2_size);
4157                 btrfs_set_item_size(buf, item2, item1_size);
4158
4159                 path->slots[0] = slot;
4160                 btrfs_set_item_key_unsafe(root, path, &k2);
4161                 path->slots[0] = slot + 1;
4162                 btrfs_set_item_key_unsafe(root, path, &k1);
4163         }
4164         return 0;
4165 }
4166
4167 static int fix_key_order(struct btrfs_trans_handle *trans,
4168                          struct btrfs_root *root,
4169                          struct btrfs_path *path)
4170 {
4171         struct extent_buffer *buf;
4172         struct btrfs_key k1, k2;
4173         int i;
4174         int level = path->lowest_level;
4175         int ret = -EIO;
4176
4177         buf = path->nodes[level];
4178         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4179                 if (level) {
4180                         btrfs_node_key_to_cpu(buf, &k1, i);
4181                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4182                 } else {
4183                         btrfs_item_key_to_cpu(buf, &k1, i);
4184                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4185                 }
4186                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4187                         continue;
4188                 ret = swap_values(root, path, buf, i);
4189                 if (ret)
4190                         break;
4191                 btrfs_mark_buffer_dirty(buf);
4192                 i = 0;
4193         }
4194         return ret;
4195 }
4196
4197 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4198                              struct btrfs_root *root,
4199                              struct btrfs_path *path,
4200                              struct extent_buffer *buf, int slot)
4201 {
4202         struct btrfs_key key;
4203         int nritems = btrfs_header_nritems(buf);
4204
4205         btrfs_item_key_to_cpu(buf, &key, slot);
4206
4207         /* These are all the keys we can deal with missing. */
4208         if (key.type != BTRFS_DIR_INDEX_KEY &&
4209             key.type != BTRFS_EXTENT_ITEM_KEY &&
4210             key.type != BTRFS_METADATA_ITEM_KEY &&
4211             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4212             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4213                 return -1;
4214
4215         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4216                (unsigned long long)key.objectid, key.type,
4217                (unsigned long long)key.offset, slot, buf->start);
4218         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4219                               btrfs_item_nr_offset(slot + 1),
4220                               sizeof(struct btrfs_item) *
4221                               (nritems - slot - 1));
4222         btrfs_set_header_nritems(buf, nritems - 1);
4223         if (slot == 0) {
4224                 struct btrfs_disk_key disk_key;
4225
4226                 btrfs_item_key(buf, &disk_key, 0);
4227                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4228         }
4229         btrfs_mark_buffer_dirty(buf);
4230         return 0;
4231 }
4232
4233 static int fix_item_offset(struct btrfs_trans_handle *trans,
4234                            struct btrfs_root *root,
4235                            struct btrfs_path *path)
4236 {
4237         struct extent_buffer *buf;
4238         int i;
4239         int ret = 0;
4240
4241         /* We should only get this for leaves */
4242         BUG_ON(path->lowest_level);
4243         buf = path->nodes[0];
4244 again:
4245         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4246                 unsigned int shift = 0, offset;
4247
4248                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4249                     BTRFS_LEAF_DATA_SIZE(root)) {
4250                         if (btrfs_item_end_nr(buf, i) >
4251                             BTRFS_LEAF_DATA_SIZE(root)) {
4252                                 ret = delete_bogus_item(trans, root, path,
4253                                                         buf, i);
4254                                 if (!ret)
4255                                         goto again;
4256                                 fprintf(stderr, "item is off the end of the "
4257                                         "leaf, can't fix\n");
4258                                 ret = -EIO;
4259                                 break;
4260                         }
4261                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4262                                 btrfs_item_end_nr(buf, i);
4263                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4264                            btrfs_item_offset_nr(buf, i - 1)) {
4265                         if (btrfs_item_end_nr(buf, i) >
4266                             btrfs_item_offset_nr(buf, i - 1)) {
4267                                 ret = delete_bogus_item(trans, root, path,
4268                                                         buf, i);
4269                                 if (!ret)
4270                                         goto again;
4271                                 fprintf(stderr, "items overlap, can't fix\n");
4272                                 ret = -EIO;
4273                                 break;
4274                         }
4275                         shift = btrfs_item_offset_nr(buf, i - 1) -
4276                                 btrfs_item_end_nr(buf, i);
4277                 }
4278                 if (!shift)
4279                         continue;
4280
4281                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4282                        i, shift, (unsigned long long)buf->start);
4283                 offset = btrfs_item_offset_nr(buf, i);
4284                 memmove_extent_buffer(buf,
4285                                       btrfs_leaf_data(buf) + offset + shift,
4286                                       btrfs_leaf_data(buf) + offset,
4287                                       btrfs_item_size_nr(buf, i));
4288                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4289                                       offset + shift);
4290                 btrfs_mark_buffer_dirty(buf);
4291         }
4292
4293         /*
4294          * We may have moved things, in which case we want to exit so we don't
4295          * write those changes out.  Once we have proper abort functionality in
4296          * progs this can be changed to something nicer.
4297          */
4298         BUG_ON(ret);
4299         return ret;
4300 }
4301
4302 /*
4303  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4304  * then just return -EIO.
4305  */
4306 static int try_to_fix_bad_block(struct btrfs_root *root,
4307                                 struct extent_buffer *buf,
4308                                 enum btrfs_tree_block_status status)
4309 {
4310         struct btrfs_trans_handle *trans;
4311         struct ulist *roots;
4312         struct ulist_node *node;
4313         struct btrfs_root *search_root;
4314         struct btrfs_path *path;
4315         struct ulist_iterator iter;
4316         struct btrfs_key root_key, key;
4317         int ret;
4318
4319         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4320             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4321                 return -EIO;
4322
4323         path = btrfs_alloc_path();
4324         if (!path)
4325                 return -EIO;
4326
4327         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4328                                    0, &roots);
4329         if (ret) {
4330                 btrfs_free_path(path);
4331                 return -EIO;
4332         }
4333
4334         ULIST_ITER_INIT(&iter);
4335         while ((node = ulist_next(roots, &iter))) {
4336                 root_key.objectid = node->val;
4337                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4338                 root_key.offset = (u64)-1;
4339
4340                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4341                 if (IS_ERR(root)) {
4342                         ret = -EIO;
4343                         break;
4344                 }
4345
4346
4347                 trans = btrfs_start_transaction(search_root, 0);
4348                 if (IS_ERR(trans)) {
4349                         ret = PTR_ERR(trans);
4350                         break;
4351                 }
4352
4353                 path->lowest_level = btrfs_header_level(buf);
4354                 path->skip_check_block = 1;
4355                 if (path->lowest_level)
4356                         btrfs_node_key_to_cpu(buf, &key, 0);
4357                 else
4358                         btrfs_item_key_to_cpu(buf, &key, 0);
4359                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4360                 if (ret) {
4361                         ret = -EIO;
4362                         btrfs_commit_transaction(trans, search_root);
4363                         break;
4364                 }
4365                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4366                         ret = fix_key_order(trans, search_root, path);
4367                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4368                         ret = fix_item_offset(trans, search_root, path);
4369                 if (ret) {
4370                         btrfs_commit_transaction(trans, search_root);
4371                         break;
4372                 }
4373                 btrfs_release_path(path);
4374                 btrfs_commit_transaction(trans, search_root);
4375         }
4376         ulist_free(roots);
4377         btrfs_free_path(path);
4378         return ret;
4379 }
4380
4381 static int check_block(struct btrfs_root *root,
4382                        struct cache_tree *extent_cache,
4383                        struct extent_buffer *buf, u64 flags)
4384 {
4385         struct extent_record *rec;
4386         struct cache_extent *cache;
4387         struct btrfs_key key;
4388         enum btrfs_tree_block_status status;
4389         int ret = 0;
4390         int level;
4391
4392         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4393         if (!cache)
4394                 return 1;
4395         rec = container_of(cache, struct extent_record, cache);
4396         rec->generation = btrfs_header_generation(buf);
4397
4398         level = btrfs_header_level(buf);
4399         if (btrfs_header_nritems(buf) > 0) {
4400
4401                 if (level == 0)
4402                         btrfs_item_key_to_cpu(buf, &key, 0);
4403                 else
4404                         btrfs_node_key_to_cpu(buf, &key, 0);
4405
4406                 rec->info_objectid = key.objectid;
4407         }
4408         rec->info_level = level;
4409
4410         if (btrfs_is_leaf(buf))
4411                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4412         else
4413                 status = btrfs_check_node(root, &rec->parent_key, buf);
4414
4415         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4416                 if (repair)
4417                         status = try_to_fix_bad_block(root, buf, status);
4418                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4419                         ret = -EIO;
4420                         fprintf(stderr, "bad block %llu\n",
4421                                 (unsigned long long)buf->start);
4422                 } else {
4423                         /*
4424                          * Signal to callers we need to start the scan over
4425                          * again since we'll have cowed blocks.
4426                          */
4427                         ret = -EAGAIN;
4428                 }
4429         } else {
4430                 rec->content_checked = 1;
4431                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4432                         rec->owner_ref_checked = 1;
4433                 else {
4434                         ret = check_owner_ref(root, rec, buf);
4435                         if (!ret)
4436                                 rec->owner_ref_checked = 1;
4437                 }
4438         }
4439         if (!ret)
4440                 maybe_free_extent_rec(extent_cache, rec);
4441         return ret;
4442 }
4443
4444 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4445                                                 u64 parent, u64 root)
4446 {
4447         struct list_head *cur = rec->backrefs.next;
4448         struct extent_backref *node;
4449         struct tree_backref *back;
4450
4451         while(cur != &rec->backrefs) {
4452                 node = to_extent_backref(cur);
4453                 cur = cur->next;
4454                 if (node->is_data)
4455                         continue;
4456                 back = to_tree_backref(node);
4457                 if (parent > 0) {
4458                         if (!node->full_backref)
4459                                 continue;
4460                         if (parent == back->parent)
4461                                 return back;
4462                 } else {
4463                         if (node->full_backref)
4464                                 continue;
4465                         if (back->root == root)
4466                                 return back;
4467                 }
4468         }
4469         return NULL;
4470 }
4471
4472 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4473                                                 u64 parent, u64 root)
4474 {
4475         struct tree_backref *ref = malloc(sizeof(*ref));
4476
4477         if (!ref)
4478                 return NULL;
4479         memset(&ref->node, 0, sizeof(ref->node));
4480         if (parent > 0) {
4481                 ref->parent = parent;
4482                 ref->node.full_backref = 1;
4483         } else {
4484                 ref->root = root;
4485                 ref->node.full_backref = 0;
4486         }
4487         list_add_tail(&ref->node.list, &rec->backrefs);
4488
4489         return ref;
4490 }
4491
4492 static struct data_backref *find_data_backref(struct extent_record *rec,
4493                                                 u64 parent, u64 root,
4494                                                 u64 owner, u64 offset,
4495                                                 int found_ref,
4496                                                 u64 disk_bytenr, u64 bytes)
4497 {
4498         struct list_head *cur = rec->backrefs.next;
4499         struct extent_backref *node;
4500         struct data_backref *back;
4501
4502         while(cur != &rec->backrefs) {
4503                 node = to_extent_backref(cur);
4504                 cur = cur->next;
4505                 if (!node->is_data)
4506                         continue;
4507                 back = to_data_backref(node);
4508                 if (parent > 0) {
4509                         if (!node->full_backref)
4510                                 continue;
4511                         if (parent == back->parent)
4512                                 return back;
4513                 } else {
4514                         if (node->full_backref)
4515                                 continue;
4516                         if (back->root == root && back->owner == owner &&
4517                             back->offset == offset) {
4518                                 if (found_ref && node->found_ref &&
4519                                     (back->bytes != bytes ||
4520                                     back->disk_bytenr != disk_bytenr))
4521                                         continue;
4522                                 return back;
4523                         }
4524                 }
4525         }
4526         return NULL;
4527 }
4528
4529 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4530                                                 u64 parent, u64 root,
4531                                                 u64 owner, u64 offset,
4532                                                 u64 max_size)
4533 {
4534         struct data_backref *ref = malloc(sizeof(*ref));
4535
4536         if (!ref)
4537                 return NULL;
4538         memset(&ref->node, 0, sizeof(ref->node));
4539         ref->node.is_data = 1;
4540
4541         if (parent > 0) {
4542                 ref->parent = parent;
4543                 ref->owner = 0;
4544                 ref->offset = 0;
4545                 ref->node.full_backref = 1;
4546         } else {
4547                 ref->root = root;
4548                 ref->owner = owner;
4549                 ref->offset = offset;
4550                 ref->node.full_backref = 0;
4551         }
4552         ref->bytes = max_size;
4553         ref->found_ref = 0;
4554         ref->num_refs = 0;
4555         list_add_tail(&ref->node.list, &rec->backrefs);
4556         if (max_size > rec->max_size)
4557                 rec->max_size = max_size;
4558         return ref;
4559 }
4560
4561 /* Check if the type of extent matches with its chunk */
4562 static void check_extent_type(struct extent_record *rec)
4563 {
4564         struct btrfs_block_group_cache *bg_cache;
4565
4566         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4567         if (!bg_cache)
4568                 return;
4569
4570         /* data extent, check chunk directly*/
4571         if (!rec->metadata) {
4572                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4573                         rec->wrong_chunk_type = 1;
4574                 return;
4575         }
4576
4577         /* metadata extent, check the obvious case first */
4578         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4579                                  BTRFS_BLOCK_GROUP_METADATA))) {
4580                 rec->wrong_chunk_type = 1;
4581                 return;
4582         }
4583
4584         /*
4585          * Check SYSTEM extent, as it's also marked as metadata, we can only
4586          * make sure it's a SYSTEM extent by its backref
4587          */
4588         if (!list_empty(&rec->backrefs)) {
4589                 struct extent_backref *node;
4590                 struct tree_backref *tback;
4591                 u64 bg_type;
4592
4593                 node = to_extent_backref(rec->backrefs.next);
4594                 if (node->is_data) {
4595                         /* tree block shouldn't have data backref */
4596                         rec->wrong_chunk_type = 1;
4597                         return;
4598                 }
4599                 tback = container_of(node, struct tree_backref, node);
4600
4601                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4602                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4603                 else
4604                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4605                 if (!(bg_cache->flags & bg_type))
4606                         rec->wrong_chunk_type = 1;
4607         }
4608 }
4609
4610 /*
4611  * Allocate a new extent record, fill default values from @tmpl and insert int
4612  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4613  * the cache, otherwise it fails.
4614  */
4615 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4616                 struct extent_record *tmpl)
4617 {
4618         struct extent_record *rec;
4619         int ret = 0;
4620
4621         rec = malloc(sizeof(*rec));
4622         if (!rec)
4623                 return -ENOMEM;
4624         rec->start = tmpl->start;
4625         rec->max_size = tmpl->max_size;
4626         rec->nr = max(tmpl->nr, tmpl->max_size);
4627         rec->found_rec = tmpl->found_rec;
4628         rec->content_checked = tmpl->content_checked;
4629         rec->owner_ref_checked = tmpl->owner_ref_checked;
4630         rec->num_duplicates = 0;
4631         rec->metadata = tmpl->metadata;
4632         rec->flag_block_full_backref = FLAG_UNSET;
4633         rec->bad_full_backref = 0;
4634         rec->crossing_stripes = 0;
4635         rec->wrong_chunk_type = 0;
4636         rec->is_root = tmpl->is_root;
4637         rec->refs = tmpl->refs;
4638         rec->extent_item_refs = tmpl->extent_item_refs;
4639         rec->parent_generation = tmpl->parent_generation;
4640         INIT_LIST_HEAD(&rec->backrefs);
4641         INIT_LIST_HEAD(&rec->dups);
4642         INIT_LIST_HEAD(&rec->list);
4643         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4644         rec->cache.start = tmpl->start;
4645         rec->cache.size = tmpl->nr;
4646         ret = insert_cache_extent(extent_cache, &rec->cache);
4647         if (ret) {
4648                 free(rec);
4649                 return ret;
4650         }
4651         bytes_used += rec->nr;
4652
4653         if (tmpl->metadata)
4654                 rec->crossing_stripes = check_crossing_stripes(global_info,
4655                                 rec->start, global_info->tree_root->nodesize);
4656         check_extent_type(rec);
4657         return ret;
4658 }
4659
4660 /*
4661  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4662  * some are hints:
4663  * - refs              - if found, increase refs
4664  * - is_root           - if found, set
4665  * - content_checked   - if found, set
4666  * - owner_ref_checked - if found, set
4667  *
4668  * If not found, create a new one, initialize and insert.
4669  */
4670 static int add_extent_rec(struct cache_tree *extent_cache,
4671                 struct extent_record *tmpl)
4672 {
4673         struct extent_record *rec;
4674         struct cache_extent *cache;
4675         int ret = 0;
4676         int dup = 0;
4677
4678         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4679         if (cache) {
4680                 rec = container_of(cache, struct extent_record, cache);
4681                 if (tmpl->refs)
4682                         rec->refs++;
4683                 if (rec->nr == 1)
4684                         rec->nr = max(tmpl->nr, tmpl->max_size);
4685
4686                 /*
4687                  * We need to make sure to reset nr to whatever the extent
4688                  * record says was the real size, this way we can compare it to
4689                  * the backrefs.
4690                  */
4691                 if (tmpl->found_rec) {
4692                         if (tmpl->start != rec->start || rec->found_rec) {
4693                                 struct extent_record *tmp;
4694
4695                                 dup = 1;
4696                                 if (list_empty(&rec->list))
4697                                         list_add_tail(&rec->list,
4698                                                       &duplicate_extents);
4699
4700                                 /*
4701                                  * We have to do this song and dance in case we
4702                                  * find an extent record that falls inside of
4703                                  * our current extent record but does not have
4704                                  * the same objectid.
4705                                  */
4706                                 tmp = malloc(sizeof(*tmp));
4707                                 if (!tmp)
4708                                         return -ENOMEM;
4709                                 tmp->start = tmpl->start;
4710                                 tmp->max_size = tmpl->max_size;
4711                                 tmp->nr = tmpl->nr;
4712                                 tmp->found_rec = 1;
4713                                 tmp->metadata = tmpl->metadata;
4714                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4715                                 INIT_LIST_HEAD(&tmp->list);
4716                                 list_add_tail(&tmp->list, &rec->dups);
4717                                 rec->num_duplicates++;
4718                         } else {
4719                                 rec->nr = tmpl->nr;
4720                                 rec->found_rec = 1;
4721                         }
4722                 }
4723
4724                 if (tmpl->extent_item_refs && !dup) {
4725                         if (rec->extent_item_refs) {
4726                                 fprintf(stderr, "block %llu rec "
4727                                         "extent_item_refs %llu, passed %llu\n",
4728                                         (unsigned long long)tmpl->start,
4729                                         (unsigned long long)
4730                                                         rec->extent_item_refs,
4731                                         (unsigned long long)tmpl->extent_item_refs);
4732                         }
4733                         rec->extent_item_refs = tmpl->extent_item_refs;
4734                 }
4735                 if (tmpl->is_root)
4736                         rec->is_root = 1;
4737                 if (tmpl->content_checked)
4738                         rec->content_checked = 1;
4739                 if (tmpl->owner_ref_checked)
4740                         rec->owner_ref_checked = 1;
4741                 memcpy(&rec->parent_key, &tmpl->parent_key,
4742                                 sizeof(tmpl->parent_key));
4743                 if (tmpl->parent_generation)
4744                         rec->parent_generation = tmpl->parent_generation;
4745                 if (rec->max_size < tmpl->max_size)
4746                         rec->max_size = tmpl->max_size;
4747
4748                 /*
4749                  * A metadata extent can't cross stripe_len boundary, otherwise
4750                  * kernel scrub won't be able to handle it.
4751                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4752                  * it.
4753                  */
4754                 if (tmpl->metadata)
4755                         rec->crossing_stripes = check_crossing_stripes(
4756                                         global_info, rec->start,
4757                                         global_info->tree_root->nodesize);
4758                 check_extent_type(rec);
4759                 maybe_free_extent_rec(extent_cache, rec);
4760                 return ret;
4761         }
4762
4763         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4764
4765         return ret;
4766 }
4767
4768 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4769                             u64 parent, u64 root, int found_ref)
4770 {
4771         struct extent_record *rec;
4772         struct tree_backref *back;
4773         struct cache_extent *cache;
4774         int ret;
4775
4776         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4777         if (!cache) {
4778                 struct extent_record tmpl;
4779
4780                 memset(&tmpl, 0, sizeof(tmpl));
4781                 tmpl.start = bytenr;
4782                 tmpl.nr = 1;
4783                 tmpl.metadata = 1;
4784
4785                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4786                 if (ret)
4787                         return ret;
4788
4789                 /* really a bug in cache_extent implement now */
4790                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4791                 if (!cache)
4792                         return -ENOENT;
4793         }
4794
4795         rec = container_of(cache, struct extent_record, cache);
4796         if (rec->start != bytenr) {
4797                 /*
4798                  * Several cause, from unaligned bytenr to over lapping extents
4799                  */
4800                 return -EEXIST;
4801         }
4802
4803         back = find_tree_backref(rec, parent, root);
4804         if (!back) {
4805                 back = alloc_tree_backref(rec, parent, root);
4806                 if (!back)
4807                         return -ENOMEM;
4808         }
4809
4810         if (found_ref) {
4811                 if (back->node.found_ref) {
4812                         fprintf(stderr, "Extent back ref already exists "
4813                                 "for %llu parent %llu root %llu \n",
4814                                 (unsigned long long)bytenr,
4815                                 (unsigned long long)parent,
4816                                 (unsigned long long)root);
4817                 }
4818                 back->node.found_ref = 1;
4819         } else {
4820                 if (back->node.found_extent_tree) {
4821                         fprintf(stderr, "Extent back ref already exists "
4822                                 "for %llu parent %llu root %llu \n",
4823                                 (unsigned long long)bytenr,
4824                                 (unsigned long long)parent,
4825                                 (unsigned long long)root);
4826                 }
4827                 back->node.found_extent_tree = 1;
4828         }
4829         check_extent_type(rec);
4830         maybe_free_extent_rec(extent_cache, rec);
4831         return 0;
4832 }
4833
4834 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4835                             u64 parent, u64 root, u64 owner, u64 offset,
4836                             u32 num_refs, int found_ref, u64 max_size)
4837 {
4838         struct extent_record *rec;
4839         struct data_backref *back;
4840         struct cache_extent *cache;
4841         int ret;
4842
4843         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4844         if (!cache) {
4845                 struct extent_record tmpl;
4846
4847                 memset(&tmpl, 0, sizeof(tmpl));
4848                 tmpl.start = bytenr;
4849                 tmpl.nr = 1;
4850                 tmpl.max_size = max_size;
4851
4852                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4853                 if (ret)
4854                         return ret;
4855
4856                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4857                 if (!cache)
4858                         abort();
4859         }
4860
4861         rec = container_of(cache, struct extent_record, cache);
4862         if (rec->max_size < max_size)
4863                 rec->max_size = max_size;
4864
4865         /*
4866          * If found_ref is set then max_size is the real size and must match the
4867          * existing refs.  So if we have already found a ref then we need to
4868          * make sure that this ref matches the existing one, otherwise we need
4869          * to add a new backref so we can notice that the backrefs don't match
4870          * and we need to figure out who is telling the truth.  This is to
4871          * account for that awful fsync bug I introduced where we'd end up with
4872          * a btrfs_file_extent_item that would have its length include multiple
4873          * prealloc extents or point inside of a prealloc extent.
4874          */
4875         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4876                                  bytenr, max_size);
4877         if (!back) {
4878                 back = alloc_data_backref(rec, parent, root, owner, offset,
4879                                           max_size);
4880                 BUG_ON(!back);
4881         }
4882
4883         if (found_ref) {
4884                 BUG_ON(num_refs != 1);
4885                 if (back->node.found_ref)
4886                         BUG_ON(back->bytes != max_size);
4887                 back->node.found_ref = 1;
4888                 back->found_ref += 1;
4889                 back->bytes = max_size;
4890                 back->disk_bytenr = bytenr;
4891                 rec->refs += 1;
4892                 rec->content_checked = 1;
4893                 rec->owner_ref_checked = 1;
4894         } else {
4895                 if (back->node.found_extent_tree) {
4896                         fprintf(stderr, "Extent back ref already exists "
4897                                 "for %llu parent %llu root %llu "
4898                                 "owner %llu offset %llu num_refs %lu\n",
4899                                 (unsigned long long)bytenr,
4900                                 (unsigned long long)parent,
4901                                 (unsigned long long)root,
4902                                 (unsigned long long)owner,
4903                                 (unsigned long long)offset,
4904                                 (unsigned long)num_refs);
4905                 }
4906                 back->num_refs = num_refs;
4907                 back->node.found_extent_tree = 1;
4908         }
4909         maybe_free_extent_rec(extent_cache, rec);
4910         return 0;
4911 }
4912
4913 static int add_pending(struct cache_tree *pending,
4914                        struct cache_tree *seen, u64 bytenr, u32 size)
4915 {
4916         int ret;
4917         ret = add_cache_extent(seen, bytenr, size);
4918         if (ret)
4919                 return ret;
4920         add_cache_extent(pending, bytenr, size);
4921         return 0;
4922 }
4923
4924 static int pick_next_pending(struct cache_tree *pending,
4925                         struct cache_tree *reada,
4926                         struct cache_tree *nodes,
4927                         u64 last, struct block_info *bits, int bits_nr,
4928                         int *reada_bits)
4929 {
4930         unsigned long node_start = last;
4931         struct cache_extent *cache;
4932         int ret;
4933
4934         cache = search_cache_extent(reada, 0);
4935         if (cache) {
4936                 bits[0].start = cache->start;
4937                 bits[0].size = cache->size;
4938                 *reada_bits = 1;
4939                 return 1;
4940         }
4941         *reada_bits = 0;
4942         if (node_start > 32768)
4943                 node_start -= 32768;
4944
4945         cache = search_cache_extent(nodes, node_start);
4946         if (!cache)
4947                 cache = search_cache_extent(nodes, 0);
4948
4949         if (!cache) {
4950                  cache = search_cache_extent(pending, 0);
4951                  if (!cache)
4952                          return 0;
4953                  ret = 0;
4954                  do {
4955                          bits[ret].start = cache->start;
4956                          bits[ret].size = cache->size;
4957                          cache = next_cache_extent(cache);
4958                          ret++;
4959                  } while (cache && ret < bits_nr);
4960                  return ret;
4961         }
4962
4963         ret = 0;
4964         do {
4965                 bits[ret].start = cache->start;
4966                 bits[ret].size = cache->size;
4967                 cache = next_cache_extent(cache);
4968                 ret++;
4969         } while (cache && ret < bits_nr);
4970
4971         if (bits_nr - ret > 8) {
4972                 u64 lookup = bits[0].start + bits[0].size;
4973                 struct cache_extent *next;
4974                 next = search_cache_extent(pending, lookup);
4975                 while(next) {
4976                         if (next->start - lookup > 32768)
4977                                 break;
4978                         bits[ret].start = next->start;
4979                         bits[ret].size = next->size;
4980                         lookup = next->start + next->size;
4981                         ret++;
4982                         if (ret == bits_nr)
4983                                 break;
4984                         next = next_cache_extent(next);
4985                         if (!next)
4986                                 break;
4987                 }
4988         }
4989         return ret;
4990 }
4991
4992 static void free_chunk_record(struct cache_extent *cache)
4993 {
4994         struct chunk_record *rec;
4995
4996         rec = container_of(cache, struct chunk_record, cache);
4997         list_del_init(&rec->list);
4998         list_del_init(&rec->dextents);
4999         free(rec);
5000 }
5001
5002 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5003 {
5004         cache_tree_free_extents(chunk_cache, free_chunk_record);
5005 }
5006
5007 static void free_device_record(struct rb_node *node)
5008 {
5009         struct device_record *rec;
5010
5011         rec = container_of(node, struct device_record, node);
5012         free(rec);
5013 }
5014
5015 FREE_RB_BASED_TREE(device_cache, free_device_record);
5016
5017 int insert_block_group_record(struct block_group_tree *tree,
5018                               struct block_group_record *bg_rec)
5019 {
5020         int ret;
5021
5022         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5023         if (ret)
5024                 return ret;
5025
5026         list_add_tail(&bg_rec->list, &tree->block_groups);
5027         return 0;
5028 }
5029
5030 static void free_block_group_record(struct cache_extent *cache)
5031 {
5032         struct block_group_record *rec;
5033
5034         rec = container_of(cache, struct block_group_record, cache);
5035         list_del_init(&rec->list);
5036         free(rec);
5037 }
5038
5039 void free_block_group_tree(struct block_group_tree *tree)
5040 {
5041         cache_tree_free_extents(&tree->tree, free_block_group_record);
5042 }
5043
5044 int insert_device_extent_record(struct device_extent_tree *tree,
5045                                 struct device_extent_record *de_rec)
5046 {
5047         int ret;
5048
5049         /*
5050          * Device extent is a bit different from the other extents, because
5051          * the extents which belong to the different devices may have the
5052          * same start and size, so we need use the special extent cache
5053          * search/insert functions.
5054          */
5055         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5056         if (ret)
5057                 return ret;
5058
5059         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5060         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5061         return 0;
5062 }
5063
5064 static void free_device_extent_record(struct cache_extent *cache)
5065 {
5066         struct device_extent_record *rec;
5067
5068         rec = container_of(cache, struct device_extent_record, cache);
5069         if (!list_empty(&rec->chunk_list))
5070                 list_del_init(&rec->chunk_list);
5071         if (!list_empty(&rec->device_list))
5072                 list_del_init(&rec->device_list);
5073         free(rec);
5074 }
5075
5076 void free_device_extent_tree(struct device_extent_tree *tree)
5077 {
5078         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5079 }
5080
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache. The key's objectid is used as bytenr and its offset as
 * parent.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref, anything else as a data backref carrying the item's ref count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
5101
5102 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5103                                             struct btrfs_key *key,
5104                                             int slot)
5105 {
5106         struct btrfs_chunk *ptr;
5107         struct chunk_record *rec;
5108         int num_stripes, i;
5109
5110         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5111         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5112
5113         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5114         if (!rec) {
5115                 fprintf(stderr, "memory allocation failed\n");
5116                 exit(-1);
5117         }
5118
5119         INIT_LIST_HEAD(&rec->list);
5120         INIT_LIST_HEAD(&rec->dextents);
5121         rec->bg_rec = NULL;
5122
5123         rec->cache.start = key->offset;
5124         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5125
5126         rec->generation = btrfs_header_generation(leaf);
5127
5128         rec->objectid = key->objectid;
5129         rec->type = key->type;
5130         rec->offset = key->offset;
5131
5132         rec->length = rec->cache.size;
5133         rec->owner = btrfs_chunk_owner(leaf, ptr);
5134         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5135         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5136         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5137         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5138         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5139         rec->num_stripes = num_stripes;
5140         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5141
5142         for (i = 0; i < rec->num_stripes; ++i) {
5143                 rec->stripes[i].devid =
5144                         btrfs_stripe_devid_nr(leaf, ptr, i);
5145                 rec->stripes[i].offset =
5146                         btrfs_stripe_offset_nr(leaf, ptr, i);
5147                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5148                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5149                                 BTRFS_UUID_SIZE);
5150         }
5151
5152         return rec;
5153 }
5154
5155 static int process_chunk_item(struct cache_tree *chunk_cache,
5156                               struct btrfs_key *key, struct extent_buffer *eb,
5157                               int slot)
5158 {
5159         struct chunk_record *rec;
5160         struct btrfs_chunk *chunk;
5161         int ret = 0;
5162
5163         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5164         /*
5165          * Do extra check for this chunk item,
5166          *
5167          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5168          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5169          * and owner<->key_type check.
5170          */
5171         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5172                                       key->offset);
5173         if (ret < 0) {
5174                 error("chunk(%llu, %llu) is not valid, ignore it",
5175                       key->offset, btrfs_chunk_length(eb, chunk));
5176                 return 0;
5177         }
5178         rec = btrfs_new_chunk_record(eb, key, slot);
5179         ret = insert_cache_extent(chunk_cache, &rec->cache);
5180         if (ret) {
5181                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5182                         rec->offset, rec->length);
5183                 free(rec);
5184         }
5185
5186         return ret;
5187 }
5188
5189 static int process_device_item(struct rb_root *dev_cache,
5190                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5191 {
5192         struct btrfs_dev_item *ptr;
5193         struct device_record *rec;
5194         int ret = 0;
5195
5196         ptr = btrfs_item_ptr(eb,
5197                 slot, struct btrfs_dev_item);
5198
5199         rec = malloc(sizeof(*rec));
5200         if (!rec) {
5201                 fprintf(stderr, "memory allocation failed\n");
5202                 return -ENOMEM;
5203         }
5204
5205         rec->devid = key->offset;
5206         rec->generation = btrfs_header_generation(eb);
5207
5208         rec->objectid = key->objectid;
5209         rec->type = key->type;
5210         rec->offset = key->offset;
5211
5212         rec->devid = btrfs_device_id(eb, ptr);
5213         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5214         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5215
5216         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5217         if (ret) {
5218                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5219                 free(rec);
5220         }
5221
5222         return ret;
5223 }
5224
5225 struct block_group_record *
5226 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5227                              int slot)
5228 {
5229         struct btrfs_block_group_item *ptr;
5230         struct block_group_record *rec;
5231
5232         rec = calloc(1, sizeof(*rec));
5233         if (!rec) {
5234                 fprintf(stderr, "memory allocation failed\n");
5235                 exit(-1);
5236         }
5237
5238         rec->cache.start = key->objectid;
5239         rec->cache.size = key->offset;
5240
5241         rec->generation = btrfs_header_generation(leaf);
5242
5243         rec->objectid = key->objectid;
5244         rec->type = key->type;
5245         rec->offset = key->offset;
5246
5247         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5248         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5249
5250         INIT_LIST_HEAD(&rec->list);
5251
5252         return rec;
5253 }
5254
5255 static int process_block_group_item(struct block_group_tree *block_group_cache,
5256                                     struct btrfs_key *key,
5257                                     struct extent_buffer *eb, int slot)
5258 {
5259         struct block_group_record *rec;
5260         int ret = 0;
5261
5262         rec = btrfs_new_block_group_record(eb, key, slot);
5263         ret = insert_block_group_record(block_group_cache, rec);
5264         if (ret) {
5265                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5266                         rec->objectid, rec->offset);
5267                 free(rec);
5268         }
5269
5270         return ret;
5271 }
5272
5273 struct device_extent_record *
5274 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5275                                struct btrfs_key *key, int slot)
5276 {
5277         struct device_extent_record *rec;
5278         struct btrfs_dev_extent *ptr;
5279
5280         rec = calloc(1, sizeof(*rec));
5281         if (!rec) {
5282                 fprintf(stderr, "memory allocation failed\n");
5283                 exit(-1);
5284         }
5285
5286         rec->cache.objectid = key->objectid;
5287         rec->cache.start = key->offset;
5288
5289         rec->generation = btrfs_header_generation(leaf);
5290
5291         rec->objectid = key->objectid;
5292         rec->type = key->type;
5293         rec->offset = key->offset;
5294
5295         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5296         rec->chunk_objecteid =
5297                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5298         rec->chunk_offset =
5299                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5300         rec->length = btrfs_dev_extent_length(leaf, ptr);
5301         rec->cache.size = rec->length;
5302
5303         INIT_LIST_HEAD(&rec->chunk_list);
5304         INIT_LIST_HEAD(&rec->device_list);
5305
5306         return rec;
5307 }
5308
5309 static int
5310 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5311                            struct btrfs_key *key, struct extent_buffer *eb,
5312                            int slot)
5313 {
5314         struct device_extent_record *rec;
5315         int ret;
5316
5317         rec = btrfs_new_device_extent_record(eb, key, slot);
5318         ret = insert_device_extent_record(dev_extent_cache, rec);
5319         if (ret) {
5320                 fprintf(stderr,
5321                         "Device extent[%llu, %llu, %llu] existed.\n",
5322                         rec->objectid, rec->offset, rec->length);
5323                 free(rec);
5324         }
5325
5326         return ret;
5327 }
5328
5329 static int process_extent_item(struct btrfs_root *root,
5330                                struct cache_tree *extent_cache,
5331                                struct extent_buffer *eb, int slot)
5332 {
5333         struct btrfs_extent_item *ei;
5334         struct btrfs_extent_inline_ref *iref;
5335         struct btrfs_extent_data_ref *dref;
5336         struct btrfs_shared_data_ref *sref;
5337         struct btrfs_key key;
5338         struct extent_record tmpl;
5339         unsigned long end;
5340         unsigned long ptr;
5341         int ret;
5342         int type;
5343         u32 item_size = btrfs_item_size_nr(eb, slot);
5344         u64 refs = 0;
5345         u64 offset;
5346         u64 num_bytes;
5347         int metadata = 0;
5348
5349         btrfs_item_key_to_cpu(eb, &key, slot);
5350
5351         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5352                 metadata = 1;
5353                 num_bytes = root->nodesize;
5354         } else {
5355                 num_bytes = key.offset;
5356         }
5357
5358         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5359                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5360                       key.objectid, root->sectorsize);
5361                 return -EIO;
5362         }
5363         if (item_size < sizeof(*ei)) {
5364 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5365                 struct btrfs_extent_item_v0 *ei0;
5366                 BUG_ON(item_size != sizeof(*ei0));
5367                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5368                 refs = btrfs_extent_refs_v0(eb, ei0);
5369 #else
5370                 BUG();
5371 #endif
5372                 memset(&tmpl, 0, sizeof(tmpl));
5373                 tmpl.start = key.objectid;
5374                 tmpl.nr = num_bytes;
5375                 tmpl.extent_item_refs = refs;
5376                 tmpl.metadata = metadata;
5377                 tmpl.found_rec = 1;
5378                 tmpl.max_size = num_bytes;
5379
5380                 return add_extent_rec(extent_cache, &tmpl);
5381         }
5382
5383         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5384         refs = btrfs_extent_refs(eb, ei);
5385         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5386                 metadata = 1;
5387         else
5388                 metadata = 0;
5389         if (metadata && num_bytes != root->nodesize) {
5390                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5391                       num_bytes, root->nodesize);
5392                 return -EIO;
5393         }
5394         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5395                 error("ignore invalid data extent, length %llu is not aligned to %u",
5396                       num_bytes, root->sectorsize);
5397                 return -EIO;
5398         }
5399
5400         memset(&tmpl, 0, sizeof(tmpl));
5401         tmpl.start = key.objectid;
5402         tmpl.nr = num_bytes;
5403         tmpl.extent_item_refs = refs;
5404         tmpl.metadata = metadata;
5405         tmpl.found_rec = 1;
5406         tmpl.max_size = num_bytes;
5407         add_extent_rec(extent_cache, &tmpl);
5408
5409         ptr = (unsigned long)(ei + 1);
5410         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5411             key.type == BTRFS_EXTENT_ITEM_KEY)
5412                 ptr += sizeof(struct btrfs_tree_block_info);
5413
5414         end = (unsigned long)ei + item_size;
5415         while (ptr < end) {
5416                 iref = (struct btrfs_extent_inline_ref *)ptr;
5417                 type = btrfs_extent_inline_ref_type(eb, iref);
5418                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5419                 switch (type) {
5420                 case BTRFS_TREE_BLOCK_REF_KEY:
5421                         ret = add_tree_backref(extent_cache, key.objectid,
5422                                         0, offset, 0);
5423                         if (ret < 0)
5424                                 error("add_tree_backref failed: %s",
5425                                       strerror(-ret));
5426                         break;
5427                 case BTRFS_SHARED_BLOCK_REF_KEY:
5428                         ret = add_tree_backref(extent_cache, key.objectid,
5429                                         offset, 0, 0);
5430                         if (ret < 0)
5431                                 error("add_tree_backref failed: %s",
5432                                       strerror(-ret));
5433                         break;
5434                 case BTRFS_EXTENT_DATA_REF_KEY:
5435                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5436                         add_data_backref(extent_cache, key.objectid, 0,
5437                                         btrfs_extent_data_ref_root(eb, dref),
5438                                         btrfs_extent_data_ref_objectid(eb,
5439                                                                        dref),
5440                                         btrfs_extent_data_ref_offset(eb, dref),
5441                                         btrfs_extent_data_ref_count(eb, dref),
5442                                         0, num_bytes);
5443                         break;
5444                 case BTRFS_SHARED_DATA_REF_KEY:
5445                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5446                         add_data_backref(extent_cache, key.objectid, offset,
5447                                         0, 0, 0,
5448                                         btrfs_shared_data_ref_count(eb, sref),
5449                                         0, num_bytes);
5450                         break;
5451                 default:
5452                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5453                                 key.objectid, key.type, num_bytes);
5454                         goto out;
5455                 }
5456                 ptr += btrfs_extent_inline_ref_size(type);
5457         }
5458         WARN_ON(ptr > end);
5459 out:
5460         return 0;
5461 }
5462
5463 static int check_cache_range(struct btrfs_root *root,
5464                              struct btrfs_block_group_cache *cache,
5465                              u64 offset, u64 bytes)
5466 {
5467         struct btrfs_free_space *entry;
5468         u64 *logical;
5469         u64 bytenr;
5470         int stripe_len;
5471         int i, nr, ret;
5472
5473         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5474                 bytenr = btrfs_sb_offset(i);
5475                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5476                                        cache->key.objectid, bytenr, 0,
5477                                        &logical, &nr, &stripe_len);
5478                 if (ret)
5479                         return ret;
5480
5481                 while (nr--) {
5482                         if (logical[nr] + stripe_len <= offset)
5483                                 continue;
5484                         if (offset + bytes <= logical[nr])
5485                                 continue;
5486                         if (logical[nr] == offset) {
5487                                 if (stripe_len >= bytes) {
5488                                         free(logical);
5489                                         return 0;
5490                                 }
5491                                 bytes -= stripe_len;
5492                                 offset += stripe_len;
5493                         } else if (logical[nr] < offset) {
5494                                 if (logical[nr] + stripe_len >=
5495                                     offset + bytes) {
5496                                         free(logical);
5497                                         return 0;
5498                                 }
5499                                 bytes = (offset + bytes) -
5500                                         (logical[nr] + stripe_len);
5501                                 offset = logical[nr] + stripe_len;
5502                         } else {
5503                                 /*
5504                                  * Could be tricky, the super may land in the
5505                                  * middle of the area we're checking.  First
5506                                  * check the easiest case, it's at the end.
5507                                  */
5508                                 if (logical[nr] + stripe_len >=
5509                                     bytes + offset) {
5510                                         bytes = logical[nr] - offset;
5511                                         continue;
5512                                 }
5513
5514                                 /* Check the left side */
5515                                 ret = check_cache_range(root, cache,
5516                                                         offset,
5517                                                         logical[nr] - offset);
5518                                 if (ret) {
5519                                         free(logical);
5520                                         return ret;
5521                                 }
5522
5523                                 /* Now we continue with the right side */
5524                                 bytes = (offset + bytes) -
5525                                         (logical[nr] + stripe_len);
5526                                 offset = logical[nr] + stripe_len;
5527                         }
5528                 }
5529
5530                 free(logical);
5531         }
5532
5533         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5534         if (!entry) {
5535                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5536                         offset, offset+bytes);
5537                 return -EINVAL;
5538         }
5539
5540         if (entry->offset != offset) {
5541                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5542                         entry->offset);
5543                 return -EINVAL;
5544         }
5545
5546         if (entry->bytes != bytes) {
5547                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5548                         bytes, entry->bytes, offset);
5549                 return -EINVAL;
5550         }
5551
5552         unlink_free_space(cache->free_space_ctl, entry);
5553         free(entry);
5554         return 0;
5555 }
5556
5557 static int verify_space_cache(struct btrfs_root *root,
5558                               struct btrfs_block_group_cache *cache)
5559 {
5560         struct btrfs_path *path;
5561         struct extent_buffer *leaf;
5562         struct btrfs_key key;
5563         u64 last;
5564         int ret = 0;
5565
5566         path = btrfs_alloc_path();
5567         if (!path)
5568                 return -ENOMEM;
5569
5570         root = root->fs_info->extent_root;
5571
5572         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5573
5574         key.objectid = last;
5575         key.offset = 0;
5576         key.type = BTRFS_EXTENT_ITEM_KEY;
5577
5578         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5579         if (ret < 0)
5580                 goto out;
5581         ret = 0;
5582         while (1) {
5583                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5584                         ret = btrfs_next_leaf(root, path);
5585                         if (ret < 0)
5586                                 goto out;
5587                         if (ret > 0) {
5588                                 ret = 0;
5589                                 break;
5590                         }
5591                 }
5592                 leaf = path->nodes[0];
5593                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5594                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5595                         break;
5596                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5597                     key.type != BTRFS_METADATA_ITEM_KEY) {
5598                         path->slots[0]++;
5599                         continue;
5600                 }
5601
5602                 if (last == key.objectid) {
5603                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5604                                 last = key.objectid + key.offset;
5605                         else
5606                                 last = key.objectid + root->nodesize;
5607                         path->slots[0]++;
5608                         continue;
5609                 }
5610
5611                 ret = check_cache_range(root, cache, last,
5612                                         key.objectid - last);
5613                 if (ret)
5614                         break;
5615                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5616                         last = key.objectid + key.offset;
5617                 else
5618                         last = key.objectid + root->nodesize;
5619                 path->slots[0]++;
5620         }
5621
5622         if (last < cache->key.objectid + cache->key.offset)
5623                 ret = check_cache_range(root, cache, last,
5624                                         cache->key.objectid +
5625                                         cache->key.offset - last);
5626
5627 out:
5628         btrfs_free_path(path);
5629
5630         if (!ret &&
5631             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5632                 fprintf(stderr, "There are still entries left in the space "
5633                         "cache\n");
5634                 ret = -EINVAL;
5635         }
5636
5637         return ret;
5638 }
5639
5640 static int check_space_cache(struct btrfs_root *root)
5641 {
5642         struct btrfs_block_group_cache *cache;
5643         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5644         int ret;
5645         int error = 0;
5646
5647         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5648             btrfs_super_generation(root->fs_info->super_copy) !=
5649             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5650                 printf("cache and super generation don't match, space cache "
5651                        "will be invalidated\n");
5652                 return 0;
5653         }
5654
5655         if (ctx.progress_enabled) {
5656                 ctx.tp = TASK_FREE_SPACE;
5657                 task_start(ctx.info);
5658         }
5659
5660         while (1) {
5661                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5662                 if (!cache)
5663                         break;
5664
5665                 start = cache->key.objectid + cache->key.offset;
5666                 if (!cache->free_space_ctl) {
5667                         if (btrfs_init_free_space_ctl(cache,
5668                                                       root->sectorsize)) {
5669                                 ret = -ENOMEM;
5670                                 break;
5671                         }
5672                 } else {
5673                         btrfs_remove_free_space_cache(cache);
5674                 }
5675
5676                 if (btrfs_fs_compat_ro(root->fs_info,
5677                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5678                         ret = exclude_super_stripes(root, cache);
5679                         if (ret) {
5680                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5681                                         strerror(-ret));
5682                                 error++;
5683                                 continue;
5684                         }
5685                         ret = load_free_space_tree(root->fs_info, cache);
5686                         free_excluded_extents(root, cache);
5687                         if (ret < 0) {
5688                                 fprintf(stderr, "could not load free space tree: %s\n",
5689                                         strerror(-ret));
5690                                 error++;
5691                                 continue;
5692                         }
5693                         error += ret;
5694                 } else {
5695                         ret = load_free_space_cache(root->fs_info, cache);
5696                         if (!ret)
5697                                 continue;
5698                 }
5699
5700                 ret = verify_space_cache(root, cache);
5701                 if (ret) {
5702                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5703                                 cache->key.objectid);
5704                         error++;
5705                 }
5706         }
5707
5708         task_stop(ctx.info);
5709
5710         return error ? -EINVAL : 0;
5711 }
5712
5713 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5714                         u64 num_bytes, unsigned long leaf_offset,
5715                         struct extent_buffer *eb) {
5716
5717         u64 offset = 0;
5718         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5719         char *data;
5720         unsigned long csum_offset;
5721         u32 csum;
5722         u32 csum_expected;
5723         u64 read_len;
5724         u64 data_checked = 0;
5725         u64 tmp;
5726         int ret = 0;
5727         int mirror;
5728         int num_copies;
5729
5730         if (num_bytes % root->sectorsize)
5731                 return -EINVAL;
5732
5733         data = malloc(num_bytes);
5734         if (!data)
5735                 return -ENOMEM;
5736
5737         while (offset < num_bytes) {
5738                 mirror = 0;
5739 again:
5740                 read_len = num_bytes - offset;
5741                 /* read as much space once a time */
5742                 ret = read_extent_data(root, data + offset,
5743                                 bytenr + offset, &read_len, mirror);
5744                 if (ret)
5745                         goto out;
5746                 data_checked = 0;
5747                 /* verify every 4k data's checksum */
5748                 while (data_checked < read_len) {
5749                         csum = ~(u32)0;
5750                         tmp = offset + data_checked;
5751
5752                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5753                                                csum, root->sectorsize);
5754                         btrfs_csum_final(csum, (u8 *)&csum);
5755
5756                         csum_offset = leaf_offset +
5757                                  tmp / root->sectorsize * csum_size;
5758                         read_extent_buffer(eb, (char *)&csum_expected,
5759                                            csum_offset, csum_size);
5760                         /* try another mirror */
5761                         if (csum != csum_expected) {
5762                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5763                                                 mirror, bytenr + tmp,
5764                                                 csum, csum_expected);
5765                                 num_copies = btrfs_num_copies(
5766                                                 &root->fs_info->mapping_tree,
5767                                                 bytenr, num_bytes);
5768                                 if (mirror < num_copies - 1) {
5769                                         mirror += 1;
5770                                         goto again;
5771                                 }
5772                         }
5773                         data_checked += root->sectorsize;
5774                 }
5775                 offset += read_len;
5776         }
5777 out:
5778         free(data);
5779         return ret;
5780 }
5781
5782 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5783                                u64 num_bytes)
5784 {
5785         struct btrfs_path *path;
5786         struct extent_buffer *leaf;
5787         struct btrfs_key key;
5788         int ret;
5789
5790         path = btrfs_alloc_path();
5791         if (!path) {
5792                 fprintf(stderr, "Error allocating path\n");
5793                 return -ENOMEM;
5794         }
5795
5796         key.objectid = bytenr;
5797         key.type = BTRFS_EXTENT_ITEM_KEY;
5798         key.offset = (u64)-1;
5799
5800 again:
5801         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5802                                 0, 0);
5803         if (ret < 0) {
5804                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5805                 btrfs_free_path(path);
5806                 return ret;
5807         } else if (ret) {
5808                 if (path->slots[0] > 0) {
5809                         path->slots[0]--;
5810                 } else {
5811                         ret = btrfs_prev_leaf(root, path);
5812                         if (ret < 0) {
5813                                 goto out;
5814                         } else if (ret > 0) {
5815                                 ret = 0;
5816                                 goto out;
5817                         }
5818                 }
5819         }
5820
5821         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5822
5823         /*
5824          * Block group items come before extent items if they have the same
5825          * bytenr, so walk back one more just in case.  Dear future traveller,
5826          * first congrats on mastering time travel.  Now if it's not too much
5827          * trouble could you go back to 2006 and tell Chris to make the
5828          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5829          * EXTENT_ITEM_KEY please?
5830          */
5831         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5832                 if (path->slots[0] > 0) {
5833                         path->slots[0]--;
5834                 } else {
5835                         ret = btrfs_prev_leaf(root, path);
5836                         if (ret < 0) {
5837                                 goto out;
5838                         } else if (ret > 0) {
5839                                 ret = 0;
5840                                 goto out;
5841                         }
5842                 }
5843                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5844         }
5845
5846         while (num_bytes) {
5847                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5848                         ret = btrfs_next_leaf(root, path);
5849                         if (ret < 0) {
5850                                 fprintf(stderr, "Error going to next leaf "
5851                                         "%d\n", ret);
5852                                 btrfs_free_path(path);
5853                                 return ret;
5854                         } else if (ret) {
5855                                 break;
5856                         }
5857                 }
5858                 leaf = path->nodes[0];
5859                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5860                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5861                         path->slots[0]++;
5862                         continue;
5863                 }
5864                 if (key.objectid + key.offset < bytenr) {
5865                         path->slots[0]++;
5866                         continue;
5867                 }
5868                 if (key.objectid > bytenr + num_bytes)
5869                         break;
5870
5871                 if (key.objectid == bytenr) {
5872                         if (key.offset >= num_bytes) {
5873                                 num_bytes = 0;
5874                                 break;
5875                         }
5876                         num_bytes -= key.offset;
5877                         bytenr += key.offset;
5878                 } else if (key.objectid < bytenr) {
5879                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5880                                 num_bytes = 0;
5881                                 break;
5882                         }
5883                         num_bytes = (bytenr + num_bytes) -
5884                                 (key.objectid + key.offset);
5885                         bytenr = key.objectid + key.offset;
5886                 } else {
5887                         if (key.objectid + key.offset < bytenr + num_bytes) {
5888                                 u64 new_start = key.objectid + key.offset;
5889                                 u64 new_bytes = bytenr + num_bytes - new_start;
5890
5891                                 /*
5892                                  * Weird case, the extent is in the middle of
5893                                  * our range, we'll have to search one side
5894                                  * and then the other.  Not sure if this happens
5895                                  * in real life, but no harm in coding it up
5896                                  * anyway just in case.
5897                                  */
5898                                 btrfs_release_path(path);
5899                                 ret = check_extent_exists(root, new_start,
5900                                                           new_bytes);
5901                                 if (ret) {
5902                                         fprintf(stderr, "Right section didn't "
5903                                                 "have a record\n");
5904                                         break;
5905                                 }
5906                                 num_bytes = key.objectid - bytenr;
5907                                 goto again;
5908                         }
5909                         num_bytes = key.objectid - bytenr;
5910                 }
5911                 path->slots[0]++;
5912         }
5913         ret = 0;
5914
5915 out:
5916         if (num_bytes && !ret) {
5917                 fprintf(stderr, "There are no extents for csum range "
5918                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5919                 ret = 1;
5920         }
5921
5922         btrfs_free_path(path);
5923         return ret;
5924 }
5925
5926 static int check_csums(struct btrfs_root *root)
5927 {
5928         struct btrfs_path *path;
5929         struct extent_buffer *leaf;
5930         struct btrfs_key key;
5931         u64 offset = 0, num_bytes = 0;
5932         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5933         int errors = 0;
5934         int ret;
5935         u64 data_len;
5936         unsigned long leaf_offset;
5937
5938         root = root->fs_info->csum_root;
5939         if (!extent_buffer_uptodate(root->node)) {
5940                 fprintf(stderr, "No valid csum tree found\n");
5941                 return -ENOENT;
5942         }
5943
5944         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5945         key.type = BTRFS_EXTENT_CSUM_KEY;
5946         key.offset = 0;
5947
5948         path = btrfs_alloc_path();
5949         if (!path)
5950                 return -ENOMEM;
5951
5952         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5953         if (ret < 0) {
5954                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5955                 btrfs_free_path(path);
5956                 return ret;
5957         }
5958
5959         if (ret > 0 && path->slots[0])
5960                 path->slots[0]--;
5961         ret = 0;
5962
5963         while (1) {
5964                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5965                         ret = btrfs_next_leaf(root, path);
5966                         if (ret < 0) {
5967                                 fprintf(stderr, "Error going to next leaf "
5968                                         "%d\n", ret);
5969                                 break;
5970                         }
5971                         if (ret)
5972                                 break;
5973                 }
5974                 leaf = path->nodes[0];
5975
5976                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5977                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5978                         path->slots[0]++;
5979                         continue;
5980                 }
5981
5982                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5983                               csum_size) * root->sectorsize;
5984                 if (!check_data_csum)
5985                         goto skip_csum_check;
5986                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5987                 ret = check_extent_csums(root, key.offset, data_len,
5988                                          leaf_offset, leaf);
5989                 if (ret)
5990                         break;
5991 skip_csum_check:
5992                 if (!num_bytes) {
5993                         offset = key.offset;
5994                 } else if (key.offset != offset + num_bytes) {
5995                         ret = check_extent_exists(root, offset, num_bytes);
5996                         if (ret) {
5997                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5998                                         "there is no extent record\n",
5999                                         offset, offset+num_bytes);
6000                                 errors++;
6001                         }
6002                         offset = key.offset;
6003                         num_bytes = 0;
6004                 }
6005                 num_bytes += data_len;
6006                 path->slots[0]++;
6007         }
6008
6009         btrfs_free_path(path);
6010         return errors;
6011 }
6012
6013 static int is_dropped_key(struct btrfs_key *key,
6014                           struct btrfs_key *drop_key) {
6015         if (key->objectid < drop_key->objectid)
6016                 return 1;
6017         else if (key->objectid == drop_key->objectid) {
6018                 if (key->type < drop_key->type)
6019                         return 1;
6020                 else if (key->type == drop_key->type) {
6021                         if (key->offset < drop_key->offset)
6022                                 return 1;
6023                 }
6024         }
6025         return 0;
6026 }
6027
6028 /*
6029  * Here are the rules for FULL_BACKREF.
6030  *
6031  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6032  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6033  *      FULL_BACKREF set.
6034  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6035  *    if it happened after the relocation occurred since we'll have dropped the
6036  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6037  *    have no real way to know for sure.
6038  *
6039  * We process the blocks one root at a time, and we start from the lowest root
6040  * objectid and go to the highest.  So we can just lookup the owner backref for
6041  * the record and if we don't find it then we know it doesn't exist and we have
6042  * a FULL BACKREF.
6043  *
6044  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6045  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6046  * be set or not and then we can check later once we've gathered all the refs.
6047  */
6048 static int calc_extent_flag(struct btrfs_root *root,
6049                            struct cache_tree *extent_cache,
6050                            struct extent_buffer *buf,
6051                            struct root_item_record *ri,
6052                            u64 *flags)
6053 {
6054         struct extent_record *rec;
6055         struct cache_extent *cache;
6056         struct tree_backref *tback;
6057         u64 owner = 0;
6058
6059         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6060         /* we have added this extent before */
6061         if (!cache)
6062                 return -ENOENT;
6063
6064         rec = container_of(cache, struct extent_record, cache);
6065
6066         /*
6067          * Except file/reloc tree, we can not have
6068          * FULL BACKREF MODE
6069          */
6070         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6071                 goto normal;
6072         /*
6073          * root node
6074          */
6075         if (buf->start == ri->bytenr)
6076                 goto normal;
6077
6078         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6079                 goto full_backref;
6080
6081         owner = btrfs_header_owner(buf);
6082         if (owner == ri->objectid)
6083                 goto normal;
6084
6085         tback = find_tree_backref(rec, 0, owner);
6086         if (!tback)
6087                 goto full_backref;
6088 normal:
6089         *flags = 0;
6090         if (rec->flag_block_full_backref != FLAG_UNSET &&
6091             rec->flag_block_full_backref != 0)
6092                 rec->bad_full_backref = 1;
6093         return 0;
6094 full_backref:
6095         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6096         if (rec->flag_block_full_backref != FLAG_UNSET &&
6097             rec->flag_block_full_backref != 1)
6098                 rec->bad_full_backref = 1;
6099         return 0;
6100 }
6101
6102 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6103 {
6104         fprintf(stderr, "Invalid key type(");
6105         print_key_type(stderr, 0, key_type);
6106         fprintf(stderr, ") found in root(");
6107         print_objectid(stderr, rootid, 0);
6108         fprintf(stderr, ")\n");
6109 }
6110
6111 /*
6112  * Check if the key is valid with its extent buffer.
6113  *
6114  * This is a early check in case invalid key exists in a extent buffer
6115  * This is not comprehensive yet, but should prevent wrong key/item passed
6116  * further
6117  */
6118 static int check_type_with_root(u64 rootid, u8 key_type)
6119 {
6120         switch (key_type) {
6121         /* Only valid in chunk tree */
6122         case BTRFS_DEV_ITEM_KEY:
6123         case BTRFS_CHUNK_ITEM_KEY:
6124                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6125                         goto err;
6126                 break;
6127         /* valid in csum and log tree */
6128         case BTRFS_CSUM_TREE_OBJECTID:
6129                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6130                       is_fstree(rootid)))
6131                         goto err;
6132                 break;
6133         case BTRFS_EXTENT_ITEM_KEY:
6134         case BTRFS_METADATA_ITEM_KEY:
6135         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6136                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6137                         goto err;
6138                 break;
6139         case BTRFS_ROOT_ITEM_KEY:
6140                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6141                         goto err;
6142                 break;
6143         case BTRFS_DEV_EXTENT_KEY:
6144                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6145                         goto err;
6146                 break;
6147         }
6148         return 0;
6149 err:
6150         report_mismatch_key_root(key_type, rootid);
6151         return -EINVAL;
6152 }
6153
6154 static int run_next_block(struct btrfs_root *root,
6155                           struct block_info *bits,
6156                           int bits_nr,
6157                           u64 *last,
6158                           struct cache_tree *pending,
6159                           struct cache_tree *seen,
6160                           struct cache_tree *reada,
6161                           struct cache_tree *nodes,
6162                           struct cache_tree *extent_cache,
6163                           struct cache_tree *chunk_cache,
6164                           struct rb_root *dev_cache,
6165                           struct block_group_tree *block_group_cache,
6166                           struct device_extent_tree *dev_extent_cache,
6167                           struct root_item_record *ri)
6168 {
6169         struct extent_buffer *buf;
6170         struct extent_record *rec = NULL;
6171         u64 bytenr;
6172         u32 size;
6173         u64 parent;
6174         u64 owner;
6175         u64 flags;
6176         u64 ptr;
6177         u64 gen = 0;
6178         int ret = 0;
6179         int i;
6180         int nritems;
6181         struct btrfs_key key;
6182         struct cache_extent *cache;
6183         int reada_bits;
6184
6185         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6186                                     bits_nr, &reada_bits);
6187         if (nritems == 0)
6188                 return 1;
6189
6190         if (!reada_bits) {
6191                 for(i = 0; i < nritems; i++) {
6192                         ret = add_cache_extent(reada, bits[i].start,
6193                                                bits[i].size);
6194                         if (ret == -EEXIST)
6195                                 continue;
6196
6197                         /* fixme, get the parent transid */
6198                         readahead_tree_block(root, bits[i].start,
6199                                              bits[i].size, 0);
6200                 }
6201         }
6202         *last = bits[0].start;
6203         bytenr = bits[0].start;
6204         size = bits[0].size;
6205
6206         cache = lookup_cache_extent(pending, bytenr, size);
6207         if (cache) {
6208                 remove_cache_extent(pending, cache);
6209                 free(cache);
6210         }
6211         cache = lookup_cache_extent(reada, bytenr, size);
6212         if (cache) {
6213                 remove_cache_extent(reada, cache);
6214                 free(cache);
6215         }
6216         cache = lookup_cache_extent(nodes, bytenr, size);
6217         if (cache) {
6218                 remove_cache_extent(nodes, cache);
6219                 free(cache);
6220         }
6221         cache = lookup_cache_extent(extent_cache, bytenr, size);
6222         if (cache) {
6223                 rec = container_of(cache, struct extent_record, cache);
6224                 gen = rec->parent_generation;
6225         }
6226
6227         /* fixme, get the real parent transid */
6228         buf = read_tree_block(root, bytenr, size, gen);
6229         if (!extent_buffer_uptodate(buf)) {
6230                 record_bad_block_io(root->fs_info,
6231                                     extent_cache, bytenr, size);
6232                 goto out;
6233         }
6234
6235         nritems = btrfs_header_nritems(buf);
6236
6237         flags = 0;
6238         if (!init_extent_tree) {
6239                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6240                                        btrfs_header_level(buf), 1, NULL,
6241                                        &flags);
6242                 if (ret < 0) {
6243                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6244                         if (ret < 0) {
6245                                 fprintf(stderr, "Couldn't calc extent flags\n");
6246                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6247                         }
6248                 }
6249         } else {
6250                 flags = 0;
6251                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6252                 if (ret < 0) {
6253                         fprintf(stderr, "Couldn't calc extent flags\n");
6254                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6255                 }
6256         }
6257
6258         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6259                 if (ri != NULL &&
6260                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6261                     ri->objectid == btrfs_header_owner(buf)) {
6262                         /*
6263                          * Ok we got to this block from it's original owner and
6264                          * we have FULL_BACKREF set.  Relocation can leave
6265                          * converted blocks over so this is altogether possible,
6266                          * however it's not possible if the generation > the
6267                          * last snapshot, so check for this case.
6268                          */
6269                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6270                             btrfs_header_generation(buf) > ri->last_snapshot) {
6271                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6272                                 rec->bad_full_backref = 1;
6273                         }
6274                 }
6275         } else {
6276                 if (ri != NULL &&
6277                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6278                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6279                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6280                         rec->bad_full_backref = 1;
6281                 }
6282         }
6283
6284         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6285                 rec->flag_block_full_backref = 1;
6286                 parent = bytenr;
6287                 owner = 0;
6288         } else {
6289                 rec->flag_block_full_backref = 0;
6290                 parent = 0;
6291                 owner = btrfs_header_owner(buf);
6292         }
6293
6294         ret = check_block(root, extent_cache, buf, flags);
6295         if (ret)
6296                 goto out;
6297
6298         if (btrfs_is_leaf(buf)) {
6299                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6300                 for (i = 0; i < nritems; i++) {
6301                         struct btrfs_file_extent_item *fi;
6302                         btrfs_item_key_to_cpu(buf, &key, i);
6303                         /*
6304                          * Check key type against the leaf owner.
6305                          * Could filter quite a lot of early error if
6306                          * owner is correct
6307                          */
6308                         if (check_type_with_root(btrfs_header_owner(buf),
6309                                                  key.type)) {
6310                                 fprintf(stderr, "ignoring invalid key\n");
6311                                 continue;
6312                         }
6313                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6314                                 process_extent_item(root, extent_cache, buf,
6315                                                     i);
6316                                 continue;
6317                         }
6318                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6319                                 process_extent_item(root, extent_cache, buf,
6320                                                     i);
6321                                 continue;
6322                         }
6323                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6324                                 total_csum_bytes +=
6325                                         btrfs_item_size_nr(buf, i);
6326                                 continue;
6327                         }
6328                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6329                                 process_chunk_item(chunk_cache, &key, buf, i);
6330                                 continue;
6331                         }
6332                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6333                                 process_device_item(dev_cache, &key, buf, i);
6334                                 continue;
6335                         }
6336                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6337                                 process_block_group_item(block_group_cache,
6338                                         &key, buf, i);
6339                                 continue;
6340                         }
6341                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6342                                 process_device_extent_item(dev_extent_cache,
6343                                         &key, buf, i);
6344                                 continue;
6345
6346                         }
6347                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6348 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6349                                 process_extent_ref_v0(extent_cache, buf, i);
6350 #else
6351                                 BUG();
6352 #endif
6353                                 continue;
6354                         }
6355
6356                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6357                                 ret = add_tree_backref(extent_cache,
6358                                                 key.objectid, 0, key.offset, 0);
6359                                 if (ret < 0)
6360                                         error("add_tree_backref failed: %s",
6361                                               strerror(-ret));
6362                                 continue;
6363                         }
6364                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6365                                 ret = add_tree_backref(extent_cache,
6366                                                 key.objectid, key.offset, 0, 0);
6367                                 if (ret < 0)
6368                                         error("add_tree_backref failed: %s",
6369                                               strerror(-ret));
6370                                 continue;
6371                         }
6372                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6373                                 struct btrfs_extent_data_ref *ref;
6374                                 ref = btrfs_item_ptr(buf, i,
6375                                                 struct btrfs_extent_data_ref);
6376                                 add_data_backref(extent_cache,
6377                                         key.objectid, 0,
6378                                         btrfs_extent_data_ref_root(buf, ref),
6379                                         btrfs_extent_data_ref_objectid(buf,
6380                                                                        ref),
6381                                         btrfs_extent_data_ref_offset(buf, ref),
6382                                         btrfs_extent_data_ref_count(buf, ref),
6383                                         0, root->sectorsize);
6384                                 continue;
6385                         }
6386                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6387                                 struct btrfs_shared_data_ref *ref;
6388                                 ref = btrfs_item_ptr(buf, i,
6389                                                 struct btrfs_shared_data_ref);
6390                                 add_data_backref(extent_cache,
6391                                         key.objectid, key.offset, 0, 0, 0,
6392                                         btrfs_shared_data_ref_count(buf, ref),
6393                                         0, root->sectorsize);
6394                                 continue;
6395                         }
6396                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6397                                 struct bad_item *bad;
6398
6399                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6400                                         continue;
6401                                 if (!owner)
6402                                         continue;
6403                                 bad = malloc(sizeof(struct bad_item));
6404                                 if (!bad)
6405                                         continue;
6406                                 INIT_LIST_HEAD(&bad->list);
6407                                 memcpy(&bad->key, &key,
6408                                        sizeof(struct btrfs_key));
6409                                 bad->root_id = owner;
6410                                 list_add_tail(&bad->list, &delete_items);
6411                                 continue;
6412                         }
6413                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6414                                 continue;
6415                         fi = btrfs_item_ptr(buf, i,
6416                                             struct btrfs_file_extent_item);
6417                         if (btrfs_file_extent_type(buf, fi) ==
6418                             BTRFS_FILE_EXTENT_INLINE)
6419                                 continue;
6420                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6421                                 continue;
6422
6423                         data_bytes_allocated +=
6424                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6425                         if (data_bytes_allocated < root->sectorsize) {
6426                                 abort();
6427                         }
6428                         data_bytes_referenced +=
6429                                 btrfs_file_extent_num_bytes(buf, fi);
6430                         add_data_backref(extent_cache,
6431                                 btrfs_file_extent_disk_bytenr(buf, fi),
6432                                 parent, owner, key.objectid, key.offset -
6433                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6434                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6435                 }
6436         } else {
6437                 int level;
6438                 struct btrfs_key first_key;
6439
6440                 first_key.objectid = 0;
6441
6442                 if (nritems > 0)
6443                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6444                 level = btrfs_header_level(buf);
6445                 for (i = 0; i < nritems; i++) {
6446                         struct extent_record tmpl;
6447
6448                         ptr = btrfs_node_blockptr(buf, i);
6449                         size = root->nodesize;
6450                         btrfs_node_key_to_cpu(buf, &key, i);
6451                         if (ri != NULL) {
6452                                 if ((level == ri->drop_level)
6453                                     && is_dropped_key(&key, &ri->drop_key)) {
6454                                         continue;
6455                                 }
6456                         }
6457
6458                         memset(&tmpl, 0, sizeof(tmpl));
6459                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6460                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6461                         tmpl.start = ptr;
6462                         tmpl.nr = size;
6463                         tmpl.refs = 1;
6464                         tmpl.metadata = 1;
6465                         tmpl.max_size = size;
6466                         ret = add_extent_rec(extent_cache, &tmpl);
6467                         if (ret < 0)
6468                                 goto out;
6469
6470                         ret = add_tree_backref(extent_cache, ptr, parent,
6471                                         owner, 1);
6472                         if (ret < 0) {
6473                                 error("add_tree_backref failed: %s",
6474                                       strerror(-ret));
6475                                 continue;
6476                         }
6477
6478                         if (level > 1) {
6479                                 add_pending(nodes, seen, ptr, size);
6480                         } else {
6481                                 add_pending(pending, seen, ptr, size);
6482                         }
6483                 }
6484                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6485                                       nritems) * sizeof(struct btrfs_key_ptr);
6486         }
6487         total_btree_bytes += buf->len;
6488         if (fs_root_objectid(btrfs_header_owner(buf)))
6489                 total_fs_tree_bytes += buf->len;
6490         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6491                 total_extent_tree_bytes += buf->len;
6492         if (!found_old_backref &&
6493             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6494             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6495             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6496                 found_old_backref = 1;
6497 out:
6498         free_extent_buffer(buf);
6499         return ret;
6500 }
6501
6502 static int add_root_to_pending(struct extent_buffer *buf,
6503                                struct cache_tree *extent_cache,
6504                                struct cache_tree *pending,
6505                                struct cache_tree *seen,
6506                                struct cache_tree *nodes,
6507                                u64 objectid)
6508 {
6509         struct extent_record tmpl;
6510         int ret;
6511
6512         if (btrfs_header_level(buf) > 0)
6513                 add_pending(nodes, seen, buf->start, buf->len);
6514         else
6515                 add_pending(pending, seen, buf->start, buf->len);
6516
6517         memset(&tmpl, 0, sizeof(tmpl));
6518         tmpl.start = buf->start;
6519         tmpl.nr = buf->len;
6520         tmpl.is_root = 1;
6521         tmpl.refs = 1;
6522         tmpl.metadata = 1;
6523         tmpl.max_size = buf->len;
6524         add_extent_rec(extent_cache, &tmpl);
6525
6526         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6527             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6528                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6529                                 0, 1);
6530         else
6531                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6532                                 1);
6533         return ret;
6534 }
6535
6536 /* as we fix the tree, we might be deleting blocks that
6537  * we're tracking for repair.  This hook makes sure we
6538  * remove any backrefs for blocks as we are fixing them.
6539  */
6540 static int free_extent_hook(struct btrfs_trans_handle *trans,
6541                             struct btrfs_root *root,
6542                             u64 bytenr, u64 num_bytes, u64 parent,
6543                             u64 root_objectid, u64 owner, u64 offset,
6544                             int refs_to_drop)
6545 {
6546         struct extent_record *rec;
6547         struct cache_extent *cache;
6548         int is_data;
6549         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6550
6551         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6552         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6553         if (!cache)
6554                 return 0;
6555
6556         rec = container_of(cache, struct extent_record, cache);
6557         if (is_data) {
6558                 struct data_backref *back;
6559                 back = find_data_backref(rec, parent, root_objectid, owner,
6560                                          offset, 1, bytenr, num_bytes);
6561                 if (!back)
6562                         goto out;
6563                 if (back->node.found_ref) {
6564                         back->found_ref -= refs_to_drop;
6565                         if (rec->refs)
6566                                 rec->refs -= refs_to_drop;
6567                 }
6568                 if (back->node.found_extent_tree) {
6569                         back->num_refs -= refs_to_drop;
6570                         if (rec->extent_item_refs)
6571                                 rec->extent_item_refs -= refs_to_drop;
6572                 }
6573                 if (back->found_ref == 0)
6574                         back->node.found_ref = 0;
6575                 if (back->num_refs == 0)
6576                         back->node.found_extent_tree = 0;
6577
6578                 if (!back->node.found_extent_tree && back->node.found_ref) {
6579                         list_del(&back->node.list);
6580                         free(back);
6581                 }
6582         } else {
6583                 struct tree_backref *back;
6584                 back = find_tree_backref(rec, parent, root_objectid);
6585                 if (!back)
6586                         goto out;
6587                 if (back->node.found_ref) {
6588                         if (rec->refs)
6589                                 rec->refs--;
6590                         back->node.found_ref = 0;
6591                 }
6592                 if (back->node.found_extent_tree) {
6593                         if (rec->extent_item_refs)
6594                                 rec->extent_item_refs--;
6595                         back->node.found_extent_tree = 0;
6596                 }
6597                 if (!back->node.found_extent_tree && back->node.found_ref) {
6598                         list_del(&back->node.list);
6599                         free(back);
6600                 }
6601         }
6602         maybe_free_extent_rec(extent_cache, rec);
6603 out:
6604         return 0;
6605 }
6606
6607 static int delete_extent_records(struct btrfs_trans_handle *trans,
6608                                  struct btrfs_root *root,
6609                                  struct btrfs_path *path,
6610                                  u64 bytenr, u64 new_len)
6611 {
6612         struct btrfs_key key;
6613         struct btrfs_key found_key;
6614         struct extent_buffer *leaf;
6615         int ret;
6616         int slot;
6617
6618
6619         key.objectid = bytenr;
6620         key.type = (u8)-1;
6621         key.offset = (u64)-1;
6622
6623         while(1) {
6624                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6625                                         &key, path, 0, 1);
6626                 if (ret < 0)
6627                         break;
6628
6629                 if (ret > 0) {
6630                         ret = 0;
6631                         if (path->slots[0] == 0)
6632                                 break;
6633                         path->slots[0]--;
6634                 }
6635                 ret = 0;
6636
6637                 leaf = path->nodes[0];
6638                 slot = path->slots[0];
6639
6640                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6641                 if (found_key.objectid != bytenr)
6642                         break;
6643
6644                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6645                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6646                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6647                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6648                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6649                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6650                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6651                         btrfs_release_path(path);
6652                         if (found_key.type == 0) {
6653                                 if (found_key.offset == 0)
6654                                         break;
6655                                 key.offset = found_key.offset - 1;
6656                                 key.type = found_key.type;
6657                         }
6658                         key.type = found_key.type - 1;
6659                         key.offset = (u64)-1;
6660                         continue;
6661                 }
6662
6663                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6664                         found_key.objectid, found_key.type, found_key.offset);
6665
6666                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6667                 if (ret)
6668                         break;
6669                 btrfs_release_path(path);
6670
6671                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6672                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6673                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6674                                 found_key.offset : root->nodesize;
6675
6676                         ret = btrfs_update_block_group(trans, root, bytenr,
6677                                                        bytes, 0, 0);
6678                         if (ret)
6679                                 break;
6680                 }
6681         }
6682
6683         btrfs_release_path(path);
6684         return ret;
6685 }
6686
6687 /*
6688  * for a single backref, this will allocate a new extent
6689  * and add the backref to it.
6690  */
6691 static int record_extent(struct btrfs_trans_handle *trans,
6692                          struct btrfs_fs_info *info,
6693                          struct btrfs_path *path,
6694                          struct extent_record *rec,
6695                          struct extent_backref *back,
6696                          int allocated, u64 flags)
6697 {
6698         int ret;
6699         struct btrfs_root *extent_root = info->extent_root;
6700         struct extent_buffer *leaf;
6701         struct btrfs_key ins_key;
6702         struct btrfs_extent_item *ei;
6703         struct tree_backref *tback;
6704         struct data_backref *dback;
6705         struct btrfs_tree_block_info *bi;
6706
6707         if (!back->is_data)
6708                 rec->max_size = max_t(u64, rec->max_size,
6709                                     info->extent_root->nodesize);
6710
6711         if (!allocated) {
6712                 u32 item_size = sizeof(*ei);
6713
6714                 if (!back->is_data)
6715                         item_size += sizeof(*bi);
6716
6717                 ins_key.objectid = rec->start;
6718                 ins_key.offset = rec->max_size;
6719                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6720
6721                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6722                                         &ins_key, item_size);
6723                 if (ret)
6724                         goto fail;
6725
6726                 leaf = path->nodes[0];
6727                 ei = btrfs_item_ptr(leaf, path->slots[0],
6728                                     struct btrfs_extent_item);
6729
6730                 btrfs_set_extent_refs(leaf, ei, 0);
6731                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6732
6733                 if (back->is_data) {
6734                         btrfs_set_extent_flags(leaf, ei,
6735                                                BTRFS_EXTENT_FLAG_DATA);
6736                 } else {
6737                         struct btrfs_disk_key copy_key;;
6738
6739                         tback = to_tree_backref(back);
6740                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6741                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6742                                              sizeof(*bi));
6743
6744                         btrfs_set_disk_key_objectid(&copy_key,
6745                                                     rec->info_objectid);
6746                         btrfs_set_disk_key_type(&copy_key, 0);
6747                         btrfs_set_disk_key_offset(&copy_key, 0);
6748
6749                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6750                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6751
6752                         btrfs_set_extent_flags(leaf, ei,
6753                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6754                 }
6755
6756                 btrfs_mark_buffer_dirty(leaf);
6757                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6758                                                rec->max_size, 1, 0);
6759                 if (ret)
6760                         goto fail;
6761                 btrfs_release_path(path);
6762         }
6763
6764         if (back->is_data) {
6765                 u64 parent;
6766                 int i;
6767
6768                 dback = to_data_backref(back);
6769                 if (back->full_backref)
6770                         parent = dback->parent;
6771                 else
6772                         parent = 0;
6773
6774                 for (i = 0; i < dback->found_ref; i++) {
6775                         /* if parent != 0, we're doing a full backref
6776                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6777                          * just makes the backref allocator create a data
6778                          * backref
6779                          */
6780                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6781                                                    rec->start, rec->max_size,
6782                                                    parent,
6783                                                    dback->root,
6784                                                    parent ?
6785                                                    BTRFS_FIRST_FREE_OBJECTID :
6786                                                    dback->owner,
6787                                                    dback->offset);
6788                         if (ret)
6789                                 break;
6790                 }
6791                 fprintf(stderr, "adding new data backref"
6792                                 " on %llu %s %llu owner %llu"
6793                                 " offset %llu found %d\n",
6794                                 (unsigned long long)rec->start,
6795                                 back->full_backref ?
6796                                 "parent" : "root",
6797                                 back->full_backref ?
6798                                 (unsigned long long)parent :
6799                                 (unsigned long long)dback->root,
6800                                 (unsigned long long)dback->owner,
6801                                 (unsigned long long)dback->offset,
6802                                 dback->found_ref);
6803         } else {
6804                 u64 parent;
6805
6806                 tback = to_tree_backref(back);
6807                 if (back->full_backref)
6808                         parent = tback->parent;
6809                 else
6810                         parent = 0;
6811
6812                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6813                                            rec->start, rec->max_size,
6814                                            parent, tback->root, 0, 0);
6815                 fprintf(stderr, "adding new tree backref on "
6816                         "start %llu len %llu parent %llu root %llu\n",
6817                         rec->start, rec->max_size, parent, tback->root);
6818         }
6819 fail:
6820         btrfs_release_path(path);
6821         return ret;
6822 }
6823
6824 static struct extent_entry *find_entry(struct list_head *entries,
6825                                        u64 bytenr, u64 bytes)
6826 {
6827         struct extent_entry *entry = NULL;
6828
6829         list_for_each_entry(entry, entries, list) {
6830                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6831                         return entry;
6832         }
6833
6834         return NULL;
6835 }
6836
6837 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6838 {
6839         struct extent_entry *entry, *best = NULL, *prev = NULL;
6840
6841         list_for_each_entry(entry, entries, list) {
6842                 if (!prev) {
6843                         prev = entry;
6844                         continue;
6845                 }
6846
6847                 /*
6848                  * If there are as many broken entries as entries then we know
6849                  * not to trust this particular entry.
6850                  */
6851                 if (entry->broken == entry->count)
6852                         continue;
6853
6854                 /*
6855                  * If our current entry == best then we can't be sure our best
6856                  * is really the best, so we need to keep searching.
6857                  */
6858                 if (best && best->count == entry->count) {
6859                         prev = entry;
6860                         best = NULL;
6861                         continue;
6862                 }
6863
6864                 /* Prev == entry, not good enough, have to keep searching */
6865                 if (!prev->broken && prev->count == entry->count)
6866                         continue;
6867
6868                 if (!best)
6869                         best = (prev->count > entry->count) ? prev : entry;
6870                 else if (best->count < entry->count)
6871                         best = entry;
6872                 prev = entry;
6873         }
6874
6875         return best;
6876 }
6877
6878 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6879                       struct data_backref *dback, struct extent_entry *entry)
6880 {
6881         struct btrfs_trans_handle *trans;
6882         struct btrfs_root *root;
6883         struct btrfs_file_extent_item *fi;
6884         struct extent_buffer *leaf;
6885         struct btrfs_key key;
6886         u64 bytenr, bytes;
6887         int ret, err;
6888
6889         key.objectid = dback->root;
6890         key.type = BTRFS_ROOT_ITEM_KEY;
6891         key.offset = (u64)-1;
6892         root = btrfs_read_fs_root(info, &key);
6893         if (IS_ERR(root)) {
6894                 fprintf(stderr, "Couldn't find root for our ref\n");
6895                 return -EINVAL;
6896         }
6897
6898         /*
6899          * The backref points to the original offset of the extent if it was
6900          * split, so we need to search down to the offset we have and then walk
6901          * forward until we find the backref we're looking for.
6902          */
6903         key.objectid = dback->owner;
6904         key.type = BTRFS_EXTENT_DATA_KEY;
6905         key.offset = dback->offset;
6906         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6907         if (ret < 0) {
6908                 fprintf(stderr, "Error looking up ref %d\n", ret);
6909                 return ret;
6910         }
6911
6912         while (1) {
6913                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6914                         ret = btrfs_next_leaf(root, path);
6915                         if (ret) {
6916                                 fprintf(stderr, "Couldn't find our ref, next\n");
6917                                 return -EINVAL;
6918                         }
6919                 }
6920                 leaf = path->nodes[0];
6921                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6922                 if (key.objectid != dback->owner ||
6923                     key.type != BTRFS_EXTENT_DATA_KEY) {
6924                         fprintf(stderr, "Couldn't find our ref, search\n");
6925                         return -EINVAL;
6926                 }
6927                 fi = btrfs_item_ptr(leaf, path->slots[0],
6928                                     struct btrfs_file_extent_item);
6929                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6930                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6931
6932                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6933                         break;
6934                 path->slots[0]++;
6935         }
6936
6937         btrfs_release_path(path);
6938
6939         trans = btrfs_start_transaction(root, 1);
6940         if (IS_ERR(trans))
6941                 return PTR_ERR(trans);
6942
6943         /*
6944          * Ok we have the key of the file extent we want to fix, now we can cow
6945          * down to the thing and fix it.
6946          */
6947         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6948         if (ret < 0) {
6949                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6950                         key.objectid, key.type, key.offset, ret);
6951                 goto out;
6952         }
6953         if (ret > 0) {
6954                 fprintf(stderr, "Well that's odd, we just found this key "
6955                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6956                         key.offset);
6957                 ret = -EINVAL;
6958                 goto out;
6959         }
6960         leaf = path->nodes[0];
6961         fi = btrfs_item_ptr(leaf, path->slots[0],
6962                             struct btrfs_file_extent_item);
6963
6964         if (btrfs_file_extent_compression(leaf, fi) &&
6965             dback->disk_bytenr != entry->bytenr) {
6966                 fprintf(stderr, "Ref doesn't match the record start and is "
6967                         "compressed, please take a btrfs-image of this file "
6968                         "system and send it to a btrfs developer so they can "
6969                         "complete this functionality for bytenr %Lu\n",
6970                         dback->disk_bytenr);
6971                 ret = -EINVAL;
6972                 goto out;
6973         }
6974
6975         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6976                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6977         } else if (dback->disk_bytenr > entry->bytenr) {
6978                 u64 off_diff, offset;
6979
6980                 off_diff = dback->disk_bytenr - entry->bytenr;
6981                 offset = btrfs_file_extent_offset(leaf, fi);
6982                 if (dback->disk_bytenr + offset +
6983                     btrfs_file_extent_num_bytes(leaf, fi) >
6984                     entry->bytenr + entry->bytes) {
6985                         fprintf(stderr, "Ref is past the entry end, please "
6986                                 "take a btrfs-image of this file system and "
6987                                 "send it to a btrfs developer, ref %Lu\n",
6988                                 dback->disk_bytenr);
6989                         ret = -EINVAL;
6990                         goto out;
6991                 }
6992                 offset += off_diff;
6993                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6994                 btrfs_set_file_extent_offset(leaf, fi, offset);
6995         } else if (dback->disk_bytenr < entry->bytenr) {
6996                 u64 offset;
6997
6998                 offset = btrfs_file_extent_offset(leaf, fi);
6999                 if (dback->disk_bytenr + offset < entry->bytenr) {
7000                         fprintf(stderr, "Ref is before the entry start, please"
7001                                 " take a btrfs-image of this file system and "
7002                                 "send it to a btrfs developer, ref %Lu\n",
7003                                 dback->disk_bytenr);
7004                         ret = -EINVAL;
7005                         goto out;
7006                 }
7007
7008                 offset += dback->disk_bytenr;
7009                 offset -= entry->bytenr;
7010                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7011                 btrfs_set_file_extent_offset(leaf, fi, offset);
7012         }
7013
7014         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7015
7016         /*
7017          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7018          * only do this if we aren't using compression, otherwise it's a
7019          * trickier case.
7020          */
7021         if (!btrfs_file_extent_compression(leaf, fi))
7022                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7023         else
7024                 printf("ram bytes may be wrong?\n");
7025         btrfs_mark_buffer_dirty(leaf);
7026 out:
7027         err = btrfs_commit_transaction(trans, root);
7028         btrfs_release_path(path);
7029         return ret ? ret : err;
7030 }
7031
7032 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7033                            struct extent_record *rec)
7034 {
7035         struct extent_backref *back;
7036         struct data_backref *dback;
7037         struct extent_entry *entry, *best = NULL;
7038         LIST_HEAD(entries);
7039         int nr_entries = 0;
7040         int broken_entries = 0;
7041         int ret = 0;
7042         short mismatch = 0;
7043
7044         /*
7045          * Metadata is easy and the backrefs should always agree on bytenr and
7046          * size, if not we've got bigger issues.
7047          */
7048         if (rec->metadata)
7049                 return 0;
7050
7051         list_for_each_entry(back, &rec->backrefs, list) {
7052                 if (back->full_backref || !back->is_data)
7053                         continue;
7054
7055                 dback = to_data_backref(back);
7056
7057                 /*
7058                  * We only pay attention to backrefs that we found a real
7059                  * backref for.
7060                  */
7061                 if (dback->found_ref == 0)
7062                         continue;
7063
7064                 /*
7065                  * For now we only catch when the bytes don't match, not the
7066                  * bytenr.  We can easily do this at the same time, but I want
7067                  * to have a fs image to test on before we just add repair
7068                  * functionality willy-nilly so we know we won't screw up the
7069                  * repair.
7070                  */
7071
7072                 entry = find_entry(&entries, dback->disk_bytenr,
7073                                    dback->bytes);
7074                 if (!entry) {
7075                         entry = malloc(sizeof(struct extent_entry));
7076                         if (!entry) {
7077                                 ret = -ENOMEM;
7078                                 goto out;
7079                         }
7080                         memset(entry, 0, sizeof(*entry));
7081                         entry->bytenr = dback->disk_bytenr;
7082                         entry->bytes = dback->bytes;
7083                         list_add_tail(&entry->list, &entries);
7084                         nr_entries++;
7085                 }
7086
7087                 /*
7088                  * If we only have on entry we may think the entries agree when
7089                  * in reality they don't so we have to do some extra checking.
7090                  */
7091                 if (dback->disk_bytenr != rec->start ||
7092                     dback->bytes != rec->nr || back->broken)
7093                         mismatch = 1;
7094
7095                 if (back->broken) {
7096                         entry->broken++;
7097                         broken_entries++;
7098                 }
7099
7100                 entry->count++;
7101         }
7102
7103         /* Yay all the backrefs agree, carry on good sir */
7104         if (nr_entries <= 1 && !mismatch)
7105                 goto out;
7106
7107         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7108                 "%Lu\n", rec->start);
7109
7110         /*
7111          * First we want to see if the backrefs can agree amongst themselves who
7112          * is right, so figure out which one of the entries has the highest
7113          * count.
7114          */
7115         best = find_most_right_entry(&entries);
7116
7117         /*
7118          * Ok so we may have an even split between what the backrefs think, so
7119          * this is where we use the extent ref to see what it thinks.
7120          */
7121         if (!best) {
7122                 entry = find_entry(&entries, rec->start, rec->nr);
7123                 if (!entry && (!broken_entries || !rec->found_rec)) {
7124                         fprintf(stderr, "Backrefs don't agree with each other "
7125                                 "and extent record doesn't agree with anybody,"
7126                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7127                                 rec->start, rec->nr);
7128                         ret = -EINVAL;
7129                         goto out;
7130                 } else if (!entry) {
7131                         /*
7132                          * Ok our backrefs were broken, we'll assume this is the
7133                          * correct value and add an entry for this range.
7134                          */
7135                         entry = malloc(sizeof(struct extent_entry));
7136                         if (!entry) {
7137                                 ret = -ENOMEM;
7138                                 goto out;
7139                         }
7140                         memset(entry, 0, sizeof(*entry));
7141                         entry->bytenr = rec->start;
7142                         entry->bytes = rec->nr;
7143                         list_add_tail(&entry->list, &entries);
7144                         nr_entries++;
7145                 }
7146                 entry->count++;
7147                 best = find_most_right_entry(&entries);
7148                 if (!best) {
7149                         fprintf(stderr, "Backrefs and extent record evenly "
7150                                 "split on who is right, this is going to "
7151                                 "require user input to fix bytenr %Lu bytes "
7152                                 "%Lu\n", rec->start, rec->nr);
7153                         ret = -EINVAL;
7154                         goto out;
7155                 }
7156         }
7157
7158         /*
7159          * I don't think this can happen currently as we'll abort() if we catch
7160          * this case higher up, but in case somebody removes that we still can't
7161          * deal with it properly here yet, so just bail out of that's the case.
7162          */
7163         if (best->bytenr != rec->start) {
7164                 fprintf(stderr, "Extent start and backref starts don't match, "
7165                         "please use btrfs-image on this file system and send "
7166                         "it to a btrfs developer so they can make fsck fix "
7167                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7168                         rec->start, rec->nr);
7169                 ret = -EINVAL;
7170                 goto out;
7171         }
7172
7173         /*
7174          * Ok great we all agreed on an extent record, let's go find the real
7175          * references and fix up the ones that don't match.
7176          */
7177         list_for_each_entry(back, &rec->backrefs, list) {
7178                 if (back->full_backref || !back->is_data)
7179                         continue;
7180
7181                 dback = to_data_backref(back);
7182
7183                 /*
7184                  * Still ignoring backrefs that don't have a real ref attached
7185                  * to them.
7186                  */
7187                 if (dback->found_ref == 0)
7188                         continue;
7189
7190                 if (dback->bytes == best->bytes &&
7191                     dback->disk_bytenr == best->bytenr)
7192                         continue;
7193
7194                 ret = repair_ref(info, path, dback, best);
7195                 if (ret)
7196                         goto out;
7197         }
7198
7199         /*
7200          * Ok we messed with the actual refs, which means we need to drop our
7201          * entire cache and go back and rescan.  I know this is a huge pain and
7202          * adds a lot of extra work, but it's the only way to be safe.  Once all
7203          * the backrefs agree we may not need to do anything to the extent
7204          * record itself.
7205          */
7206         ret = -EAGAIN;
7207 out:
7208         while (!list_empty(&entries)) {
7209                 entry = list_entry(entries.next, struct extent_entry, list);
7210                 list_del_init(&entry->list);
7211                 free(entry);
7212         }
7213         return ret;
7214 }
7215
7216 static int process_duplicates(struct btrfs_root *root,
7217                               struct cache_tree *extent_cache,
7218                               struct extent_record *rec)
7219 {
7220         struct extent_record *good, *tmp;
7221         struct cache_extent *cache;
7222         int ret;
7223
7224         /*
7225          * If we found a extent record for this extent then return, or if we
7226          * have more than one duplicate we are likely going to need to delete
7227          * something.
7228          */
7229         if (rec->found_rec || rec->num_duplicates > 1)
7230                 return 0;
7231
7232         /* Shouldn't happen but just in case */
7233         BUG_ON(!rec->num_duplicates);
7234
7235         /*
7236          * So this happens if we end up with a backref that doesn't match the
7237          * actual extent entry.  So either the backref is bad or the extent
7238          * entry is bad.  Either way we want to have the extent_record actually
7239          * reflect what we found in the extent_tree, so we need to take the
7240          * duplicate out and use that as the extent_record since the only way we
7241          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7242          */
7243         remove_cache_extent(extent_cache, &rec->cache);
7244
7245         good = to_extent_record(rec->dups.next);
7246         list_del_init(&good->list);
7247         INIT_LIST_HEAD(&good->backrefs);
7248         INIT_LIST_HEAD(&good->dups);
7249         good->cache.start = good->start;
7250         good->cache.size = good->nr;
7251         good->content_checked = 0;
7252         good->owner_ref_checked = 0;
7253         good->num_duplicates = 0;
7254         good->refs = rec->refs;
7255         list_splice_init(&rec->backrefs, &good->backrefs);
7256         while (1) {
7257                 cache = lookup_cache_extent(extent_cache, good->start,
7258                                             good->nr);
7259                 if (!cache)
7260                         break;
7261                 tmp = container_of(cache, struct extent_record, cache);
7262
7263                 /*
7264                  * If we find another overlapping extent and it's found_rec is
7265                  * set then it's a duplicate and we need to try and delete
7266                  * something.
7267                  */
7268                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7269                         if (list_empty(&good->list))
7270                                 list_add_tail(&good->list,
7271                                               &duplicate_extents);
7272                         good->num_duplicates += tmp->num_duplicates + 1;
7273                         list_splice_init(&tmp->dups, &good->dups);
7274                         list_del_init(&tmp->list);
7275                         list_add_tail(&tmp->list, &good->dups);
7276                         remove_cache_extent(extent_cache, &tmp->cache);
7277                         continue;
7278                 }
7279
7280                 /*
7281                  * Ok we have another non extent item backed extent rec, so lets
7282                  * just add it to this extent and carry on like we did above.
7283                  */
7284                 good->refs += tmp->refs;
7285                 list_splice_init(&tmp->backrefs, &good->backrefs);
7286                 remove_cache_extent(extent_cache, &tmp->cache);
7287                 free(tmp);
7288         }
7289         ret = insert_cache_extent(extent_cache, &good->cache);
7290         BUG_ON(ret);
7291         free(rec);
7292         return good->num_duplicates ? 0 : 1;
7293 }
7294
7295 static int delete_duplicate_records(struct btrfs_root *root,
7296                                     struct extent_record *rec)
7297 {
7298         struct btrfs_trans_handle *trans;
7299         LIST_HEAD(delete_list);
7300         struct btrfs_path *path;
7301         struct extent_record *tmp, *good, *n;
7302         int nr_del = 0;
7303         int ret = 0, err;
7304         struct btrfs_key key;
7305
7306         path = btrfs_alloc_path();
7307         if (!path) {
7308                 ret = -ENOMEM;
7309                 goto out;
7310         }
7311
7312         good = rec;
7313         /* Find the record that covers all of the duplicates. */
7314         list_for_each_entry(tmp, &rec->dups, list) {
7315                 if (good->start < tmp->start)
7316                         continue;
7317                 if (good->nr > tmp->nr)
7318                         continue;
7319
7320                 if (tmp->start + tmp->nr < good->start + good->nr) {
7321                         fprintf(stderr, "Ok we have overlapping extents that "
7322                                 "aren't completely covered by each other, this "
7323                                 "is going to require more careful thought.  "
7324                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7325                                 tmp->start, tmp->nr, good->start, good->nr);
7326                         abort();
7327                 }
7328                 good = tmp;
7329         }
7330
7331         if (good != rec)
7332                 list_add_tail(&rec->list, &delete_list);
7333
7334         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7335                 if (tmp == good)
7336                         continue;
7337                 list_move_tail(&tmp->list, &delete_list);
7338         }
7339
7340         root = root->fs_info->extent_root;
7341         trans = btrfs_start_transaction(root, 1);
7342         if (IS_ERR(trans)) {
7343                 ret = PTR_ERR(trans);
7344                 goto out;
7345         }
7346
7347         list_for_each_entry(tmp, &delete_list, list) {
7348                 if (tmp->found_rec == 0)
7349                         continue;
7350                 key.objectid = tmp->start;
7351                 key.type = BTRFS_EXTENT_ITEM_KEY;
7352                 key.offset = tmp->nr;
7353
7354                 /* Shouldn't happen but just in case */
7355                 if (tmp->metadata) {
7356                         fprintf(stderr, "Well this shouldn't happen, extent "
7357                                 "record overlaps but is metadata? "
7358                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7359                         abort();
7360                 }
7361
7362                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7363                 if (ret) {
7364                         if (ret > 0)
7365                                 ret = -EINVAL;
7366                         break;
7367                 }
7368                 ret = btrfs_del_item(trans, root, path);
7369                 if (ret)
7370                         break;
7371                 btrfs_release_path(path);
7372                 nr_del++;
7373         }
7374         err = btrfs_commit_transaction(trans, root);
7375         if (err && !ret)
7376                 ret = err;
7377 out:
7378         while (!list_empty(&delete_list)) {
7379                 tmp = to_extent_record(delete_list.next);
7380                 list_del_init(&tmp->list);
7381                 if (tmp == rec)
7382                         continue;
7383                 free(tmp);
7384         }
7385
7386         while (!list_empty(&rec->dups)) {
7387                 tmp = to_extent_record(rec->dups.next);
7388                 list_del_init(&tmp->list);
7389                 free(tmp);
7390         }
7391
7392         btrfs_free_path(path);
7393
7394         if (!ret && !nr_del)
7395                 rec->num_duplicates = 0;
7396
7397         return ret ? ret : nr_del;
7398 }
7399
7400 static int find_possible_backrefs(struct btrfs_fs_info *info,
7401                                   struct btrfs_path *path,
7402                                   struct cache_tree *extent_cache,
7403                                   struct extent_record *rec)
7404 {
7405         struct btrfs_root *root;
7406         struct extent_backref *back;
7407         struct data_backref *dback;
7408         struct cache_extent *cache;
7409         struct btrfs_file_extent_item *fi;
7410         struct btrfs_key key;
7411         u64 bytenr, bytes;
7412         int ret;
7413
7414         list_for_each_entry(back, &rec->backrefs, list) {
7415                 /* Don't care about full backrefs (poor unloved backrefs) */
7416                 if (back->full_backref || !back->is_data)
7417                         continue;
7418
7419                 dback = to_data_backref(back);
7420
7421                 /* We found this one, we don't need to do a lookup */
7422                 if (dback->found_ref)
7423                         continue;
7424
7425                 key.objectid = dback->root;
7426                 key.type = BTRFS_ROOT_ITEM_KEY;
7427                 key.offset = (u64)-1;
7428
7429                 root = btrfs_read_fs_root(info, &key);
7430
7431                 /* No root, definitely a bad ref, skip */
7432                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7433                         continue;
7434                 /* Other err, exit */
7435                 if (IS_ERR(root))
7436                         return PTR_ERR(root);
7437
7438                 key.objectid = dback->owner;
7439                 key.type = BTRFS_EXTENT_DATA_KEY;
7440                 key.offset = dback->offset;
7441                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7442                 if (ret) {
7443                         btrfs_release_path(path);
7444                         if (ret < 0)
7445                                 return ret;
7446                         /* Didn't find it, we can carry on */
7447                         ret = 0;
7448                         continue;
7449                 }
7450
7451                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7452                                     struct btrfs_file_extent_item);
7453                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7454                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7455                 btrfs_release_path(path);
7456                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7457                 if (cache) {
7458                         struct extent_record *tmp;
7459                         tmp = container_of(cache, struct extent_record, cache);
7460
7461                         /*
7462                          * If we found an extent record for the bytenr for this
7463                          * particular backref then we can't add it to our
7464                          * current extent record.  We only want to add backrefs
7465                          * that don't have a corresponding extent item in the
7466                          * extent tree since they likely belong to this record
7467                          * and we need to fix it if it doesn't match bytenrs.
7468                          */
7469                         if  (tmp->found_rec)
7470                                 continue;
7471                 }
7472
7473                 dback->found_ref += 1;
7474                 dback->disk_bytenr = bytenr;
7475                 dback->bytes = bytes;
7476
7477                 /*
7478                  * Set this so the verify backref code knows not to trust the
7479                  * values in this backref.
7480                  */
7481                 back->broken = 1;
7482         }
7483
7484         return 0;
7485 }
7486
/*
 * Record orphan data refs into the corresponding root.
 *
 * Return 0 if the extent item contains a data ref and it was recorded.
 * Return 1 if the extent item contains no useful data ref.
 *   In that case, it may contain only a shared_dataref or metadata backref,
 *   or the file extent exists (this should be handled by the extent bytenr
 *   recovery routine).
 * Return <0 if something goes wrong.
 */
7497 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7498                                       struct extent_record *rec)
7499 {
7500         struct btrfs_key key;
7501         struct btrfs_root *dest_root;
7502         struct extent_backref *back;
7503         struct data_backref *dback;
7504         struct orphan_data_extent *orphan;
7505         struct btrfs_path *path;
7506         int recorded_data_ref = 0;
7507         int ret = 0;
7508
7509         if (rec->metadata)
7510                 return 1;
7511         path = btrfs_alloc_path();
7512         if (!path)
7513                 return -ENOMEM;
7514         list_for_each_entry(back, &rec->backrefs, list) {
7515                 if (back->full_backref || !back->is_data ||
7516                     !back->found_extent_tree)
7517                         continue;
7518                 dback = to_data_backref(back);
7519                 if (dback->found_ref)
7520                         continue;
7521                 key.objectid = dback->root;
7522                 key.type = BTRFS_ROOT_ITEM_KEY;
7523                 key.offset = (u64)-1;
7524
7525                 dest_root = btrfs_read_fs_root(fs_info, &key);
7526
7527                 /* For non-exist root we just skip it */
7528                 if (IS_ERR(dest_root) || !dest_root)
7529                         continue;
7530
7531                 key.objectid = dback->owner;
7532                 key.type = BTRFS_EXTENT_DATA_KEY;
7533                 key.offset = dback->offset;
7534
7535                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7536                 btrfs_release_path(path);
7537                 /*
7538                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7539                  * we need to record it for inode/file extent rebuild.
7540                  * For ret > 0, we record it only for file extent rebuild.
7541                  * For ret == 0, the file extent exists but only bytenr
7542                  * mismatch, let the original bytenr fix routine to handle,
7543                  * don't record it.
7544                  */
7545                 if (ret == 0)
7546                         continue;
7547                 ret = 0;
7548                 orphan = malloc(sizeof(*orphan));
7549                 if (!orphan) {
7550                         ret = -ENOMEM;
7551                         goto out;
7552                 }
7553                 INIT_LIST_HEAD(&orphan->list);
7554                 orphan->root = dback->root;
7555                 orphan->objectid = dback->owner;
7556                 orphan->offset = dback->offset;
7557                 orphan->disk_bytenr = rec->cache.start;
7558                 orphan->disk_len = rec->cache.size;
7559                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7560                 recorded_data_ref = 1;
7561         }
7562 out:
7563         btrfs_free_path(path);
7564         if (!ret)
7565                 return !recorded_data_ref;
7566         else
7567                 return ret;
7568 }
7569
7570 /*
7571  * when an incorrect extent item is found, this will delete
7572  * all of the existing entries for it and recreate them
7573  * based on what the tree scan found.
7574  */
7575 static int fixup_extent_refs(struct btrfs_fs_info *info,
7576                              struct cache_tree *extent_cache,
7577                              struct extent_record *rec)
7578 {
7579         struct btrfs_trans_handle *trans = NULL;
7580         int ret;
7581         struct btrfs_path *path;
7582         struct list_head *cur = rec->backrefs.next;
7583         struct cache_extent *cache;
7584         struct extent_backref *back;
7585         int allocated = 0;
7586         u64 flags = 0;
7587
7588         if (rec->flag_block_full_backref)
7589                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7590
7591         path = btrfs_alloc_path();
7592         if (!path)
7593                 return -ENOMEM;
7594
7595         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7596                 /*
7597                  * Sometimes the backrefs themselves are so broken they don't
7598                  * get attached to any meaningful rec, so first go back and
7599                  * check any of our backrefs that we couldn't find and throw
7600                  * them into the list if we find the backref so that
7601                  * verify_backrefs can figure out what to do.
7602                  */
7603                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7604                 if (ret < 0)
7605                         goto out;
7606         }
7607
7608         /* step one, make sure all of the backrefs agree */
7609         ret = verify_backrefs(info, path, rec);
7610         if (ret < 0)
7611                 goto out;
7612
7613         trans = btrfs_start_transaction(info->extent_root, 1);
7614         if (IS_ERR(trans)) {
7615                 ret = PTR_ERR(trans);
7616                 goto out;
7617         }
7618
7619         /* step two, delete all the existing records */
7620         ret = delete_extent_records(trans, info->extent_root, path,
7621                                     rec->start, rec->max_size);
7622
7623         if (ret < 0)
7624                 goto out;
7625
7626         /* was this block corrupt?  If so, don't add references to it */
7627         cache = lookup_cache_extent(info->corrupt_blocks,
7628                                     rec->start, rec->max_size);
7629         if (cache) {
7630                 ret = 0;
7631                 goto out;
7632         }
7633
7634         /* step three, recreate all the refs we did find */
7635         while(cur != &rec->backrefs) {
7636                 back = to_extent_backref(cur);
7637                 cur = cur->next;
7638
7639                 /*
7640                  * if we didn't find any references, don't create a
7641                  * new extent record
7642                  */
7643                 if (!back->found_ref)
7644                         continue;
7645
7646                 rec->bad_full_backref = 0;
7647                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7648                 allocated = 1;
7649
7650                 if (ret)
7651                         goto out;
7652         }
7653 out:
7654         if (trans) {
7655                 int err = btrfs_commit_transaction(trans, info->extent_root);
7656                 if (!ret)
7657                         ret = err;
7658         }
7659
7660         btrfs_free_path(path);
7661         return ret;
7662 }
7663
7664 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7665                               struct extent_record *rec)
7666 {
7667         struct btrfs_trans_handle *trans;
7668         struct btrfs_root *root = fs_info->extent_root;
7669         struct btrfs_path *path;
7670         struct btrfs_extent_item *ei;
7671         struct btrfs_key key;
7672         u64 flags;
7673         int ret = 0;
7674
7675         key.objectid = rec->start;
7676         if (rec->metadata) {
7677                 key.type = BTRFS_METADATA_ITEM_KEY;
7678                 key.offset = rec->info_level;
7679         } else {
7680                 key.type = BTRFS_EXTENT_ITEM_KEY;
7681                 key.offset = rec->max_size;
7682         }
7683
7684         path = btrfs_alloc_path();
7685         if (!path)
7686                 return -ENOMEM;
7687
7688         trans = btrfs_start_transaction(root, 0);
7689         if (IS_ERR(trans)) {
7690                 btrfs_free_path(path);
7691                 return PTR_ERR(trans);
7692         }
7693
7694         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7695         if (ret < 0) {
7696                 btrfs_free_path(path);
7697                 btrfs_commit_transaction(trans, root);
7698                 return ret;
7699         } else if (ret) {
7700                 fprintf(stderr, "Didn't find extent for %llu\n",
7701                         (unsigned long long)rec->start);
7702                 btrfs_free_path(path);
7703                 btrfs_commit_transaction(trans, root);
7704                 return -ENOENT;
7705         }
7706
7707         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7708                             struct btrfs_extent_item);
7709         flags = btrfs_extent_flags(path->nodes[0], ei);
7710         if (rec->flag_block_full_backref) {
7711                 fprintf(stderr, "setting full backref on %llu\n",
7712                         (unsigned long long)key.objectid);
7713                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7714         } else {
7715                 fprintf(stderr, "clearing full backref on %llu\n",
7716                         (unsigned long long)key.objectid);
7717                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7718         }
7719         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7720         btrfs_mark_buffer_dirty(path->nodes[0]);
7721         btrfs_free_path(path);
7722         return btrfs_commit_transaction(trans, root);
7723 }
7724
7725 /* right now we only prune from the extent allocation tree */
7726 static int prune_one_block(struct btrfs_trans_handle *trans,
7727                            struct btrfs_fs_info *info,
7728                            struct btrfs_corrupt_block *corrupt)
7729 {
7730         int ret;
7731         struct btrfs_path path;
7732         struct extent_buffer *eb;
7733         u64 found;
7734         int slot;
7735         int nritems;
7736         int level = corrupt->level + 1;
7737
7738         btrfs_init_path(&path);
7739 again:
7740         /* we want to stop at the parent to our busted block */
7741         path.lowest_level = level;
7742
7743         ret = btrfs_search_slot(trans, info->extent_root,
7744                                 &corrupt->key, &path, -1, 1);
7745
7746         if (ret < 0)
7747                 goto out;
7748
7749         eb = path.nodes[level];
7750         if (!eb) {
7751                 ret = -ENOENT;
7752                 goto out;
7753         }
7754
7755         /*
7756          * hopefully the search gave us the block we want to prune,
7757          * lets try that first
7758          */
7759         slot = path.slots[level];
7760         found =  btrfs_node_blockptr(eb, slot);
7761         if (found == corrupt->cache.start)
7762                 goto del_ptr;
7763
7764         nritems = btrfs_header_nritems(eb);
7765
7766         /* the search failed, lets scan this node and hope we find it */
7767         for (slot = 0; slot < nritems; slot++) {
7768                 found =  btrfs_node_blockptr(eb, slot);
7769                 if (found == corrupt->cache.start)
7770                         goto del_ptr;
7771         }
7772         /*
7773          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7774          * to this block
7775          */
7776         if (eb == info->extent_root->node) {
7777                 ret = -ENOENT;
7778                 goto out;
7779         } else {
7780                 level++;
7781                 btrfs_release_path(&path);
7782                 goto again;
7783         }
7784
7785 del_ptr:
7786         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7787         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7788
7789 out:
7790         btrfs_release_path(&path);
7791         return ret;
7792 }
7793
7794 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7795 {
7796         struct btrfs_trans_handle *trans = NULL;
7797         struct cache_extent *cache;
7798         struct btrfs_corrupt_block *corrupt;
7799
7800         while (1) {
7801                 cache = search_cache_extent(info->corrupt_blocks, 0);
7802                 if (!cache)
7803                         break;
7804                 if (!trans) {
7805                         trans = btrfs_start_transaction(info->extent_root, 1);
7806                         if (IS_ERR(trans))
7807                                 return PTR_ERR(trans);
7808                 }
7809                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7810                 prune_one_block(trans, info, corrupt);
7811                 remove_cache_extent(info->corrupt_blocks, cache);
7812         }
7813         if (trans)
7814                 return btrfs_commit_transaction(trans, info->extent_root);
7815         return 0;
7816 }
7817
7818 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7819 {
7820         struct btrfs_block_group_cache *cache;
7821         u64 start, end;
7822         int ret;
7823
7824         while (1) {
7825                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7826                                             &start, &end, EXTENT_DIRTY);
7827                 if (ret)
7828                         break;
7829                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7830                                    GFP_NOFS);
7831         }
7832
7833         start = 0;
7834         while (1) {
7835                 cache = btrfs_lookup_first_block_group(fs_info, start);
7836                 if (!cache)
7837                         break;
7838                 if (cache->cached)
7839                         cache->cached = 0;
7840                 start = cache->key.objectid + cache->key.offset;
7841         }
7842 }
7843
7844 static int check_extent_refs(struct btrfs_root *root,
7845                              struct cache_tree *extent_cache)
7846 {
7847         struct extent_record *rec;
7848         struct cache_extent *cache;
7849         int err = 0;
7850         int ret = 0;
7851         int fixed = 0;
7852         int had_dups = 0;
7853         int recorded = 0;
7854
7855         if (repair) {
7856                 /*
7857                  * if we're doing a repair, we have to make sure
7858                  * we don't allocate from the problem extents.
7859                  * In the worst case, this will be all the
7860                  * extents in the FS
7861                  */
7862                 cache = search_cache_extent(extent_cache, 0);
7863                 while(cache) {
7864                         rec = container_of(cache, struct extent_record, cache);
7865                         set_extent_dirty(root->fs_info->excluded_extents,
7866                                          rec->start,
7867                                          rec->start + rec->max_size - 1,
7868                                          GFP_NOFS);
7869                         cache = next_cache_extent(cache);
7870                 }
7871
7872                 /* pin down all the corrupted blocks too */
7873                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7874                 while(cache) {
7875                         set_extent_dirty(root->fs_info->excluded_extents,
7876                                          cache->start,
7877                                          cache->start + cache->size - 1,
7878                                          GFP_NOFS);
7879                         cache = next_cache_extent(cache);
7880                 }
7881                 prune_corrupt_blocks(root->fs_info);
7882                 reset_cached_block_groups(root->fs_info);
7883         }
7884
7885         reset_cached_block_groups(root->fs_info);
7886
7887         /*
7888          * We need to delete any duplicate entries we find first otherwise we
7889          * could mess up the extent tree when we have backrefs that actually
7890          * belong to a different extent item and not the weird duplicate one.
7891          */
7892         while (repair && !list_empty(&duplicate_extents)) {
7893                 rec = to_extent_record(duplicate_extents.next);
7894                 list_del_init(&rec->list);
7895
7896                 /* Sometimes we can find a backref before we find an actual
7897                  * extent, so we need to process it a little bit to see if there
7898                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7899                  * if this is a backref screwup.  If we need to delete stuff
7900                  * process_duplicates() will return 0, otherwise it will return
7901                  * 1 and we
7902                  */
7903                 if (process_duplicates(root, extent_cache, rec))
7904                         continue;
7905                 ret = delete_duplicate_records(root, rec);
7906                 if (ret < 0)
7907                         return ret;
7908                 /*
7909                  * delete_duplicate_records will return the number of entries
7910                  * deleted, so if it's greater than 0 then we know we actually
7911                  * did something and we need to remove.
7912                  */
7913                 if (ret)
7914                         had_dups = 1;
7915         }
7916
7917         if (had_dups)
7918                 return -EAGAIN;
7919
7920         while(1) {
7921                 int cur_err = 0;
7922
7923                 fixed = 0;
7924                 recorded = 0;
7925                 cache = search_cache_extent(extent_cache, 0);
7926                 if (!cache)
7927                         break;
7928                 rec = container_of(cache, struct extent_record, cache);
7929                 if (rec->num_duplicates) {
7930                         fprintf(stderr, "extent item %llu has multiple extent "
7931                                 "items\n", (unsigned long long)rec->start);
7932                         err = 1;
7933                         cur_err = 1;
7934                 }
7935
7936                 if (rec->refs != rec->extent_item_refs) {
7937                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7938                                 (unsigned long long)rec->start,
7939                                 (unsigned long long)rec->nr);
7940                         fprintf(stderr, "extent item %llu, found %llu\n",
7941                                 (unsigned long long)rec->extent_item_refs,
7942                                 (unsigned long long)rec->refs);
7943                         ret = record_orphan_data_extents(root->fs_info, rec);
7944                         if (ret < 0)
7945                                 goto repair_abort;
7946                         if (ret == 0) {
7947                                 recorded = 1;
7948                         } else {
7949                                 /*
7950                                  * we can't use the extent to repair file
7951                                  * extent, let the fallback method handle it.
7952                                  */
7953                                 if (!fixed && repair) {
7954                                         ret = fixup_extent_refs(
7955                                                         root->fs_info,
7956                                                         extent_cache, rec);
7957                                         if (ret)
7958                                                 goto repair_abort;
7959                                         fixed = 1;
7960                                 }
7961                         }
7962                         err = 1;
7963                         cur_err = 1;
7964                 }
7965                 if (all_backpointers_checked(rec, 1)) {
7966                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7967                                 (unsigned long long)rec->start,
7968                                 (unsigned long long)rec->nr);
7969
7970                         if (!fixed && !recorded && repair) {
7971                                 ret = fixup_extent_refs(root->fs_info,
7972                                                         extent_cache, rec);
7973                                 if (ret)
7974                                         goto repair_abort;
7975                                 fixed = 1;
7976                         }
7977                         cur_err = 1;
7978                         err = 1;
7979                 }
7980                 if (!rec->owner_ref_checked) {
7981                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7982                                 (unsigned long long)rec->start,
7983                                 (unsigned long long)rec->nr);
7984                         if (!fixed && !recorded && repair) {
7985                                 ret = fixup_extent_refs(root->fs_info,
7986                                                         extent_cache, rec);
7987                                 if (ret)
7988                                         goto repair_abort;
7989                                 fixed = 1;
7990                         }
7991                         err = 1;
7992                         cur_err = 1;
7993                 }
7994                 if (rec->bad_full_backref) {
7995                         fprintf(stderr, "bad full backref, on [%llu]\n",
7996                                 (unsigned long long)rec->start);
7997                         if (repair) {
7998                                 ret = fixup_extent_flags(root->fs_info, rec);
7999                                 if (ret)
8000                                         goto repair_abort;
8001                                 fixed = 1;
8002                         }
8003                         err = 1;
8004                         cur_err = 1;
8005                 }
8006                 /*
8007                  * Although it's not a extent ref's problem, we reuse this
8008                  * routine for error reporting.
8009                  * No repair function yet.
8010                  */
8011                 if (rec->crossing_stripes) {
8012                         fprintf(stderr,
8013                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8014                                 rec->start, rec->start + rec->max_size);
8015                         err = 1;
8016                         cur_err = 1;
8017                 }
8018
8019                 if (rec->wrong_chunk_type) {
8020                         fprintf(stderr,
8021                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8022                                 rec->start, rec->start + rec->max_size);
8023                         err = 1;
8024                         cur_err = 1;
8025                 }
8026
8027                 remove_cache_extent(extent_cache, cache);
8028                 free_all_extent_backrefs(rec);
8029                 if (!init_extent_tree && repair && (!cur_err || fixed))
8030                         clear_extent_dirty(root->fs_info->excluded_extents,
8031                                            rec->start,
8032                                            rec->start + rec->max_size - 1,
8033                                            GFP_NOFS);
8034                 free(rec);
8035         }
8036 repair_abort:
8037         if (repair) {
8038                 if (ret && ret != -EAGAIN) {
8039                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8040                         exit(1);
8041                 } else if (!ret) {
8042                         struct btrfs_trans_handle *trans;
8043
8044                         root = root->fs_info->extent_root;
8045                         trans = btrfs_start_transaction(root, 1);
8046                         if (IS_ERR(trans)) {
8047                                 ret = PTR_ERR(trans);
8048                                 goto repair_abort;
8049                         }
8050
8051                         btrfs_fix_block_accounting(trans, root);
8052                         ret = btrfs_commit_transaction(trans, root);
8053                         if (ret)
8054                                 goto repair_abort;
8055                 }
8056                 if (err)
8057                         fprintf(stderr, "repaired damaged extent references\n");
8058                 return ret;
8059         }
8060         return err;
8061 }
8062
8063 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8064 {
8065         u64 stripe_size;
8066
8067         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8068                 stripe_size = length;
8069                 stripe_size /= num_stripes;
8070         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8071                 stripe_size = length * 2;
8072                 stripe_size /= num_stripes;
8073         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8074                 stripe_size = length;
8075                 stripe_size /= (num_stripes - 1);
8076         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8077                 stripe_size = length;
8078                 stripe_size /= (num_stripes - 2);
8079         } else {
8080                 stripe_size = length;
8081         }
8082         return stripe_size;
8083 }
8084
8085 /*
8086  * Check the chunk with its block group/dev list ref:
8087  * Return 0 if all refs seems valid.
8088  * Return 1 if part of refs seems valid, need later check for rebuild ref
8089  * like missing block group and needs to search extent tree to rebuild them.
8090  * Return -1 if essential refs are missing and unable to rebuild.
8091  */
8092 static int check_chunk_refs(struct chunk_record *chunk_rec,
8093                             struct block_group_tree *block_group_cache,
8094                             struct device_extent_tree *dev_extent_cache,
8095                             int silent)
8096 {
8097         struct cache_extent *block_group_item;
8098         struct block_group_record *block_group_rec;
8099         struct cache_extent *dev_extent_item;
8100         struct device_extent_record *dev_extent_rec;
8101         u64 devid;
8102         u64 offset;
8103         u64 length;
8104         int metadump_v2 = 0;
8105         int i;
8106         int ret = 0;
8107
8108         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8109                                                chunk_rec->offset,
8110                                                chunk_rec->length);
8111         if (block_group_item) {
8112                 block_group_rec = container_of(block_group_item,
8113                                                struct block_group_record,
8114                                                cache);
8115                 if (chunk_rec->length != block_group_rec->offset ||
8116                     chunk_rec->offset != block_group_rec->objectid ||
8117                     (!metadump_v2 &&
8118                      chunk_rec->type_flags != block_group_rec->flags)) {
8119                         if (!silent)
8120                                 fprintf(stderr,
8121                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8122                                         chunk_rec->objectid,
8123                                         chunk_rec->type,
8124                                         chunk_rec->offset,
8125                                         chunk_rec->length,
8126                                         chunk_rec->offset,
8127                                         chunk_rec->type_flags,
8128                                         block_group_rec->objectid,
8129                                         block_group_rec->type,
8130                                         block_group_rec->offset,
8131                                         block_group_rec->offset,
8132                                         block_group_rec->objectid,
8133                                         block_group_rec->flags);
8134                         ret = -1;
8135                 } else {
8136                         list_del_init(&block_group_rec->list);
8137                         chunk_rec->bg_rec = block_group_rec;
8138                 }
8139         } else {
8140                 if (!silent)
8141                         fprintf(stderr,
8142                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8143                                 chunk_rec->objectid,
8144                                 chunk_rec->type,
8145                                 chunk_rec->offset,
8146                                 chunk_rec->length,
8147                                 chunk_rec->offset,
8148                                 chunk_rec->type_flags);
8149                 ret = 1;
8150         }
8151
8152         if (metadump_v2)
8153                 return ret;
8154
8155         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8156                                     chunk_rec->num_stripes);
8157         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8158                 devid = chunk_rec->stripes[i].devid;
8159                 offset = chunk_rec->stripes[i].offset;
8160                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8161                                                        devid, offset, length);
8162                 if (dev_extent_item) {
8163                         dev_extent_rec = container_of(dev_extent_item,
8164                                                 struct device_extent_record,
8165                                                 cache);
8166                         if (dev_extent_rec->objectid != devid ||
8167                             dev_extent_rec->offset != offset ||
8168                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8169                             dev_extent_rec->length != length) {
8170                                 if (!silent)
8171                                         fprintf(stderr,
8172                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8173                                                 chunk_rec->objectid,
8174                                                 chunk_rec->type,
8175                                                 chunk_rec->offset,
8176                                                 chunk_rec->stripes[i].devid,
8177                                                 chunk_rec->stripes[i].offset,
8178                                                 dev_extent_rec->objectid,
8179                                                 dev_extent_rec->offset,
8180                                                 dev_extent_rec->length);
8181                                 ret = -1;
8182                         } else {
8183                                 list_move(&dev_extent_rec->chunk_list,
8184                                           &chunk_rec->dextents);
8185                         }
8186                 } else {
8187                         if (!silent)
8188                                 fprintf(stderr,
8189                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8190                                         chunk_rec->objectid,
8191                                         chunk_rec->type,
8192                                         chunk_rec->offset,
8193                                         chunk_rec->stripes[i].devid,
8194                                         chunk_rec->stripes[i].offset);
8195                         ret = -1;
8196                 }
8197         }
8198         return ret;
8199 }
8200
8201 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8202 int check_chunks(struct cache_tree *chunk_cache,
8203                  struct block_group_tree *block_group_cache,
8204                  struct device_extent_tree *dev_extent_cache,
8205                  struct list_head *good, struct list_head *bad,
8206                  struct list_head *rebuild, int silent)
8207 {
8208         struct cache_extent *chunk_item;
8209         struct chunk_record *chunk_rec;
8210         struct block_group_record *bg_rec;
8211         struct device_extent_record *dext_rec;
8212         int err;
8213         int ret = 0;
8214
8215         chunk_item = first_cache_extent(chunk_cache);
8216         while (chunk_item) {
8217                 chunk_rec = container_of(chunk_item, struct chunk_record,
8218                                          cache);
8219                 err = check_chunk_refs(chunk_rec, block_group_cache,
8220                                        dev_extent_cache, silent);
8221                 if (err < 0)
8222                         ret = err;
8223                 if (err == 0 && good)
8224                         list_add_tail(&chunk_rec->list, good);
8225                 if (err > 0 && rebuild)
8226                         list_add_tail(&chunk_rec->list, rebuild);
8227                 if (err < 0 && bad)
8228                         list_add_tail(&chunk_rec->list, bad);
8229                 chunk_item = next_cache_extent(chunk_item);
8230         }
8231
8232         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8233                 if (!silent)
8234                         fprintf(stderr,
8235                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8236                                 bg_rec->objectid,
8237                                 bg_rec->offset,
8238                                 bg_rec->flags);
8239                 if (!ret)
8240                         ret = 1;
8241         }
8242
8243         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8244                             chunk_list) {
8245                 if (!silent)
8246                         fprintf(stderr,
8247                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8248                                 dext_rec->objectid,
8249                                 dext_rec->offset,
8250                                 dext_rec->length);
8251                 if (!ret)
8252                         ret = 1;
8253         }
8254         return ret;
8255 }
8256
8257
8258 static int check_device_used(struct device_record *dev_rec,
8259                              struct device_extent_tree *dext_cache)
8260 {
8261         struct cache_extent *cache;
8262         struct device_extent_record *dev_extent_rec;
8263         u64 total_byte = 0;
8264
8265         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8266         while (cache) {
8267                 dev_extent_rec = container_of(cache,
8268                                               struct device_extent_record,
8269                                               cache);
8270                 if (dev_extent_rec->objectid != dev_rec->devid)
8271                         break;
8272
8273                 list_del_init(&dev_extent_rec->device_list);
8274                 total_byte += dev_extent_rec->length;
8275                 cache = next_cache_extent(cache);
8276         }
8277
8278         if (total_byte != dev_rec->byte_used) {
8279                 fprintf(stderr,
8280                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8281                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8282                         dev_rec->type, dev_rec->offset);
8283                 return -1;
8284         } else {
8285                 return 0;
8286         }
8287 }
8288
8289 /* check btrfs_dev_item -> btrfs_dev_extent */
8290 static int check_devices(struct rb_root *dev_cache,
8291                          struct device_extent_tree *dev_extent_cache)
8292 {
8293         struct rb_node *dev_node;
8294         struct device_record *dev_rec;
8295         struct device_extent_record *dext_rec;
8296         int err;
8297         int ret = 0;
8298
8299         dev_node = rb_first(dev_cache);
8300         while (dev_node) {
8301                 dev_rec = container_of(dev_node, struct device_record, node);
8302                 err = check_device_used(dev_rec, dev_extent_cache);
8303                 if (err)
8304                         ret = err;
8305
8306                 dev_node = rb_next(dev_node);
8307         }
8308         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8309                             device_list) {
8310                 fprintf(stderr,
8311                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8312                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8313                 if (!ret)
8314                         ret = 1;
8315         }
8316         return ret;
8317 }
8318
8319 static int add_root_item_to_list(struct list_head *head,
8320                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8321                                   u8 level, u8 drop_level,
8322                                   int level_size, struct btrfs_key *drop_key)
8323 {
8324
8325         struct root_item_record *ri_rec;
8326         ri_rec = malloc(sizeof(*ri_rec));
8327         if (!ri_rec)
8328                 return -ENOMEM;
8329         ri_rec->bytenr = bytenr;
8330         ri_rec->objectid = objectid;
8331         ri_rec->level = level;
8332         ri_rec->level_size = level_size;
8333         ri_rec->drop_level = drop_level;
8334         ri_rec->last_snapshot = last_snapshot;
8335         if (drop_key)
8336                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8337         list_add_tail(&ri_rec->list, head);
8338
8339         return 0;
8340 }
8341
8342 static void free_root_item_list(struct list_head *list)
8343 {
8344         struct root_item_record *ri_rec;
8345
8346         while (!list_empty(list)) {
8347                 ri_rec = list_first_entry(list, struct root_item_record,
8348                                           list);
8349                 list_del_init(&ri_rec->list);
8350                 free(ri_rec);
8351         }
8352 }
8353
8354 static int deal_root_from_list(struct list_head *list,
8355                                struct btrfs_root *root,
8356                                struct block_info *bits,
8357                                int bits_nr,
8358                                struct cache_tree *pending,
8359                                struct cache_tree *seen,
8360                                struct cache_tree *reada,
8361                                struct cache_tree *nodes,
8362                                struct cache_tree *extent_cache,
8363                                struct cache_tree *chunk_cache,
8364                                struct rb_root *dev_cache,
8365                                struct block_group_tree *block_group_cache,
8366                                struct device_extent_tree *dev_extent_cache)
8367 {
8368         int ret = 0;
8369         u64 last;
8370
8371         while (!list_empty(list)) {
8372                 struct root_item_record *rec;
8373                 struct extent_buffer *buf;
8374                 rec = list_entry(list->next,
8375                                  struct root_item_record, list);
8376                 last = 0;
8377                 buf = read_tree_block(root->fs_info->tree_root,
8378                                       rec->bytenr, rec->level_size, 0);
8379                 if (!extent_buffer_uptodate(buf)) {
8380                         free_extent_buffer(buf);
8381                         ret = -EIO;
8382                         break;
8383                 }
8384                 ret = add_root_to_pending(buf, extent_cache, pending,
8385                                     seen, nodes, rec->objectid);
8386                 if (ret < 0)
8387                         break;
8388                 /*
8389                  * To rebuild extent tree, we need deal with snapshot
8390                  * one by one, otherwise we deal with node firstly which
8391                  * can maximize readahead.
8392                  */
8393                 while (1) {
8394                         ret = run_next_block(root, bits, bits_nr, &last,
8395                                              pending, seen, reada, nodes,
8396                                              extent_cache, chunk_cache,
8397                                              dev_cache, block_group_cache,
8398                                              dev_extent_cache, rec);
8399                         if (ret != 0)
8400                                 break;
8401                 }
8402                 free_extent_buffer(buf);
8403                 list_del(&rec->list);
8404                 free(rec);
8405                 if (ret < 0)
8406                         break;
8407         }
8408         while (ret >= 0) {
8409                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8410                                      reada, nodes, extent_cache, chunk_cache,
8411                                      dev_cache, block_group_cache,
8412                                      dev_extent_cache, NULL);
8413                 if (ret != 0) {
8414                         if (ret > 0)
8415                                 ret = 0;
8416                         break;
8417                 }
8418         }
8419         return ret;
8420 }
8421
8422 static int check_chunks_and_extents(struct btrfs_root *root)
8423 {
8424         struct rb_root dev_cache;
8425         struct cache_tree chunk_cache;
8426         struct block_group_tree block_group_cache;
8427         struct device_extent_tree dev_extent_cache;
8428         struct cache_tree extent_cache;
8429         struct cache_tree seen;
8430         struct cache_tree pending;
8431         struct cache_tree reada;
8432         struct cache_tree nodes;
8433         struct extent_io_tree excluded_extents;
8434         struct cache_tree corrupt_blocks;
8435         struct btrfs_path path;
8436         struct btrfs_key key;
8437         struct btrfs_key found_key;
8438         int ret, err = 0;
8439         struct block_info *bits;
8440         int bits_nr;
8441         struct extent_buffer *leaf;
8442         int slot;
8443         struct btrfs_root_item ri;
8444         struct list_head dropping_trees;
8445         struct list_head normal_trees;
8446         struct btrfs_root *root1;
8447         u64 objectid;
8448         u32 level_size;
8449         u8 level;
8450
8451         dev_cache = RB_ROOT;
8452         cache_tree_init(&chunk_cache);
8453         block_group_tree_init(&block_group_cache);
8454         device_extent_tree_init(&dev_extent_cache);
8455
8456         cache_tree_init(&extent_cache);
8457         cache_tree_init(&seen);
8458         cache_tree_init(&pending);
8459         cache_tree_init(&nodes);
8460         cache_tree_init(&reada);
8461         cache_tree_init(&corrupt_blocks);
8462         extent_io_tree_init(&excluded_extents);
8463         INIT_LIST_HEAD(&dropping_trees);
8464         INIT_LIST_HEAD(&normal_trees);
8465
8466         if (repair) {
8467                 root->fs_info->excluded_extents = &excluded_extents;
8468                 root->fs_info->fsck_extent_cache = &extent_cache;
8469                 root->fs_info->free_extent_hook = free_extent_hook;
8470                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8471         }
8472
8473         bits_nr = 1024;
8474         bits = malloc(bits_nr * sizeof(struct block_info));
8475         if (!bits) {
8476                 perror("malloc");
8477                 exit(1);
8478         }
8479
8480         if (ctx.progress_enabled) {
8481                 ctx.tp = TASK_EXTENTS;
8482                 task_start(ctx.info);
8483         }
8484
8485 again:
8486         root1 = root->fs_info->tree_root;
8487         level = btrfs_header_level(root1->node);
8488         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8489                                     root1->node->start, 0, level, 0,
8490                                     root1->nodesize, NULL);
8491         if (ret < 0)
8492                 goto out;
8493         root1 = root->fs_info->chunk_root;
8494         level = btrfs_header_level(root1->node);
8495         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8496                                     root1->node->start, 0, level, 0,
8497                                     root1->nodesize, NULL);
8498         if (ret < 0)
8499                 goto out;
8500         btrfs_init_path(&path);
8501         key.offset = 0;
8502         key.objectid = 0;
8503         key.type = BTRFS_ROOT_ITEM_KEY;
8504         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8505                                         &key, &path, 0, 0);
8506         if (ret < 0)
8507                 goto out;
8508         while(1) {
8509                 leaf = path.nodes[0];
8510                 slot = path.slots[0];
8511                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8512                         ret = btrfs_next_leaf(root, &path);
8513                         if (ret != 0)
8514                                 break;
8515                         leaf = path.nodes[0];
8516                         slot = path.slots[0];
8517                 }
8518                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8519                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8520                         unsigned long offset;
8521                         u64 last_snapshot;
8522
8523                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8524                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8525                         last_snapshot = btrfs_root_last_snapshot(&ri);
8526                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8527                                 level = btrfs_root_level(&ri);
8528                                 level_size = root->nodesize;
8529                                 ret = add_root_item_to_list(&normal_trees,
8530                                                 found_key.objectid,
8531                                                 btrfs_root_bytenr(&ri),
8532                                                 last_snapshot, level,
8533                                                 0, level_size, NULL);
8534                                 if (ret < 0)
8535                                         goto out;
8536                         } else {
8537                                 level = btrfs_root_level(&ri);
8538                                 level_size = root->nodesize;
8539                                 objectid = found_key.objectid;
8540                                 btrfs_disk_key_to_cpu(&found_key,
8541                                                       &ri.drop_progress);
8542                                 ret = add_root_item_to_list(&dropping_trees,
8543                                                 objectid,
8544                                                 btrfs_root_bytenr(&ri),
8545                                                 last_snapshot, level,
8546                                                 ri.drop_level,
8547                                                 level_size, &found_key);
8548                                 if (ret < 0)
8549                                         goto out;
8550                         }
8551                 }
8552                 path.slots[0]++;
8553         }
8554         btrfs_release_path(&path);
8555
8556         /*
8557          * check_block can return -EAGAIN if it fixes something, please keep
8558          * this in mind when dealing with return values from these functions, if
8559          * we get -EAGAIN we want to fall through and restart the loop.
8560          */
8561         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8562                                   &seen, &reada, &nodes, &extent_cache,
8563                                   &chunk_cache, &dev_cache, &block_group_cache,
8564                                   &dev_extent_cache);
8565         if (ret < 0) {
8566                 if (ret == -EAGAIN)
8567                         goto loop;
8568                 goto out;
8569         }
8570         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8571                                   &pending, &seen, &reada, &nodes,
8572                                   &extent_cache, &chunk_cache, &dev_cache,
8573                                   &block_group_cache, &dev_extent_cache);
8574         if (ret < 0) {
8575                 if (ret == -EAGAIN)
8576                         goto loop;
8577                 goto out;
8578         }
8579
8580         ret = check_chunks(&chunk_cache, &block_group_cache,
8581                            &dev_extent_cache, NULL, NULL, NULL, 0);
8582         if (ret) {
8583                 if (ret == -EAGAIN)
8584                         goto loop;
8585                 err = ret;
8586         }
8587
8588         ret = check_extent_refs(root, &extent_cache);
8589         if (ret < 0) {
8590                 if (ret == -EAGAIN)
8591                         goto loop;
8592                 goto out;
8593         }
8594
8595         ret = check_devices(&dev_cache, &dev_extent_cache);
8596         if (ret && err)
8597                 ret = err;
8598
8599 out:
8600         task_stop(ctx.info);
8601         if (repair) {
8602                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8603                 extent_io_tree_cleanup(&excluded_extents);
8604                 root->fs_info->fsck_extent_cache = NULL;
8605                 root->fs_info->free_extent_hook = NULL;
8606                 root->fs_info->corrupt_blocks = NULL;
8607                 root->fs_info->excluded_extents = NULL;
8608         }
8609         free(bits);
8610         free_chunk_cache_tree(&chunk_cache);
8611         free_device_cache_tree(&dev_cache);
8612         free_block_group_tree(&block_group_cache);
8613         free_device_extent_tree(&dev_extent_cache);
8614         free_extent_cache_tree(&seen);
8615         free_extent_cache_tree(&pending);
8616         free_extent_cache_tree(&reada);
8617         free_extent_cache_tree(&nodes);
8618         return ret;
8619 loop:
8620         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8621         free_extent_cache_tree(&seen);
8622         free_extent_cache_tree(&pending);
8623         free_extent_cache_tree(&reada);
8624         free_extent_cache_tree(&nodes);
8625         free_chunk_cache_tree(&chunk_cache);
8626         free_block_group_tree(&block_group_cache);
8627         free_device_cache_tree(&dev_cache);
8628         free_device_extent_tree(&dev_extent_cache);
8629         free_extent_record_cache(root->fs_info, &extent_cache);
8630         free_root_item_list(&normal_trees);
8631         free_root_item_list(&dropping_trees);
8632         extent_io_tree_cleanup(&excluded_extents);
8633         goto again;
8634 }
8635
8636 /*
8637  * Check backrefs of a tree block given by @bytenr or @eb.
8638  *
8639  * @root:       the root containing the @bytenr or @eb
8640  * @eb:         tree block extent buffer, can be NULL
8641  * @bytenr:     bytenr of the tree block to search
8642  * @level:      tree level of the tree block
8643  * @owner:      owner of the tree block
8644  *
8645  * Return >0 for any error found and output error message
8646  * Return 0 for no error found
8647  */
8648 static int check_tree_block_ref(struct btrfs_root *root,
8649                                 struct extent_buffer *eb, u64 bytenr,
8650                                 int level, u64 owner)
8651 {
8652         struct btrfs_key key;
8653         struct btrfs_root *extent_root = root->fs_info->extent_root;
8654         struct btrfs_path path;
8655         struct btrfs_extent_item *ei;
8656         struct btrfs_extent_inline_ref *iref;
8657         struct extent_buffer *leaf;
8658         unsigned long end;
8659         unsigned long ptr;
8660         int slot;
8661         int skinny_level;
8662         int type;
8663         u32 nodesize = root->nodesize;
8664         u32 item_size;
8665         u64 offset;
8666         int found_ref = 0;
8667         int err = 0;
8668         int ret;
8669
8670         btrfs_init_path(&path);
8671         key.objectid = bytenr;
8672         if (btrfs_fs_incompat(root->fs_info,
8673                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8674                 key.type = BTRFS_METADATA_ITEM_KEY;
8675         else
8676                 key.type = BTRFS_EXTENT_ITEM_KEY;
8677         key.offset = (u64)-1;
8678
8679         /* Search for the backref in extent tree */
8680         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8681         if (ret < 0) {
8682                 err |= BACKREF_MISSING;
8683                 goto out;
8684         }
8685         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8686         if (ret) {
8687                 err |= BACKREF_MISSING;
8688                 goto out;
8689         }
8690
8691         leaf = path.nodes[0];
8692         slot = path.slots[0];
8693         btrfs_item_key_to_cpu(leaf, &key, slot);
8694
8695         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8696
8697         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8698                 skinny_level = (int)key.offset;
8699                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8700         } else {
8701                 struct btrfs_tree_block_info *info;
8702
8703                 info = (struct btrfs_tree_block_info *)(ei + 1);
8704                 skinny_level = btrfs_tree_block_level(leaf, info);
8705                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8706         }
8707
8708         if (eb) {
8709                 u64 header_gen;
8710                 u64 extent_gen;
8711
8712                 if (!(btrfs_extent_flags(leaf, ei) &
8713                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8714                         error(
8715                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8716                                 key.objectid, nodesize,
8717                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8718                         err = BACKREF_MISMATCH;
8719                 }
8720                 header_gen = btrfs_header_generation(eb);
8721                 extent_gen = btrfs_extent_generation(leaf, ei);
8722                 if (header_gen != extent_gen) {
8723                         error(
8724         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8725                                 key.objectid, nodesize, header_gen,
8726                                 extent_gen);
8727                         err = BACKREF_MISMATCH;
8728                 }
8729                 if (level != skinny_level) {
8730                         error(
8731                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8732                                 key.objectid, nodesize, level, skinny_level);
8733                         err = BACKREF_MISMATCH;
8734                 }
8735                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8736                         error(
8737                         "extent[%llu %u] is referred by other roots than %llu",
8738                                 key.objectid, nodesize, root->objectid);
8739                         err = BACKREF_MISMATCH;
8740                 }
8741         }
8742
8743         /*
8744          * Iterate the extent/metadata item to find the exact backref
8745          */
8746         item_size = btrfs_item_size_nr(leaf, slot);
8747         ptr = (unsigned long)iref;
8748         end = (unsigned long)ei + item_size;
8749         while (ptr < end) {
8750                 iref = (struct btrfs_extent_inline_ref *)ptr;
8751                 type = btrfs_extent_inline_ref_type(leaf, iref);
8752                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8753
8754                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8755                         (offset == root->objectid || offset == owner)) {
8756                         found_ref = 1;
8757                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8758                         /* Check if the backref points to valid referencer */
8759                         found_ref = !check_tree_block_ref(root, NULL, offset,
8760                                                           level + 1, owner);
8761                 }
8762
8763                 if (found_ref)
8764                         break;
8765                 ptr += btrfs_extent_inline_ref_size(type);
8766         }
8767
8768         /*
8769          * Inlined extent item doesn't have what we need, check
8770          * TREE_BLOCK_REF_KEY
8771          */
8772         if (!found_ref) {
8773                 btrfs_release_path(&path);
8774                 key.objectid = bytenr;
8775                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8776                 key.offset = root->objectid;
8777
8778                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8779                 if (!ret)
8780                         found_ref = 1;
8781         }
8782         if (!found_ref)
8783                 err |= BACKREF_MISSING;
8784 out:
8785         btrfs_release_path(&path);
8786         if (eb && (err & BACKREF_MISSING))
8787                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8788                         bytenr, nodesize, owner, level);
8789         return err;
8790 }
8791
8792 /*
8793  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8794  *
8795  * Return >0 any error found and output error message
8796  * Return 0 for no error found
8797  */
8798 static int check_extent_data_item(struct btrfs_root *root,
8799                                   struct extent_buffer *eb, int slot)
8800 {
8801         struct btrfs_file_extent_item *fi;
8802         struct btrfs_path path;
8803         struct btrfs_root *extent_root = root->fs_info->extent_root;
8804         struct btrfs_key fi_key;
8805         struct btrfs_key dbref_key;
8806         struct extent_buffer *leaf;
8807         struct btrfs_extent_item *ei;
8808         struct btrfs_extent_inline_ref *iref;
8809         struct btrfs_extent_data_ref *dref;
8810         u64 owner;
8811         u64 file_extent_gen;
8812         u64 disk_bytenr;
8813         u64 disk_num_bytes;
8814         u64 extent_num_bytes;
8815         u64 extent_flags;
8816         u64 extent_gen;
8817         u32 item_size;
8818         unsigned long end;
8819         unsigned long ptr;
8820         int type;
8821         u64 ref_root;
8822         int found_dbackref = 0;
8823         int err = 0;
8824         int ret;
8825
8826         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8827         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8828         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8829
8830         /* Nothing to check for hole and inline data extents */
8831         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8832             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8833                 return 0;
8834
8835         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8836         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8837         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8838
8839         /* Check unaligned disk_num_bytes and num_bytes */
8840         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8841                 error(
8842 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8843                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8844                         root->sectorsize);
8845                 err |= BYTES_UNALIGNED;
8846         } else {
8847                 data_bytes_allocated += disk_num_bytes;
8848         }
8849         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8850                 error(
8851 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8852                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8853                         root->sectorsize);
8854                 err |= BYTES_UNALIGNED;
8855         } else {
8856                 data_bytes_referenced += extent_num_bytes;
8857         }
8858         owner = btrfs_header_owner(eb);
8859
8860         /* Check the extent item of the file extent in extent tree */
8861         btrfs_init_path(&path);
8862         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8863         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8864         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8865
8866         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8867         if (ret) {
8868                 err |= BACKREF_MISSING;
8869                 goto error;
8870         }
8871
8872         leaf = path.nodes[0];
8873         slot = path.slots[0];
8874         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8875
8876         extent_flags = btrfs_extent_flags(leaf, ei);
8877         extent_gen = btrfs_extent_generation(leaf, ei);
8878
8879         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8880                 error(
8881                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8882                     disk_bytenr, disk_num_bytes,
8883                     BTRFS_EXTENT_FLAG_DATA);
8884                 err |= BACKREF_MISMATCH;
8885         }
8886
8887         if (file_extent_gen < extent_gen) {
8888                 error(
8889 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8890                         disk_bytenr, disk_num_bytes, file_extent_gen,
8891                         extent_gen);
8892                 err |= BACKREF_MISMATCH;
8893         }
8894
8895         /* Check data backref inside that extent item */
8896         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8897         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8898         ptr = (unsigned long)iref;
8899         end = (unsigned long)ei + item_size;
8900         while (ptr < end) {
8901                 iref = (struct btrfs_extent_inline_ref *)ptr;
8902                 type = btrfs_extent_inline_ref_type(leaf, iref);
8903                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8904
8905                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8906                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8907                         if (ref_root == owner || ref_root == root->objectid)
8908                                 found_dbackref = 1;
8909                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8910                         found_dbackref = !check_tree_block_ref(root, NULL,
8911                                 btrfs_extent_inline_ref_offset(leaf, iref),
8912                                 0, owner);
8913                 }
8914
8915                 if (found_dbackref)
8916                         break;
8917                 ptr += btrfs_extent_inline_ref_size(type);
8918         }
8919
8920         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8921         if (!found_dbackref) {
8922                 btrfs_release_path(&path);
8923
8924                 btrfs_init_path(&path);
8925                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8926                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8927                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8928                                 fi_key.objectid, fi_key.offset);
8929
8930                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8931                                         &dbref_key, &path, 0, 0);
8932                 if (!ret)
8933                         found_dbackref = 1;
8934         }
8935
8936         if (!found_dbackref)
8937                 err |= BACKREF_MISSING;
8938 error:
8939         btrfs_release_path(&path);
8940         if (err & BACKREF_MISSING) {
8941                 error("data extent[%llu %llu] backref lost",
8942                       disk_bytenr, disk_num_bytes);
8943         }
8944         return err;
8945 }
8946
8947 /*
8948  * Get real tree block level for the case like shared block
8949  * Return >= 0 as tree level
8950  * Return <0 for error
8951  */
8952 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8953 {
8954         struct extent_buffer *eb;
8955         struct btrfs_path path;
8956         struct btrfs_key key;
8957         struct btrfs_extent_item *ei;
8958         u64 flags;
8959         u64 transid;
8960         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8961         u8 backref_level;
8962         u8 header_level;
8963         int ret;
8964
8965         /* Search extent tree for extent generation and level */
8966         key.objectid = bytenr;
8967         key.type = BTRFS_METADATA_ITEM_KEY;
8968         key.offset = (u64)-1;
8969
8970         btrfs_init_path(&path);
8971         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8972         if (ret < 0)
8973                 goto release_out;
8974         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8975         if (ret < 0)
8976                 goto release_out;
8977         if (ret > 0) {
8978                 ret = -ENOENT;
8979                 goto release_out;
8980         }
8981
8982         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8983         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8984                             struct btrfs_extent_item);
8985         flags = btrfs_extent_flags(path.nodes[0], ei);
8986         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8987                 ret = -ENOENT;
8988                 goto release_out;
8989         }
8990
8991         /* Get transid for later read_tree_block() check */
8992         transid = btrfs_extent_generation(path.nodes[0], ei);
8993
8994         /* Get backref level as one source */
8995         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8996                 backref_level = key.offset;
8997         } else {
8998                 struct btrfs_tree_block_info *info;
8999
9000                 info = (struct btrfs_tree_block_info *)(ei + 1);
9001                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9002         }
9003         btrfs_release_path(&path);
9004
9005         /* Get level from tree block as an alternative source */
9006         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9007         if (!extent_buffer_uptodate(eb)) {
9008                 free_extent_buffer(eb);
9009                 return -EIO;
9010         }
9011         header_level = btrfs_header_level(eb);
9012         free_extent_buffer(eb);
9013
9014         if (header_level != backref_level)
9015                 return -EIO;
9016         return header_level;
9017
9018 release_out:
9019         btrfs_release_path(&path);
9020         return ret;
9021 }
9022
9023 /*
9024  * Check if a tree block backref is valid (points to a valid tree block)
9025  * if level == -1, level will be resolved
9026  * Return >0 for any error found and print error message
9027  */
9028 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9029                                     u64 bytenr, int level)
9030 {
9031         struct btrfs_root *root;
9032         struct btrfs_key key;
9033         struct btrfs_path path;
9034         struct extent_buffer *eb;
9035         struct extent_buffer *node;
9036         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9037         int err = 0;
9038         int ret;
9039
9040         /* Query level for level == -1 special case */
9041         if (level == -1)
9042                 level = query_tree_block_level(fs_info, bytenr);
9043         if (level < 0) {
9044                 err |= REFERENCER_MISSING;
9045                 goto out;
9046         }
9047
9048         key.objectid = root_id;
9049         key.type = BTRFS_ROOT_ITEM_KEY;
9050         key.offset = (u64)-1;
9051
9052         root = btrfs_read_fs_root(fs_info, &key);
9053         if (IS_ERR(root)) {
9054                 err |= REFERENCER_MISSING;
9055                 goto out;
9056         }
9057
9058         /* Read out the tree block to get item/node key */
9059         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9060         if (!extent_buffer_uptodate(eb)) {
9061                 err |= REFERENCER_MISSING;
9062                 free_extent_buffer(eb);
9063                 goto out;
9064         }
9065
9066         /* Empty tree, no need to check key */
9067         if (!btrfs_header_nritems(eb) && !level) {
9068                 free_extent_buffer(eb);
9069                 goto out;
9070         }
9071
9072         if (level)
9073                 btrfs_node_key_to_cpu(eb, &key, 0);
9074         else
9075                 btrfs_item_key_to_cpu(eb, &key, 0);
9076
9077         free_extent_buffer(eb);
9078
9079         btrfs_init_path(&path);
9080         path.lowest_level = level;
9081         /* Search with the first key, to ensure we can reach it */
9082         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9083         if (ret < 0) {
9084                 err |= REFERENCER_MISSING;
9085                 goto release_out;
9086         }
9087
9088         node = path.nodes[level];
9089         if (btrfs_header_bytenr(node) != bytenr) {
9090                 error(
9091         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9092                         bytenr, nodesize, bytenr,
9093                         btrfs_header_bytenr(node));
9094                 err |= REFERENCER_MISMATCH;
9095         }
9096         if (btrfs_header_level(node) != level) {
9097                 error(
9098         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9099                         bytenr, nodesize, level,
9100                         btrfs_header_level(node));
9101                 err |= REFERENCER_MISMATCH;
9102         }
9103
9104 release_out:
9105         btrfs_release_path(&path);
9106 out:
9107         if (err & REFERENCER_MISSING) {
9108                 if (level < 0)
9109                         error("extent [%llu %d] lost referencer (owner: %llu)",
9110                                 bytenr, nodesize, root_id);
9111                 else
9112                         error(
9113                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9114                                 bytenr, nodesize, root_id, level);
9115         }
9116
9117         return err;
9118 }
9119
9120 /*
9121  * Check referencer for shared block backref
9122  * If level == -1, this function will resolve the level.
9123  */
9124 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9125                                      u64 parent, u64 bytenr, int level)
9126 {
9127         struct extent_buffer *eb;
9128         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9129         u32 nr;
9130         int found_parent = 0;
9131         int i;
9132
9133         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9134         if (!extent_buffer_uptodate(eb))
9135                 goto out;
9136
9137         if (level == -1)
9138                 level = query_tree_block_level(fs_info, bytenr);
9139         if (level < 0)
9140                 goto out;
9141
9142         if (level + 1 != btrfs_header_level(eb))
9143                 goto out;
9144
9145         nr = btrfs_header_nritems(eb);
9146         for (i = 0; i < nr; i++) {
9147                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9148                         found_parent = 1;
9149                         break;
9150                 }
9151         }
9152 out:
9153         free_extent_buffer(eb);
9154         if (!found_parent) {
9155                 error(
9156         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9157                         bytenr, nodesize, parent, level);
9158                 return REFERENCER_MISSING;
9159         }
9160         return 0;
9161 }
9162
9163 /*
9164  * Check referencer for normal (inlined) data ref
9165  * If len == 0, it will be resolved by searching in extent tree
9166  */
9167 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9168                                      u64 root_id, u64 objectid, u64 offset,
9169                                      u64 bytenr, u64 len, u32 count)
9170 {
9171         struct btrfs_root *root;
9172         struct btrfs_root *extent_root = fs_info->extent_root;
9173         struct btrfs_key key;
9174         struct btrfs_path path;
9175         struct extent_buffer *leaf;
9176         struct btrfs_file_extent_item *fi;
9177         u32 found_count = 0;
9178         int slot;
9179         int ret = 0;
9180
9181         if (!len) {
9182                 key.objectid = bytenr;
9183                 key.type = BTRFS_EXTENT_ITEM_KEY;
9184                 key.offset = (u64)-1;
9185
9186                 btrfs_init_path(&path);
9187                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9188                 if (ret < 0)
9189                         goto out;
9190                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9191                 if (ret)
9192                         goto out;
9193                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9194                 if (key.objectid != bytenr ||
9195                     key.type != BTRFS_EXTENT_ITEM_KEY)
9196                         goto out;
9197                 len = key.offset;
9198                 btrfs_release_path(&path);
9199         }
9200         key.objectid = root_id;
9201         key.type = BTRFS_ROOT_ITEM_KEY;
9202         key.offset = (u64)-1;
9203         btrfs_init_path(&path);
9204
9205         root = btrfs_read_fs_root(fs_info, &key);
9206         if (IS_ERR(root))
9207                 goto out;
9208
9209         key.objectid = objectid;
9210         key.type = BTRFS_EXTENT_DATA_KEY;
9211         /*
9212          * It can be nasty as data backref offset is
9213          * file offset - file extent offset, which is smaller or
9214          * equal to original backref offset.  The only special case is
9215          * overflow.  So we need to special check and do further search.
9216          */
9217         key.offset = offset & (1ULL << 63) ? 0 : offset;
9218
9219         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9220         if (ret < 0)
9221                 goto out;
9222
9223         /*
9224          * Search afterwards to get correct one
9225          * NOTE: As we must do a comprehensive check on the data backref to
9226          * make sure the dref count also matches, we must iterate all file
9227          * extents for that inode.
9228          */
9229         while (1) {
9230                 leaf = path.nodes[0];
9231                 slot = path.slots[0];
9232
9233                 btrfs_item_key_to_cpu(leaf, &key, slot);
9234                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9235                         break;
9236                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9237                 /*
9238                  * Except normal disk bytenr and disk num bytes, we still
9239                  * need to do extra check on dbackref offset as
9240                  * dbackref offset = file_offset - file_extent_offset
9241                  */
9242                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9243                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9244                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9245                     offset)
9246                         found_count++;
9247
9248                 ret = btrfs_next_item(root, &path);
9249                 if (ret)
9250                         break;
9251         }
9252 out:
9253         btrfs_release_path(&path);
9254         if (found_count != count) {
9255                 error(
9256 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9257                         bytenr, len, root_id, objectid, offset, count, found_count);
9258                 return REFERENCER_MISSING;
9259         }
9260         return 0;
9261 }
9262
9263 /*
9264  * Check if the referencer of a shared data backref exists
9265  */
9266 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9267                                      u64 parent, u64 bytenr)
9268 {
9269         struct extent_buffer *eb;
9270         struct btrfs_key key;
9271         struct btrfs_file_extent_item *fi;
9272         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9273         u32 nr;
9274         int found_parent = 0;
9275         int i;
9276
9277         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9278         if (!extent_buffer_uptodate(eb))
9279                 goto out;
9280
9281         nr = btrfs_header_nritems(eb);
9282         for (i = 0; i < nr; i++) {
9283                 btrfs_item_key_to_cpu(eb, &key, i);
9284                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9285                         continue;
9286
9287                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9288                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9289                         continue;
9290
9291                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9292                         found_parent = 1;
9293                         break;
9294                 }
9295         }
9296
9297 out:
9298         free_extent_buffer(eb);
9299         if (!found_parent) {
9300                 error("shared extent %llu referencer lost (parent: %llu)",
9301                         bytenr, parent);
9302                 return REFERENCER_MISSING;
9303         }
9304         return 0;
9305 }
9306
9307 /*
9308  * This function will check a given extent item, including its backref and
9309  * itself (like crossing stripe boundary and type)
9310  *
9311  * Since we don't use extent_record anymore, introduce new error bit
9312  */
9313 static int check_extent_item(struct btrfs_fs_info *fs_info,
9314                              struct extent_buffer *eb, int slot)
9315 {
9316         struct btrfs_extent_item *ei;
9317         struct btrfs_extent_inline_ref *iref;
9318         struct btrfs_extent_data_ref *dref;
9319         unsigned long end;
9320         unsigned long ptr;
9321         int type;
9322         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9323         u32 item_size = btrfs_item_size_nr(eb, slot);
9324         u64 flags;
9325         u64 offset;
9326         int metadata = 0;
9327         int level;
9328         struct btrfs_key key;
9329         int ret;
9330         int err = 0;
9331
9332         btrfs_item_key_to_cpu(eb, &key, slot);
9333         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9334                 bytes_used += key.offset;
9335         else
9336                 bytes_used += nodesize;
9337
9338         if (item_size < sizeof(*ei)) {
9339                 /*
9340                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9341                  * old thing when on disk format is still un-determined.
9342                  * No need to care about it anymore
9343                  */
9344                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9345                 return -ENOTTY;
9346         }
9347
9348         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9349         flags = btrfs_extent_flags(eb, ei);
9350
9351         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9352                 metadata = 1;
9353         if (metadata && check_crossing_stripes(global_info, key.objectid,
9354                                                eb->len)) {
9355                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9356                       key.objectid, key.objectid + nodesize);
9357                 err |= CROSSING_STRIPE_BOUNDARY;
9358         }
9359
9360         ptr = (unsigned long)(ei + 1);
9361
9362         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9363                 /* Old EXTENT_ITEM metadata */
9364                 struct btrfs_tree_block_info *info;
9365
9366                 info = (struct btrfs_tree_block_info *)ptr;
9367                 level = btrfs_tree_block_level(eb, info);
9368                 ptr += sizeof(struct btrfs_tree_block_info);
9369         } else {
9370                 /* New METADATA_ITEM */
9371                 level = key.offset;
9372         }
9373         end = (unsigned long)ei + item_size;
9374
9375         if (ptr >= end) {
9376                 err |= ITEM_SIZE_MISMATCH;
9377                 goto out;
9378         }
9379
9380         /* Now check every backref in this extent item */
9381 next:
9382         iref = (struct btrfs_extent_inline_ref *)ptr;
9383         type = btrfs_extent_inline_ref_type(eb, iref);
9384         offset = btrfs_extent_inline_ref_offset(eb, iref);
9385         switch (type) {
9386         case BTRFS_TREE_BLOCK_REF_KEY:
9387                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9388                                                level);
9389                 err |= ret;
9390                 break;
9391         case BTRFS_SHARED_BLOCK_REF_KEY:
9392                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9393                                                  level);
9394                 err |= ret;
9395                 break;
9396         case BTRFS_EXTENT_DATA_REF_KEY:
9397                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9398                 ret = check_extent_data_backref(fs_info,
9399                                 btrfs_extent_data_ref_root(eb, dref),
9400                                 btrfs_extent_data_ref_objectid(eb, dref),
9401                                 btrfs_extent_data_ref_offset(eb, dref),
9402                                 key.objectid, key.offset,
9403                                 btrfs_extent_data_ref_count(eb, dref));
9404                 err |= ret;
9405                 break;
9406         case BTRFS_SHARED_DATA_REF_KEY:
9407                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9408                 err |= ret;
9409                 break;
9410         default:
9411                 error("extent[%llu %d %llu] has unknown ref type: %d",
9412                         key.objectid, key.type, key.offset, type);
9413                 err |= UNKNOWN_TYPE;
9414                 goto out;
9415         }
9416
9417         ptr += btrfs_extent_inline_ref_size(type);
9418         if (ptr < end)
9419                 goto next;
9420
9421 out:
9422         return err;
9423 }
9424
9425 /*
9426  * Check if a dev extent item is referred correctly by its chunk
9427  */
9428 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9429                                  struct extent_buffer *eb, int slot)
9430 {
9431         struct btrfs_root *chunk_root = fs_info->chunk_root;
9432         struct btrfs_dev_extent *ptr;
9433         struct btrfs_path path;
9434         struct btrfs_key chunk_key;
9435         struct btrfs_key devext_key;
9436         struct btrfs_chunk *chunk;
9437         struct extent_buffer *l;
9438         int num_stripes;
9439         u64 length;
9440         int i;
9441         int found_chunk = 0;
9442         int ret;
9443
9444         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9445         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9446         length = btrfs_dev_extent_length(eb, ptr);
9447
9448         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9449         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9450         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9451
9452         btrfs_init_path(&path);
9453         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9454         if (ret)
9455                 goto out;
9456
9457         l = path.nodes[0];
9458         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9459         if (btrfs_chunk_length(l, chunk) != length)
9460                 goto out;
9461
9462         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9463         for (i = 0; i < num_stripes; i++) {
9464                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9465                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9466
9467                 if (devid == devext_key.objectid &&
9468                     offset == devext_key.offset) {
9469                         found_chunk = 1;
9470                         break;
9471                 }
9472         }
9473 out:
9474         btrfs_release_path(&path);
9475         if (!found_chunk) {
9476                 error(
9477                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9478                         devext_key.objectid, devext_key.offset, length);
9479                 return REFERENCER_MISSING;
9480         }
9481         return 0;
9482 }
9483
9484 /*
9485  * Check if the used space is correct with the dev item
9486  */
9487 static int check_dev_item(struct btrfs_fs_info *fs_info,
9488                           struct extent_buffer *eb, int slot)
9489 {
9490         struct btrfs_root *dev_root = fs_info->dev_root;
9491         struct btrfs_dev_item *dev_item;
9492         struct btrfs_path path;
9493         struct btrfs_key key;
9494         struct btrfs_dev_extent *ptr;
9495         u64 dev_id;
9496         u64 used;
9497         u64 total = 0;
9498         int ret;
9499
9500         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9501         dev_id = btrfs_device_id(eb, dev_item);
9502         used = btrfs_device_bytes_used(eb, dev_item);
9503
9504         key.objectid = dev_id;
9505         key.type = BTRFS_DEV_EXTENT_KEY;
9506         key.offset = 0;
9507
9508         btrfs_init_path(&path);
9509         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9510         if (ret < 0) {
9511                 btrfs_item_key_to_cpu(eb, &key, slot);
9512                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9513                         key.objectid, key.type, key.offset);
9514                 btrfs_release_path(&path);
9515                 return REFERENCER_MISSING;
9516         }
9517
9518         /* Iterate dev_extents to calculate the used space of a device */
9519         while (1) {
9520                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9521
9522                 if (key.objectid > dev_id)
9523                         break;
9524                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9525                         goto next;
9526
9527                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9528                                      struct btrfs_dev_extent);
9529                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9530 next:
9531                 ret = btrfs_next_item(dev_root, &path);
9532                 if (ret)
9533                         break;
9534         }
9535         btrfs_release_path(&path);
9536
9537         if (used != total) {
9538                 btrfs_item_key_to_cpu(eb, &key, slot);
9539                 error(
9540 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9541                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9542                         BTRFS_DEV_EXTENT_KEY, dev_id);
9543                 return ACCOUNTING_MISMATCH;
9544         }
9545         return 0;
9546 }
9547
9548 /*
9549  * Check a block group item with its referener (chunk) and its used space
9550  * with extent/metadata item
9551  */
9552 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9553                                   struct extent_buffer *eb, int slot)
9554 {
9555         struct btrfs_root *extent_root = fs_info->extent_root;
9556         struct btrfs_root *chunk_root = fs_info->chunk_root;
9557         struct btrfs_block_group_item *bi;
9558         struct btrfs_block_group_item bg_item;
9559         struct btrfs_path path;
9560         struct btrfs_key bg_key;
9561         struct btrfs_key chunk_key;
9562         struct btrfs_key extent_key;
9563         struct btrfs_chunk *chunk;
9564         struct extent_buffer *leaf;
9565         struct btrfs_extent_item *ei;
9566         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9567         u64 flags;
9568         u64 bg_flags;
9569         u64 used;
9570         u64 total = 0;
9571         int ret;
9572         int err = 0;
9573
9574         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9575         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9576         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9577         used = btrfs_block_group_used(&bg_item);
9578         bg_flags = btrfs_block_group_flags(&bg_item);
9579
9580         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9581         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9582         chunk_key.offset = bg_key.objectid;
9583
9584         btrfs_init_path(&path);
9585         /* Search for the referencer chunk */
9586         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9587         if (ret) {
9588                 error(
9589                 "block group[%llu %llu] did not find the related chunk item",
9590                         bg_key.objectid, bg_key.offset);
9591                 err |= REFERENCER_MISSING;
9592         } else {
9593                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9594                                         struct btrfs_chunk);
9595                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9596                                                 bg_key.offset) {
9597                         error(
9598         "block group[%llu %llu] related chunk item length does not match",
9599                                 bg_key.objectid, bg_key.offset);
9600                         err |= REFERENCER_MISMATCH;
9601                 }
9602         }
9603         btrfs_release_path(&path);
9604
9605         /* Search from the block group bytenr */
9606         extent_key.objectid = bg_key.objectid;
9607         extent_key.type = 0;
9608         extent_key.offset = 0;
9609
9610         btrfs_init_path(&path);
9611         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9612         if (ret < 0)
9613                 goto out;
9614
9615         /* Iterate extent tree to account used space */
9616         while (1) {
9617                 leaf = path.nodes[0];
9618                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9619                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9620                         break;
9621
9622                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9623                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9624                         goto next;
9625                 if (extent_key.objectid < bg_key.objectid)
9626                         goto next;
9627
9628                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9629                         total += nodesize;
9630                 else
9631                         total += extent_key.offset;
9632
9633                 ei = btrfs_item_ptr(leaf, path.slots[0],
9634                                     struct btrfs_extent_item);
9635                 flags = btrfs_extent_flags(leaf, ei);
9636                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9637                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9638                                 error(
9639                         "bad extent[%llu, %llu) type mismatch with chunk",
9640                                         extent_key.objectid,
9641                                         extent_key.objectid + extent_key.offset);
9642                                 err |= CHUNK_TYPE_MISMATCH;
9643                         }
9644                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9645                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9646                                     BTRFS_BLOCK_GROUP_METADATA))) {
9647                                 error(
9648                         "bad extent[%llu, %llu) type mismatch with chunk",
9649                                         extent_key.objectid,
9650                                         extent_key.objectid + nodesize);
9651                                 err |= CHUNK_TYPE_MISMATCH;
9652                         }
9653                 }
9654 next:
9655                 ret = btrfs_next_item(extent_root, &path);
9656                 if (ret)
9657                         break;
9658         }
9659
9660 out:
9661         btrfs_release_path(&path);
9662
9663         if (total != used) {
9664                 error(
9665                 "block group[%llu %llu] used %llu but extent items used %llu",
9666                         bg_key.objectid, bg_key.offset, used, total);
9667                 err |= ACCOUNTING_MISMATCH;
9668         }
9669         return err;
9670 }
9671
9672 /*
9673  * Check a chunk item.
9674  * Including checking all referred dev_extents and block group
9675  */
9676 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9677                             struct extent_buffer *eb, int slot)
9678 {
9679         struct btrfs_root *extent_root = fs_info->extent_root;
9680         struct btrfs_root *dev_root = fs_info->dev_root;
9681         struct btrfs_path path;
9682         struct btrfs_key chunk_key;
9683         struct btrfs_key bg_key;
9684         struct btrfs_key devext_key;
9685         struct btrfs_chunk *chunk;
9686         struct extent_buffer *leaf;
9687         struct btrfs_block_group_item *bi;
9688         struct btrfs_block_group_item bg_item;
9689         struct btrfs_dev_extent *ptr;
9690         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9691         u64 length;
9692         u64 chunk_end;
9693         u64 type;
9694         u64 profile;
9695         int num_stripes;
9696         u64 offset;
9697         u64 objectid;
9698         int i;
9699         int ret;
9700         int err = 0;
9701
9702         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9703         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9704         length = btrfs_chunk_length(eb, chunk);
9705         chunk_end = chunk_key.offset + length;
9706         if (!IS_ALIGNED(length, sectorsize)) {
9707                 error("chunk[%llu %llu) not aligned to %u",
9708                         chunk_key.offset, chunk_end, sectorsize);
9709                 err |= BYTES_UNALIGNED;
9710                 goto out;
9711         }
9712
9713         type = btrfs_chunk_type(eb, chunk);
9714         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9715         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9716                 error("chunk[%llu %llu) has no chunk type",
9717                         chunk_key.offset, chunk_end);
9718                 err |= UNKNOWN_TYPE;
9719         }
9720         if (profile && (profile & (profile - 1))) {
9721                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9722                         chunk_key.offset, chunk_end, profile);
9723                 err |= UNKNOWN_TYPE;
9724         }
9725
9726         bg_key.objectid = chunk_key.offset;
9727         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9728         bg_key.offset = length;
9729
9730         btrfs_init_path(&path);
9731         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9732         if (ret) {
9733                 error(
9734                 "chunk[%llu %llu) did not find the related block group item",
9735                         chunk_key.offset, chunk_end);
9736                 err |= REFERENCER_MISSING;
9737         } else{
9738                 leaf = path.nodes[0];
9739                 bi = btrfs_item_ptr(leaf, path.slots[0],
9740                                     struct btrfs_block_group_item);
9741                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9742                                    sizeof(bg_item));
9743                 if (btrfs_block_group_flags(&bg_item) != type) {
9744                         error(
9745 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9746                                 chunk_key.offset, chunk_end, type,
9747                                 btrfs_block_group_flags(&bg_item));
9748                         err |= REFERENCER_MISSING;
9749                 }
9750         }
9751
9752         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9753         for (i = 0; i < num_stripes; i++) {
9754                 btrfs_release_path(&path);
9755                 btrfs_init_path(&path);
9756                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9757                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9758                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9759
9760                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9761                                         0, 0);
9762                 if (ret)
9763                         goto not_match_dev;
9764
9765                 leaf = path.nodes[0];
9766                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9767                                      struct btrfs_dev_extent);
9768                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9769                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9770                 if (objectid != chunk_key.objectid ||
9771                     offset != chunk_key.offset ||
9772                     btrfs_dev_extent_length(leaf, ptr) != length)
9773                         goto not_match_dev;
9774                 continue;
9775 not_match_dev:
9776                 err |= BACKREF_MISSING;
9777                 error(
9778                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9779                         chunk_key.objectid, chunk_end, i);
9780                 continue;
9781         }
9782         btrfs_release_path(&path);
9783 out:
9784         return err;
9785 }
9786
9787 /*
9788  * Main entry function to check known items and update related accounting info
9789  */
9790 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9791 {
9792         struct btrfs_fs_info *fs_info = root->fs_info;
9793         struct btrfs_key key;
9794         int slot = 0;
9795         int type;
9796         struct btrfs_extent_data_ref *dref;
9797         int ret;
9798         int err = 0;
9799
9800 next:
9801         btrfs_item_key_to_cpu(eb, &key, slot);
9802         type = key.type;
9803
9804         switch (type) {
9805         case BTRFS_EXTENT_DATA_KEY:
9806                 ret = check_extent_data_item(root, eb, slot);
9807                 err |= ret;
9808                 break;
9809         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9810                 ret = check_block_group_item(fs_info, eb, slot);
9811                 err |= ret;
9812                 break;
9813         case BTRFS_DEV_ITEM_KEY:
9814                 ret = check_dev_item(fs_info, eb, slot);
9815                 err |= ret;
9816                 break;
9817         case BTRFS_CHUNK_ITEM_KEY:
9818                 ret = check_chunk_item(fs_info, eb, slot);
9819                 err |= ret;
9820                 break;
9821         case BTRFS_DEV_EXTENT_KEY:
9822                 ret = check_dev_extent_item(fs_info, eb, slot);
9823                 err |= ret;
9824                 break;
9825         case BTRFS_EXTENT_ITEM_KEY:
9826         case BTRFS_METADATA_ITEM_KEY:
9827                 ret = check_extent_item(fs_info, eb, slot);
9828                 err |= ret;
9829                 break;
9830         case BTRFS_EXTENT_CSUM_KEY:
9831                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9832                 break;
9833         case BTRFS_TREE_BLOCK_REF_KEY:
9834                 ret = check_tree_block_backref(fs_info, key.offset,
9835                                                key.objectid, -1);
9836                 err |= ret;
9837                 break;
9838         case BTRFS_EXTENT_DATA_REF_KEY:
9839                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9840                 ret = check_extent_data_backref(fs_info,
9841                                 btrfs_extent_data_ref_root(eb, dref),
9842                                 btrfs_extent_data_ref_objectid(eb, dref),
9843                                 btrfs_extent_data_ref_offset(eb, dref),
9844                                 key.objectid, 0,
9845                                 btrfs_extent_data_ref_count(eb, dref));
9846                 err |= ret;
9847                 break;
9848         case BTRFS_SHARED_BLOCK_REF_KEY:
9849                 ret = check_shared_block_backref(fs_info, key.offset,
9850                                                  key.objectid, -1);
9851                 err |= ret;
9852                 break;
9853         case BTRFS_SHARED_DATA_REF_KEY:
9854                 ret = check_shared_data_backref(fs_info, key.offset,
9855                                                 key.objectid);
9856                 err |= ret;
9857                 break;
9858         default:
9859                 break;
9860         }
9861
9862         if (++slot < btrfs_header_nritems(eb))
9863                 goto next;
9864
9865         return err;
9866 }
9867
9868 /*
9869  * Helper function for later fs/subvol tree check.  To determine if a tree
9870  * block should be checked.
9871  * This function will ensure only the direct referencer with lowest rootid to
9872  * check a fs/subvolume tree block.
9873  *
9874  * Backref check at extent tree would detect errors like missing subvolume
9875  * tree, so we can do aggressive check to reduce duplicated checks.
9876  */
9877 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9878 {
9879         struct btrfs_root *extent_root = root->fs_info->extent_root;
9880         struct btrfs_key key;
9881         struct btrfs_path path;
9882         struct extent_buffer *leaf;
9883         int slot;
9884         struct btrfs_extent_item *ei;
9885         unsigned long ptr;
9886         unsigned long end;
9887         int type;
9888         u32 item_size;
9889         u64 offset;
9890         struct btrfs_extent_inline_ref *iref;
9891         int ret;
9892
9893         btrfs_init_path(&path);
9894         key.objectid = btrfs_header_bytenr(eb);
9895         key.type = BTRFS_METADATA_ITEM_KEY;
9896         key.offset = (u64)-1;
9897
9898         /*
9899          * Any failure in backref resolving means we can't determine
9900          * whom the tree block belongs to.
9901          * So in that case, we need to check that tree block
9902          */
9903         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9904         if (ret < 0)
9905                 goto need_check;
9906
9907         ret = btrfs_previous_extent_item(extent_root, &path,
9908                                          btrfs_header_bytenr(eb));
9909         if (ret)
9910                 goto need_check;
9911
9912         leaf = path.nodes[0];
9913         slot = path.slots[0];
9914         btrfs_item_key_to_cpu(leaf, &key, slot);
9915         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9916
9917         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9918                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9919         } else {
9920                 struct btrfs_tree_block_info *info;
9921
9922                 info = (struct btrfs_tree_block_info *)(ei + 1);
9923                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9924         }
9925
9926         item_size = btrfs_item_size_nr(leaf, slot);
9927         ptr = (unsigned long)iref;
9928         end = (unsigned long)ei + item_size;
9929         while (ptr < end) {
9930                 iref = (struct btrfs_extent_inline_ref *)ptr;
9931                 type = btrfs_extent_inline_ref_type(leaf, iref);
9932                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9933
9934                 /*
9935                  * We only check the tree block if current root is
9936                  * the lowest referencer of it.
9937                  */
9938                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9939                     offset < root->objectid) {
9940                         btrfs_release_path(&path);
9941                         return 0;
9942                 }
9943
9944                 ptr += btrfs_extent_inline_ref_size(type);
9945         }
9946         /*
9947          * Normally we should also check keyed tree block ref, but that may be
9948          * very time consuming.  Inlined ref should already make us skip a lot
9949          * of refs now.  So skip search keyed tree block ref.
9950          */
9951
9952 need_check:
9953         btrfs_release_path(&path);
9954         return 1;
9955 }
9956
9957 /*
9958  * Traversal function for tree block. We will do:
9959  * 1) Skip shared fs/subvolume tree blocks
9960  * 2) Update related bytes accounting
9961  * 3) Pre-order traversal
9962  */
9963 static int traverse_tree_block(struct btrfs_root *root,
9964                                 struct extent_buffer *node)
9965 {
9966         struct extent_buffer *eb;
9967         struct btrfs_key key;
9968         struct btrfs_key drop_key;
9969         int level;
9970         u64 nr;
9971         int i;
9972         int err = 0;
9973         int ret;
9974
9975         /*
9976          * Skip shared fs/subvolume tree block, in that case they will
9977          * be checked by referencer with lowest rootid
9978          */
9979         if (is_fstree(root->objectid) && !should_check(root, node))
9980                 return 0;
9981
9982         /* Update bytes accounting */
9983         total_btree_bytes += node->len;
9984         if (fs_root_objectid(btrfs_header_owner(node)))
9985                 total_fs_tree_bytes += node->len;
9986         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9987                 total_extent_tree_bytes += node->len;
9988         if (!found_old_backref &&
9989             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9990             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9991             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9992                 found_old_backref = 1;
9993
9994         /* pre-order tranversal, check itself first */
9995         level = btrfs_header_level(node);
9996         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9997                                    btrfs_header_level(node),
9998                                    btrfs_header_owner(node));
9999         err |= ret;
10000         if (err)
10001                 error(
10002         "check %s failed root %llu bytenr %llu level %d, force continue check",
10003                         level ? "node":"leaf", root->objectid,
10004                         btrfs_header_bytenr(node), btrfs_header_level(node));
10005
10006         if (!level) {
10007                 btree_space_waste += btrfs_leaf_free_space(root, node);
10008                 ret = check_leaf_items(root, node);
10009                 err |= ret;
10010                 return err;
10011         }
10012
10013         nr = btrfs_header_nritems(node);
10014         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10015         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10016                 sizeof(struct btrfs_key_ptr);
10017
10018         /* Then check all its children */
10019         for (i = 0; i < nr; i++) {
10020                 u64 blocknr = btrfs_node_blockptr(node, i);
10021
10022                 btrfs_node_key_to_cpu(node, &key, i);
10023                 if (level == root->root_item.drop_level &&
10024                     is_dropped_key(&key, &drop_key))
10025                         continue;
10026
10027                 /*
10028                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10029                  * to call the function itself.
10030                  */
10031                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10032                 if (extent_buffer_uptodate(eb)) {
10033                         ret = traverse_tree_block(root, eb);
10034                         err |= ret;
10035                 }
10036                 free_extent_buffer(eb);
10037         }
10038
10039         return err;
10040 }
10041
10042 /*
10043  * Low memory usage version check_chunks_and_extents.
10044  */
10045 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10046 {
10047         struct btrfs_path path;
10048         struct btrfs_key key;
10049         struct btrfs_root *root1;
10050         struct btrfs_root *cur_root;
10051         int err = 0;
10052         int ret;
10053
10054         root1 = root->fs_info->chunk_root;
10055         ret = traverse_tree_block(root1, root1->node);
10056         err |= ret;
10057
10058         root1 = root->fs_info->tree_root;
10059         ret = traverse_tree_block(root1, root1->node);
10060         err |= ret;
10061
10062         btrfs_init_path(&path);
10063         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10064         key.offset = 0;
10065         key.type = BTRFS_ROOT_ITEM_KEY;
10066
10067         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10068         if (ret) {
10069                 error("cannot find extent treet in tree_root");
10070                 goto out;
10071         }
10072
10073         while (1) {
10074                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10075                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10076                         goto next;
10077                 key.offset = (u64)-1;
10078
10079                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10080                 if (IS_ERR(cur_root) || !cur_root) {
10081                         error("failed to read tree: %lld", key.objectid);
10082                         goto next;
10083                 }
10084
10085                 ret = traverse_tree_block(cur_root, cur_root->node);
10086                 err |= ret;
10087
10088 next:
10089                 ret = btrfs_next_item(root1, &path);
10090                 if (ret)
10091                         goto out;
10092         }
10093
10094 out:
10095         btrfs_release_path(&path);
10096         return err;
10097 }
10098
10099 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10100                            struct btrfs_root *root, int overwrite)
10101 {
10102         struct extent_buffer *c;
10103         struct extent_buffer *old = root->node;
10104         int level;
10105         int ret;
10106         struct btrfs_disk_key disk_key = {0,0,0};
10107
10108         level = 0;
10109
10110         if (overwrite) {
10111                 c = old;
10112                 extent_buffer_get(c);
10113                 goto init;
10114         }
10115         c = btrfs_alloc_free_block(trans, root,
10116                                    root->nodesize,
10117                                    root->root_key.objectid,
10118                                    &disk_key, level, 0, 0);
10119         if (IS_ERR(c)) {
10120                 c = old;
10121                 extent_buffer_get(c);
10122                 overwrite = 1;
10123         }
10124 init:
10125         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10126         btrfs_set_header_level(c, level);
10127         btrfs_set_header_bytenr(c, c->start);
10128         btrfs_set_header_generation(c, trans->transid);
10129         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10130         btrfs_set_header_owner(c, root->root_key.objectid);
10131
10132         write_extent_buffer(c, root->fs_info->fsid,
10133                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10134
10135         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10136                             btrfs_header_chunk_tree_uuid(c),
10137                             BTRFS_UUID_SIZE);
10138
10139         btrfs_mark_buffer_dirty(c);
10140         /*
10141          * this case can happen in the following case:
10142          *
10143          * 1.overwrite previous root.
10144          *
10145          * 2.reinit reloc data root, this is because we skip pin
10146          * down reloc data tree before which means we can allocate
10147          * same block bytenr here.
10148          */
10149         if (old->start == c->start) {
10150                 btrfs_set_root_generation(&root->root_item,
10151                                           trans->transid);
10152                 root->root_item.level = btrfs_header_level(root->node);
10153                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10154                                         &root->root_key, &root->root_item);
10155                 if (ret) {
10156                         free_extent_buffer(c);
10157                         return ret;
10158                 }
10159         }
10160         free_extent_buffer(old);
10161         root->node = c;
10162         add_root_to_dirty_list(root);
10163         return 0;
10164 }
10165
10166 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10167                                 struct extent_buffer *eb, int tree_root)
10168 {
10169         struct extent_buffer *tmp;
10170         struct btrfs_root_item *ri;
10171         struct btrfs_key key;
10172         u64 bytenr;
10173         u32 nodesize;
10174         int level = btrfs_header_level(eb);
10175         int nritems;
10176         int ret;
10177         int i;
10178
10179         /*
10180          * If we have pinned this block before, don't pin it again.
10181          * This can not only avoid forever loop with broken filesystem
10182          * but also give us some speedups.
10183          */
10184         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10185                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10186                 return 0;
10187
10188         btrfs_pin_extent(fs_info, eb->start, eb->len);
10189
10190         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10191         nritems = btrfs_header_nritems(eb);
10192         for (i = 0; i < nritems; i++) {
10193                 if (level == 0) {
10194                         btrfs_item_key_to_cpu(eb, &key, i);
10195                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10196                                 continue;
10197                         /* Skip the extent root and reloc roots */
10198                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10199                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10200                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10201                                 continue;
10202                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10203                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10204
10205                         /*
10206                          * If at any point we start needing the real root we
10207                          * will have to build a stump root for the root we are
10208                          * in, but for now this doesn't actually use the root so
10209                          * just pass in extent_root.
10210                          */
10211                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10212                                               nodesize, 0);
10213                         if (!extent_buffer_uptodate(tmp)) {
10214                                 fprintf(stderr, "Error reading root block\n");
10215                                 return -EIO;
10216                         }
10217                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10218                         free_extent_buffer(tmp);
10219                         if (ret)
10220                                 return ret;
10221                 } else {
10222                         bytenr = btrfs_node_blockptr(eb, i);
10223
10224                         /* If we aren't the tree root don't read the block */
10225                         if (level == 1 && !tree_root) {
10226                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10227                                 continue;
10228                         }
10229
10230                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10231                                               nodesize, 0);
10232                         if (!extent_buffer_uptodate(tmp)) {
10233                                 fprintf(stderr, "Error reading tree block\n");
10234                                 return -EIO;
10235                         }
10236                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10237                         free_extent_buffer(tmp);
10238                         if (ret)
10239                                 return ret;
10240                 }
10241         }
10242
10243         return 0;
10244 }
10245
10246 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10247 {
10248         int ret;
10249
10250         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10251         if (ret)
10252                 return ret;
10253
10254         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10255 }
10256
10257 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10258 {
10259         struct btrfs_block_group_cache *cache;
10260         struct btrfs_path *path;
10261         struct extent_buffer *leaf;
10262         struct btrfs_chunk *chunk;
10263         struct btrfs_key key;
10264         int ret;
10265         u64 start;
10266
10267         path = btrfs_alloc_path();
10268         if (!path)
10269                 return -ENOMEM;
10270
10271         key.objectid = 0;
10272         key.type = BTRFS_CHUNK_ITEM_KEY;
10273         key.offset = 0;
10274
10275         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10276         if (ret < 0) {
10277                 btrfs_free_path(path);
10278                 return ret;
10279         }
10280
10281         /*
10282          * We do this in case the block groups were screwed up and had alloc
10283          * bits that aren't actually set on the chunks.  This happens with
10284          * restored images every time and could happen in real life I guess.
10285          */
10286         fs_info->avail_data_alloc_bits = 0;
10287         fs_info->avail_metadata_alloc_bits = 0;
10288         fs_info->avail_system_alloc_bits = 0;
10289
10290         /* First we need to create the in-memory block groups */
10291         while (1) {
10292                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10293                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10294                         if (ret < 0) {
10295                                 btrfs_free_path(path);
10296                                 return ret;
10297                         }
10298                         if (ret) {
10299                                 ret = 0;
10300                                 break;
10301                         }
10302                 }
10303                 leaf = path->nodes[0];
10304                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10305                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10306                         path->slots[0]++;
10307                         continue;
10308                 }
10309
10310                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10311                                        struct btrfs_chunk);
10312                 btrfs_add_block_group(fs_info, 0,
10313                                       btrfs_chunk_type(leaf, chunk),
10314                                       key.objectid, key.offset,
10315                                       btrfs_chunk_length(leaf, chunk));
10316                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10317                                  key.offset + btrfs_chunk_length(leaf, chunk),
10318                                  GFP_NOFS);
10319                 path->slots[0]++;
10320         }
10321         start = 0;
10322         while (1) {
10323                 cache = btrfs_lookup_first_block_group(fs_info, start);
10324                 if (!cache)
10325                         break;
10326                 cache->cached = 1;
10327                 start = cache->key.objectid + cache->key.offset;
10328         }
10329
10330         btrfs_free_path(path);
10331         return 0;
10332 }
10333
10334 static int reset_balance(struct btrfs_trans_handle *trans,
10335                          struct btrfs_fs_info *fs_info)
10336 {
10337         struct btrfs_root *root = fs_info->tree_root;
10338         struct btrfs_path *path;
10339         struct extent_buffer *leaf;
10340         struct btrfs_key key;
10341         int del_slot, del_nr = 0;
10342         int ret;
10343         int found = 0;
10344
10345         path = btrfs_alloc_path();
10346         if (!path)
10347                 return -ENOMEM;
10348
10349         key.objectid = BTRFS_BALANCE_OBJECTID;
10350         key.type = BTRFS_BALANCE_ITEM_KEY;
10351         key.offset = 0;
10352
10353         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10354         if (ret) {
10355                 if (ret > 0)
10356                         ret = 0;
10357                 if (!ret)
10358                         goto reinit_data_reloc;
10359                 else
10360                         goto out;
10361         }
10362
10363         ret = btrfs_del_item(trans, root, path);
10364         if (ret)
10365                 goto out;
10366         btrfs_release_path(path);
10367
10368         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10369         key.type = BTRFS_ROOT_ITEM_KEY;
10370         key.offset = 0;
10371
10372         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10373         if (ret < 0)
10374                 goto out;
10375         while (1) {
10376                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10377                         if (!found)
10378                                 break;
10379
10380                         if (del_nr) {
10381                                 ret = btrfs_del_items(trans, root, path,
10382                                                       del_slot, del_nr);
10383                                 del_nr = 0;
10384                                 if (ret)
10385                                         goto out;
10386                         }
10387                         key.offset++;
10388                         btrfs_release_path(path);
10389
10390                         found = 0;
10391                         ret = btrfs_search_slot(trans, root, &key, path,
10392                                                 -1, 1);
10393                         if (ret < 0)
10394                                 goto out;
10395                         continue;
10396                 }
10397                 found = 1;
10398                 leaf = path->nodes[0];
10399                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10400                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10401                         break;
10402                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10403                         path->slots[0]++;
10404                         continue;
10405                 }
10406                 if (!del_nr) {
10407                         del_slot = path->slots[0];
10408                         del_nr = 1;
10409                 } else {
10410                         del_nr++;
10411                 }
10412                 path->slots[0]++;
10413         }
10414
10415         if (del_nr) {
10416                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10417                 if (ret)
10418                         goto out;
10419         }
10420         btrfs_release_path(path);
10421
10422 reinit_data_reloc:
10423         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10424         key.type = BTRFS_ROOT_ITEM_KEY;
10425         key.offset = (u64)-1;
10426         root = btrfs_read_fs_root(fs_info, &key);
10427         if (IS_ERR(root)) {
10428                 fprintf(stderr, "Error reading data reloc tree\n");
10429                 ret = PTR_ERR(root);
10430                 goto out;
10431         }
10432         record_root_in_trans(trans, root);
10433         ret = btrfs_fsck_reinit_root(trans, root, 0);
10434         if (ret)
10435                 goto out;
10436         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10437 out:
10438         btrfs_free_path(path);
10439         return ret;
10440 }
10441
10442 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10443                               struct btrfs_fs_info *fs_info)
10444 {
10445         u64 start = 0;
10446         int ret;
10447
10448         /*
10449          * The only reason we don't do this is because right now we're just
10450          * walking the trees we find and pinning down their bytes, we don't look
10451          * at any of the leaves.  In order to do mixed groups we'd have to check
10452          * the leaves of any fs roots and pin down the bytes for any file
10453          * extents we find.  Not hard but why do it if we don't have to?
10454          */
10455         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10456                 fprintf(stderr, "We don't support re-initing the extent tree "
10457                         "for mixed block groups yet, please notify a btrfs "
10458                         "developer you want to do this so they can add this "
10459                         "functionality.\n");
10460                 return -EINVAL;
10461         }
10462
10463         /*
10464          * first we need to walk all of the trees except the extent tree and pin
10465          * down the bytes that are in use so we don't overwrite any existing
10466          * metadata.
10467          */
10468         ret = pin_metadata_blocks(fs_info);
10469         if (ret) {
10470                 fprintf(stderr, "error pinning down used bytes\n");
10471                 return ret;
10472         }
10473
10474         /*
10475          * Need to drop all the block groups since we're going to recreate all
10476          * of them again.
10477          */
10478         btrfs_free_block_groups(fs_info);
10479         ret = reset_block_groups(fs_info);
10480         if (ret) {
10481                 fprintf(stderr, "error resetting the block groups\n");
10482                 return ret;
10483         }
10484
10485         /* Ok we can allocate now, reinit the extent root */
10486         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10487         if (ret) {
10488                 fprintf(stderr, "extent root initialization failed\n");
10489                 /*
10490                  * When the transaction code is updated we should end the
10491                  * transaction, but for now progs only knows about commit so
10492                  * just return an error.
10493                  */
10494                 return ret;
10495         }
10496
10497         /*
10498          * Now we have all the in-memory block groups setup so we can make
10499          * allocations properly, and the metadata we care about is safe since we
10500          * pinned all of it above.
10501          */
10502         while (1) {
10503                 struct btrfs_block_group_cache *cache;
10504
10505                 cache = btrfs_lookup_first_block_group(fs_info, start);
10506                 if (!cache)
10507                         break;
10508                 start = cache->key.objectid + cache->key.offset;
10509                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10510                                         &cache->key, &cache->item,
10511                                         sizeof(cache->item));
10512                 if (ret) {
10513                         fprintf(stderr, "Error adding block group\n");
10514                         return ret;
10515                 }
10516                 btrfs_extent_post_op(trans, fs_info->extent_root);
10517         }
10518
10519         ret = reset_balance(trans, fs_info);
10520         if (ret)
10521                 fprintf(stderr, "error resetting the pending balance\n");
10522
10523         return ret;
10524 }
10525
10526 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10527 {
10528         struct btrfs_path *path;
10529         struct btrfs_trans_handle *trans;
10530         struct btrfs_key key;
10531         int ret;
10532
10533         printf("Recowing metadata block %llu\n", eb->start);
10534         key.objectid = btrfs_header_owner(eb);
10535         key.type = BTRFS_ROOT_ITEM_KEY;
10536         key.offset = (u64)-1;
10537
10538         root = btrfs_read_fs_root(root->fs_info, &key);
10539         if (IS_ERR(root)) {
10540                 fprintf(stderr, "Couldn't find owner root %llu\n",
10541                         key.objectid);
10542                 return PTR_ERR(root);
10543         }
10544
10545         path = btrfs_alloc_path();
10546         if (!path)
10547                 return -ENOMEM;
10548
10549         trans = btrfs_start_transaction(root, 1);
10550         if (IS_ERR(trans)) {
10551                 btrfs_free_path(path);
10552                 return PTR_ERR(trans);
10553         }
10554
10555         path->lowest_level = btrfs_header_level(eb);
10556         if (path->lowest_level)
10557                 btrfs_node_key_to_cpu(eb, &key, 0);
10558         else
10559                 btrfs_item_key_to_cpu(eb, &key, 0);
10560
10561         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10562         btrfs_commit_transaction(trans, root);
10563         btrfs_free_path(path);
10564         return ret;
10565 }
10566
10567 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10568 {
10569         struct btrfs_path *path;
10570         struct btrfs_trans_handle *trans;
10571         struct btrfs_key key;
10572         int ret;
10573
10574         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10575                bad->key.type, bad->key.offset);
10576         key.objectid = bad->root_id;
10577         key.type = BTRFS_ROOT_ITEM_KEY;
10578         key.offset = (u64)-1;
10579
10580         root = btrfs_read_fs_root(root->fs_info, &key);
10581         if (IS_ERR(root)) {
10582                 fprintf(stderr, "Couldn't find owner root %llu\n",
10583                         key.objectid);
10584                 return PTR_ERR(root);
10585         }
10586
10587         path = btrfs_alloc_path();
10588         if (!path)
10589                 return -ENOMEM;
10590
10591         trans = btrfs_start_transaction(root, 1);
10592         if (IS_ERR(trans)) {
10593                 btrfs_free_path(path);
10594                 return PTR_ERR(trans);
10595         }
10596
10597         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10598         if (ret) {
10599                 if (ret > 0)
10600                         ret = 0;
10601                 goto out;
10602         }
10603         ret = btrfs_del_item(trans, root, path);
10604 out:
10605         btrfs_commit_transaction(trans, root);
10606         btrfs_free_path(path);
10607         return ret;
10608 }
10609
10610 static int zero_log_tree(struct btrfs_root *root)
10611 {
10612         struct btrfs_trans_handle *trans;
10613         int ret;
10614
10615         trans = btrfs_start_transaction(root, 1);
10616         if (IS_ERR(trans)) {
10617                 ret = PTR_ERR(trans);
10618                 return ret;
10619         }
10620         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10621         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10622         ret = btrfs_commit_transaction(trans, root);
10623         return ret;
10624 }
10625
10626 static int populate_csum(struct btrfs_trans_handle *trans,
10627                          struct btrfs_root *csum_root, char *buf, u64 start,
10628                          u64 len)
10629 {
10630         u64 offset = 0;
10631         u64 sectorsize;
10632         int ret = 0;
10633
10634         while (offset < len) {
10635                 sectorsize = csum_root->sectorsize;
10636                 ret = read_extent_data(csum_root, buf, start + offset,
10637                                        &sectorsize, 0);
10638                 if (ret)
10639                         break;
10640                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10641                                             start + offset, buf, sectorsize);
10642                 if (ret)
10643                         break;
10644                 offset += sectorsize;
10645         }
10646         return ret;
10647 }
10648
10649 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10650                                       struct btrfs_root *csum_root,
10651                                       struct btrfs_root *cur_root)
10652 {
10653         struct btrfs_path *path;
10654         struct btrfs_key key;
10655         struct extent_buffer *node;
10656         struct btrfs_file_extent_item *fi;
10657         char *buf = NULL;
10658         u64 start = 0;
10659         u64 len = 0;
10660         int slot = 0;
10661         int ret = 0;
10662
10663         path = btrfs_alloc_path();
10664         if (!path)
10665                 return -ENOMEM;
10666         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10667         if (!buf) {
10668                 ret = -ENOMEM;
10669                 goto out;
10670         }
10671
10672         key.objectid = 0;
10673         key.offset = 0;
10674         key.type = 0;
10675
10676         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10677         if (ret < 0)
10678                 goto out;
10679         /* Iterate all regular file extents and fill its csum */
10680         while (1) {
10681                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10682
10683                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10684                         goto next;
10685                 node = path->nodes[0];
10686                 slot = path->slots[0];
10687                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10688                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10689                         goto next;
10690                 start = btrfs_file_extent_disk_bytenr(node, fi);
10691                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10692
10693                 ret = populate_csum(trans, csum_root, buf, start, len);
10694                 if (ret == -EEXIST)
10695                         ret = 0;
10696                 if (ret < 0)
10697                         goto out;
10698 next:
10699                 /*
10700                  * TODO: if next leaf is corrupted, jump to nearest next valid
10701                  * leaf.
10702                  */
10703                 ret = btrfs_next_item(cur_root, path);
10704                 if (ret < 0)
10705                         goto out;
10706                 if (ret > 0) {
10707                         ret = 0;
10708                         goto out;
10709                 }
10710         }
10711
10712 out:
10713         btrfs_free_path(path);
10714         free(buf);
10715         return ret;
10716 }
10717
10718 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10719                                   struct btrfs_root *csum_root)
10720 {
10721         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10722         struct btrfs_path *path;
10723         struct btrfs_root *tree_root = fs_info->tree_root;
10724         struct btrfs_root *cur_root;
10725         struct extent_buffer *node;
10726         struct btrfs_key key;
10727         int slot = 0;
10728         int ret = 0;
10729
10730         path = btrfs_alloc_path();
10731         if (!path)
10732                 return -ENOMEM;
10733
10734         key.objectid = BTRFS_FS_TREE_OBJECTID;
10735         key.offset = 0;
10736         key.type = BTRFS_ROOT_ITEM_KEY;
10737
10738         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10739         if (ret < 0)
10740                 goto out;
10741         if (ret > 0) {
10742                 ret = -ENOENT;
10743                 goto out;
10744         }
10745
10746         while (1) {
10747                 node = path->nodes[0];
10748                 slot = path->slots[0];
10749                 btrfs_item_key_to_cpu(node, &key, slot);
10750                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10751                         goto out;
10752                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10753                         goto next;
10754                 if (!is_fstree(key.objectid))
10755                         goto next;
10756                 key.offset = (u64)-1;
10757
10758                 cur_root = btrfs_read_fs_root(fs_info, &key);
10759                 if (IS_ERR(cur_root) || !cur_root) {
10760                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10761                                 key.objectid);
10762                         goto out;
10763                 }
10764                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10765                                 cur_root);
10766                 if (ret < 0)
10767                         goto out;
10768 next:
10769                 ret = btrfs_next_item(tree_root, path);
10770                 if (ret > 0) {
10771                         ret = 0;
10772                         goto out;
10773                 }
10774                 if (ret < 0)
10775                         goto out;
10776         }
10777
10778 out:
10779         btrfs_free_path(path);
10780         return ret;
10781 }
10782
10783 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10784                                       struct btrfs_root *csum_root)
10785 {
10786         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10787         struct btrfs_path *path;
10788         struct btrfs_extent_item *ei;
10789         struct extent_buffer *leaf;
10790         char *buf;
10791         struct btrfs_key key;
10792         int ret;
10793
10794         path = btrfs_alloc_path();
10795         if (!path)
10796                 return -ENOMEM;
10797
10798         key.objectid = 0;
10799         key.type = BTRFS_EXTENT_ITEM_KEY;
10800         key.offset = 0;
10801
10802         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10803         if (ret < 0) {
10804                 btrfs_free_path(path);
10805                 return ret;
10806         }
10807
10808         buf = malloc(csum_root->sectorsize);
10809         if (!buf) {
10810                 btrfs_free_path(path);
10811                 return -ENOMEM;
10812         }
10813
10814         while (1) {
10815                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10816                         ret = btrfs_next_leaf(extent_root, path);
10817                         if (ret < 0)
10818                                 break;
10819                         if (ret) {
10820                                 ret = 0;
10821                                 break;
10822                         }
10823                 }
10824                 leaf = path->nodes[0];
10825
10826                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10827                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10828                         path->slots[0]++;
10829                         continue;
10830                 }
10831
10832                 ei = btrfs_item_ptr(leaf, path->slots[0],
10833                                     struct btrfs_extent_item);
10834                 if (!(btrfs_extent_flags(leaf, ei) &
10835                       BTRFS_EXTENT_FLAG_DATA)) {
10836                         path->slots[0]++;
10837                         continue;
10838                 }
10839
10840                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10841                                     key.offset);
10842                 if (ret)
10843                         break;
10844                 path->slots[0]++;
10845         }
10846
10847         btrfs_free_path(path);
10848         free(buf);
10849         return ret;
10850 }
10851
/*
 * Recalculate the checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source and the fs/subvolume trees are
 * walked instead.
 *
 * @trans:          transaction handed to the selected helper
 * @csum_root:      csum tree to fill
 * @search_fs_tree: non-zero to walk the fs/subvolume trees, zero to walk the
 *                  extent tree
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
10868
10869 static void free_roots_info_cache(void)
10870 {
10871         if (!roots_info_cache)
10872                 return;
10873
10874         while (!cache_tree_empty(roots_info_cache)) {
10875                 struct cache_extent *entry;
10876                 struct root_item_info *rii;
10877
10878                 entry = first_cache_extent(roots_info_cache);
10879                 if (!entry)
10880                         break;
10881                 remove_cache_extent(roots_info_cache, entry);
10882                 rii = container_of(entry, struct root_item_info, cache_extent);
10883                 free(rii);
10884         }
10885
10886         free(roots_info_cache);
10887         roots_info_cache = NULL;
10888 }
10889
10890 static int build_roots_info_cache(struct btrfs_fs_info *info)
10891 {
10892         int ret = 0;
10893         struct btrfs_key key;
10894         struct extent_buffer *leaf;
10895         struct btrfs_path *path;
10896
10897         if (!roots_info_cache) {
10898                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10899                 if (!roots_info_cache)
10900                         return -ENOMEM;
10901                 cache_tree_init(roots_info_cache);
10902         }
10903
10904         path = btrfs_alloc_path();
10905         if (!path)
10906                 return -ENOMEM;
10907
10908         key.objectid = 0;
10909         key.type = BTRFS_EXTENT_ITEM_KEY;
10910         key.offset = 0;
10911
10912         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10913         if (ret < 0)
10914                 goto out;
10915         leaf = path->nodes[0];
10916
10917         while (1) {
10918                 struct btrfs_key found_key;
10919                 struct btrfs_extent_item *ei;
10920                 struct btrfs_extent_inline_ref *iref;
10921                 int slot = path->slots[0];
10922                 int type;
10923                 u64 flags;
10924                 u64 root_id;
10925                 u8 level;
10926                 struct cache_extent *entry;
10927                 struct root_item_info *rii;
10928
10929                 if (slot >= btrfs_header_nritems(leaf)) {
10930                         ret = btrfs_next_leaf(info->extent_root, path);
10931                         if (ret < 0) {
10932                                 break;
10933                         } else if (ret) {
10934                                 ret = 0;
10935                                 break;
10936                         }
10937                         leaf = path->nodes[0];
10938                         slot = path->slots[0];
10939                 }
10940
10941                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10942
10943                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10944                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10945                         goto next;
10946
10947                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10948                 flags = btrfs_extent_flags(leaf, ei);
10949
10950                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10951                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10952                         goto next;
10953
10954                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10955                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10956                         level = found_key.offset;
10957                 } else {
10958                         struct btrfs_tree_block_info *binfo;
10959
10960                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10961                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10962                         level = btrfs_tree_block_level(leaf, binfo);
10963                 }
10964
10965                 /*
10966                  * For a root extent, it must be of the following type and the
10967                  * first (and only one) iref in the item.
10968                  */
10969                 type = btrfs_extent_inline_ref_type(leaf, iref);
10970                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10971                         goto next;
10972
10973                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10974                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10975                 if (!entry) {
10976                         rii = malloc(sizeof(struct root_item_info));
10977                         if (!rii) {
10978                                 ret = -ENOMEM;
10979                                 goto out;
10980                         }
10981                         rii->cache_extent.start = root_id;
10982                         rii->cache_extent.size = 1;
10983                         rii->level = (u8)-1;
10984                         entry = &rii->cache_extent;
10985                         ret = insert_cache_extent(roots_info_cache, entry);
10986                         ASSERT(ret == 0);
10987                 } else {
10988                         rii = container_of(entry, struct root_item_info,
10989                                            cache_extent);
10990                 }
10991
10992                 ASSERT(rii->cache_extent.start == root_id);
10993                 ASSERT(rii->cache_extent.size == 1);
10994
10995                 if (level > rii->level || rii->level == (u8)-1) {
10996                         rii->level = level;
10997                         rii->bytenr = found_key.objectid;
10998                         rii->gen = btrfs_extent_generation(leaf, ei);
10999                         rii->node_count = 1;
11000                 } else if (level == rii->level) {
11001                         rii->node_count++;
11002                 }
11003 next:
11004                 path->slots[0]++;
11005         }
11006
11007 out:
11008         btrfs_free_path(path);
11009
11010         return ret;
11011 }
11012
11013 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11014                                   struct btrfs_path *path,
11015                                   const struct btrfs_key *root_key,
11016                                   const int read_only_mode)
11017 {
11018         const u64 root_id = root_key->objectid;
11019         struct cache_extent *entry;
11020         struct root_item_info *rii;
11021         struct btrfs_root_item ri;
11022         unsigned long offset;
11023
11024         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11025         if (!entry) {
11026                 fprintf(stderr,
11027                         "Error: could not find extent items for root %llu\n",
11028                         root_key->objectid);
11029                 return -ENOENT;
11030         }
11031
11032         rii = container_of(entry, struct root_item_info, cache_extent);
11033         ASSERT(rii->cache_extent.start == root_id);
11034         ASSERT(rii->cache_extent.size == 1);
11035
11036         if (rii->node_count != 1) {
11037                 fprintf(stderr,
11038                         "Error: could not find btree root extent for root %llu\n",
11039                         root_id);
11040                 return -ENOENT;
11041         }
11042
11043         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11044         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11045
11046         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11047             btrfs_root_level(&ri) != rii->level ||
11048             btrfs_root_generation(&ri) != rii->gen) {
11049
11050                 /*
11051                  * If we're in repair mode but our caller told us to not update
11052                  * the root item, i.e. just check if it needs to be updated, don't
11053                  * print this message, since the caller will call us again shortly
11054                  * for the same root item without read only mode (the caller will
11055                  * open a transaction first).
11056                  */
11057                 if (!(read_only_mode && repair))
11058                         fprintf(stderr,
11059                                 "%sroot item for root %llu,"
11060                                 " current bytenr %llu, current gen %llu, current level %u,"
11061                                 " new bytenr %llu, new gen %llu, new level %u\n",
11062                                 (read_only_mode ? "" : "fixing "),
11063                                 root_id,
11064                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11065                                 btrfs_root_level(&ri),
11066                                 rii->bytenr, rii->gen, rii->level);
11067
11068                 if (btrfs_root_generation(&ri) > rii->gen) {
11069                         fprintf(stderr,
11070                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11071                                 root_id, btrfs_root_generation(&ri), rii->gen);
11072                         return -EINVAL;
11073                 }
11074
11075                 if (!read_only_mode) {
11076                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11077                         btrfs_set_root_level(&ri, rii->level);
11078                         btrfs_set_root_generation(&ri, rii->gen);
11079                         write_extent_buffer(path->nodes[0], &ri,
11080                                             offset, sizeof(ri));
11081                 }
11082
11083                 return 1;
11084         }
11085
11086         return 0;
11087 }
11088
11089 /*
11090  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11091  * caused read-only snapshots to be corrupted if they were created at a moment
11092  * when the source subvolume/snapshot had orphan items. The issue was that the
11093  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11094  * node instead of the post orphan cleanup root node.
11095  * So this function, and its callees, just detects and fixes those cases. Even
11096  * though the regression was for read-only snapshots, this function applies to
11097  * any snapshot/subvolume root.
11098  * This must be run before any other repair code - not doing it so, makes other
11099  * repair code delete or modify backrefs in the extent tree for example, which
11100  * will result in an inconsistent fs after repairing the root items.
11101  */
11102 static int repair_root_items(struct btrfs_fs_info *info)
11103 {
11104         struct btrfs_path *path = NULL;
11105         struct btrfs_key key;
11106         struct extent_buffer *leaf;
11107         struct btrfs_trans_handle *trans = NULL;
11108         int ret = 0;
11109         int bad_roots = 0;
11110         int need_trans = 0;
11111
11112         ret = build_roots_info_cache(info);
11113         if (ret)
11114                 goto out;
11115
11116         path = btrfs_alloc_path();
11117         if (!path) {
11118                 ret = -ENOMEM;
11119                 goto out;
11120         }
11121
11122         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11123         key.type = BTRFS_ROOT_ITEM_KEY;
11124         key.offset = 0;
11125
11126 again:
11127         /*
11128          * Avoid opening and committing transactions if a leaf doesn't have
11129          * any root items that need to be fixed, so that we avoid rotating
11130          * backup roots unnecessarily.
11131          */
11132         if (need_trans) {
11133                 trans = btrfs_start_transaction(info->tree_root, 1);
11134                 if (IS_ERR(trans)) {
11135                         ret = PTR_ERR(trans);
11136                         goto out;
11137                 }
11138         }
11139
11140         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11141                                 0, trans ? 1 : 0);
11142         if (ret < 0)
11143                 goto out;
11144         leaf = path->nodes[0];
11145
11146         while (1) {
11147                 struct btrfs_key found_key;
11148
11149                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11150                         int no_more_keys = find_next_key(path, &key);
11151
11152                         btrfs_release_path(path);
11153                         if (trans) {
11154                                 ret = btrfs_commit_transaction(trans,
11155                                                                info->tree_root);
11156                                 trans = NULL;
11157                                 if (ret < 0)
11158                                         goto out;
11159                         }
11160                         need_trans = 0;
11161                         if (no_more_keys)
11162                                 break;
11163                         goto again;
11164                 }
11165
11166                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11167
11168                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11169                         goto next;
11170                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11171                         goto next;
11172
11173                 ret = maybe_repair_root_item(info, path, &found_key,
11174                                              trans ? 0 : 1);
11175                 if (ret < 0)
11176                         goto out;
11177                 if (ret) {
11178                         if (!trans && repair) {
11179                                 need_trans = 1;
11180                                 key = found_key;
11181                                 btrfs_release_path(path);
11182                                 goto again;
11183                         }
11184                         bad_roots++;
11185                 }
11186 next:
11187                 path->slots[0]++;
11188         }
11189         ret = 0;
11190 out:
11191         free_roots_info_cache();
11192         btrfs_free_path(path);
11193         if (trans)
11194                 btrfs_commit_transaction(trans, info->tree_root);
11195         if (ret < 0)
11196                 return ret;
11197
11198         return bad_roots;
11199 }
11200
11201 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11202 {
11203         struct btrfs_trans_handle *trans;
11204         struct btrfs_block_group_cache *bg_cache;
11205         u64 current = 0;
11206         int ret = 0;
11207
11208         /* Clear all free space cache inodes and its extent data */
11209         while (1) {
11210                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11211                 if (!bg_cache)
11212                         break;
11213                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11214                 if (ret < 0)
11215                         return ret;
11216                 current = bg_cache->key.objectid + bg_cache->key.offset;
11217         }
11218
11219         /* Don't forget to set cache_generation to -1 */
11220         trans = btrfs_start_transaction(fs_info->tree_root, 0);
11221         if (IS_ERR(trans)) {
11222                 error("failed to update super block cache generation");
11223                 return PTR_ERR(trans);
11224         }
11225         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11226         btrfs_commit_transaction(trans, fs_info->tree_root);
11227
11228         return ret;
11229 }
11230
/*
 * Help text of "btrfs check": a NULL terminated array with one string per
 * line of output.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	"                            NOTE: v1 support implemented",
	NULL
};
11262
11263 int cmd_check(int argc, char **argv)
11264 {
11265         struct cache_tree root_cache;
11266         struct btrfs_root *root;
11267         struct btrfs_fs_info *info;
11268         u64 bytenr = 0;
11269         u64 subvolid = 0;
11270         u64 tree_root_bytenr = 0;
11271         u64 chunk_root_bytenr = 0;
11272         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11273         int ret;
11274         u64 num;
11275         int init_csum_tree = 0;
11276         int readonly = 0;
11277         int clear_space_cache = 0;
11278         int qgroup_report = 0;
11279         int qgroups_repaired = 0;
11280         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11281
11282         while(1) {
11283                 int c;
11284                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11285                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11286                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11287                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11288                 static const struct option long_options[] = {
11289                         { "super", required_argument, NULL, 's' },
11290                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11291                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11292                         { "init-csum-tree", no_argument, NULL,
11293                                 GETOPT_VAL_INIT_CSUM },
11294                         { "init-extent-tree", no_argument, NULL,
11295                                 GETOPT_VAL_INIT_EXTENT },
11296                         { "check-data-csum", no_argument, NULL,
11297                                 GETOPT_VAL_CHECK_CSUM },
11298                         { "backup", no_argument, NULL, 'b' },
11299                         { "subvol-extents", required_argument, NULL, 'E' },
11300                         { "qgroup-report", no_argument, NULL, 'Q' },
11301                         { "tree-root", required_argument, NULL, 'r' },
11302                         { "chunk-root", required_argument, NULL,
11303                                 GETOPT_VAL_CHUNK_TREE },
11304                         { "progress", no_argument, NULL, 'p' },
11305                         { "mode", required_argument, NULL,
11306                                 GETOPT_VAL_MODE },
11307                         { "clear-space-cache", required_argument, NULL,
11308                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
11309                         { NULL, 0, NULL, 0}
11310                 };
11311
11312                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11313                 if (c < 0)
11314                         break;
11315                 switch(c) {
11316                         case 'a': /* ignored */ break;
11317                         case 'b':
11318                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11319                                 break;
11320                         case 's':
11321                                 num = arg_strtou64(optarg);
11322                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11323                                         error(
11324                                         "super mirror should be less than %d",
11325                                                 BTRFS_SUPER_MIRROR_MAX);
11326                                         exit(1);
11327                                 }
11328                                 bytenr = btrfs_sb_offset(((int)num));
11329                                 printf("using SB copy %llu, bytenr %llu\n", num,
11330                                        (unsigned long long)bytenr);
11331                                 break;
11332                         case 'Q':
11333                                 qgroup_report = 1;
11334                                 break;
11335                         case 'E':
11336                                 subvolid = arg_strtou64(optarg);
11337                                 break;
11338                         case 'r':
11339                                 tree_root_bytenr = arg_strtou64(optarg);
11340                                 break;
11341                         case GETOPT_VAL_CHUNK_TREE:
11342                                 chunk_root_bytenr = arg_strtou64(optarg);
11343                                 break;
11344                         case 'p':
11345                                 ctx.progress_enabled = true;
11346                                 break;
11347                         case '?':
11348                         case 'h':
11349                                 usage(cmd_check_usage);
11350                         case GETOPT_VAL_REPAIR:
11351                                 printf("enabling repair mode\n");
11352                                 repair = 1;
11353                                 ctree_flags |= OPEN_CTREE_WRITES;
11354                                 break;
11355                         case GETOPT_VAL_READONLY:
11356                                 readonly = 1;
11357                                 break;
11358                         case GETOPT_VAL_INIT_CSUM:
11359                                 printf("Creating a new CRC tree\n");
11360                                 init_csum_tree = 1;
11361                                 repair = 1;
11362                                 ctree_flags |= OPEN_CTREE_WRITES;
11363                                 break;
11364                         case GETOPT_VAL_INIT_EXTENT:
11365                                 init_extent_tree = 1;
11366                                 ctree_flags |= (OPEN_CTREE_WRITES |
11367                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11368                                 repair = 1;
11369                                 break;
11370                         case GETOPT_VAL_CHECK_CSUM:
11371                                 check_data_csum = 1;
11372                                 break;
11373                         case GETOPT_VAL_MODE:
11374                                 check_mode = parse_check_mode(optarg);
11375                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11376                                         error("unknown mode: %s", optarg);
11377                                         exit(1);
11378                                 }
11379                                 break;
11380                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
11381                                 if (strcmp(optarg, "v1") != 0) {
11382                                         error(
11383                         "only v1 support implmented, unrecognized value %s",
11384                         optarg);
11385                                         exit(1);
11386                                 }
11387                                 clear_space_cache = 1;
11388                                 ctree_flags |= OPEN_CTREE_WRITES;
11389                                 break;
11390                 }
11391         }
11392
11393         if (check_argc_exact(argc - optind, 1))
11394                 usage(cmd_check_usage);
11395
11396         if (ctx.progress_enabled) {
11397                 ctx.tp = TASK_NOTHING;
11398                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11399         }
11400
11401         /* This check is the only reason for --readonly to exist */
11402         if (readonly && repair) {
11403                 error("repair options are not compatible with --readonly");
11404                 exit(1);
11405         }
11406
11407         /*
11408          * Not supported yet
11409          */
11410         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11411                 error("low memory mode doesn't support repair yet");
11412                 exit(1);
11413         }
11414
11415         radix_tree_init();
11416         cache_tree_init(&root_cache);
11417
11418         if((ret = check_mounted(argv[optind])) < 0) {
11419                 error("could not check mount status: %s", strerror(-ret));
11420                 goto err_out;
11421         } else if(ret) {
11422                 error("%s is currently mounted, aborting", argv[optind]);
11423                 ret = -EBUSY;
11424                 goto err_out;
11425         }
11426
11427         /* only allow partial opening under repair mode */
11428         if (repair)
11429                 ctree_flags |= OPEN_CTREE_PARTIAL;
11430
11431         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11432                                   chunk_root_bytenr, ctree_flags);
11433         if (!info) {
11434                 error("cannot open file system");
11435                 ret = -EIO;
11436                 goto err_out;
11437         }
11438
11439         global_info = info;
11440         root = info->fs_root;
11441         if (clear_space_cache) {
11442                 if (btrfs_fs_compat_ro(info,
11443                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11444                         error(
11445                         "free space cache v2 detected, clearing not implemented");
11446                         ret = 1;
11447                         goto close_out;
11448                 }
11449                 printf("Clearing free space cache\n");
11450                 ret = clear_free_space_cache(info);
11451                 if (ret) {
11452                         error("failed to clear free space cache");
11453                         ret = 1;
11454                 } else {
11455                         printf("Free space cache cleared\n");
11456                 }
11457                 goto close_out;
11458         }
11459
11460         /*
11461          * repair mode will force us to commit transaction which
11462          * will make us fail to load log tree when mounting.
11463          */
11464         if (repair && btrfs_super_log_root(info->super_copy)) {
11465                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11466                 if (!ret) {
11467                         ret = 1;
11468                         goto close_out;
11469                 }
11470                 ret = zero_log_tree(root);
11471                 if (ret) {
11472                         error("failed to zero log tree: %d", ret);
11473                         goto close_out;
11474                 }
11475         }
11476
11477         uuid_unparse(info->super_copy->fsid, uuidbuf);
11478         if (qgroup_report) {
11479                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11480                        uuidbuf);
11481                 ret = qgroup_verify_all(info);
11482                 if (ret == 0)
11483                         report_qgroups(1);
11484                 goto close_out;
11485         }
11486         if (subvolid) {
11487                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11488                        subvolid, argv[optind], uuidbuf);
11489                 ret = print_extent_state(info, subvolid);
11490                 goto close_out;
11491         }
11492         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11493
11494         if (!extent_buffer_uptodate(info->tree_root->node) ||
11495             !extent_buffer_uptodate(info->dev_root->node) ||
11496             !extent_buffer_uptodate(info->chunk_root->node)) {
11497                 error("critical roots corrupted, unable to check the filesystem");
11498                 ret = -EIO;
11499                 goto close_out;
11500         }
11501
11502         if (init_extent_tree || init_csum_tree) {
11503                 struct btrfs_trans_handle *trans;
11504
11505                 trans = btrfs_start_transaction(info->extent_root, 0);
11506                 if (IS_ERR(trans)) {
11507                         error("error starting transaction");
11508                         ret = PTR_ERR(trans);
11509                         goto close_out;
11510                 }
11511
11512                 if (init_extent_tree) {
11513                         printf("Creating a new extent tree\n");
11514                         ret = reinit_extent_tree(trans, info);
11515                         if (ret)
11516                                 goto close_out;
11517                 }
11518
11519                 if (init_csum_tree) {
11520                         printf("Reinitialize checksum tree\n");
11521                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11522                         if (ret) {
11523                                 error("checksum tree initialization failed: %d",
11524                                                 ret);
11525                                 ret = -EIO;
11526                                 goto close_out;
11527                         }
11528
11529                         ret = fill_csum_tree(trans, info->csum_root,
11530                                              init_extent_tree);
11531                         if (ret) {
11532                                 error("checksum tree refilling failed: %d", ret);
11533                                 return -EIO;
11534                         }
11535                 }
11536                 /*
11537                  * Ok now we commit and run the normal fsck, which will add
11538                  * extent entries for all of the items it finds.
11539                  */
11540                 ret = btrfs_commit_transaction(trans, info->extent_root);
11541                 if (ret)
11542                         goto close_out;
11543         }
11544         if (!extent_buffer_uptodate(info->extent_root->node)) {
11545                 error("critical: extent_root, unable to check the filesystem");
11546                 ret = -EIO;
11547                 goto close_out;
11548         }
11549         if (!extent_buffer_uptodate(info->csum_root->node)) {
11550                 error("critical: csum_root, unable to check the filesystem");
11551                 ret = -EIO;
11552                 goto close_out;
11553         }
11554
11555         if (!ctx.progress_enabled)
11556                 printf("checking extents");
11557         if (check_mode == CHECK_MODE_LOWMEM)
11558                 ret = check_chunks_and_extents_v2(root);
11559         else
11560                 ret = check_chunks_and_extents(root);
11561         if (ret)
11562                 printf("Errors found in extent allocation tree or chunk allocation");
11563
11564         ret = repair_root_items(info);
11565         if (ret < 0)
11566                 goto close_out;
11567         if (repair) {
11568                 fprintf(stderr, "Fixed %d roots.\n", ret);
11569                 ret = 0;
11570         } else if (ret > 0) {
11571                 fprintf(stderr,
11572                        "Found %d roots with an outdated root item.\n",
11573                        ret);
11574                 fprintf(stderr,
11575                         "Please run a filesystem check with the option --repair to fix them.\n");
11576                 ret = 1;
11577                 goto close_out;
11578         }
11579
11580         if (!ctx.progress_enabled) {
11581                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11582                         fprintf(stderr, "checking free space tree\n");
11583                 else
11584                         fprintf(stderr, "checking free space cache\n");
11585         }
11586         ret = check_space_cache(root);
11587         if (ret)
11588                 goto out;
11589
11590         /*
11591          * We used to have to have these hole extents in between our real
11592          * extents so if we don't have this flag set we need to make sure there
11593          * are no gaps in the file extents for inodes, otherwise we can just
11594          * ignore it when this happens.
11595          */
11596         no_holes = btrfs_fs_incompat(root->fs_info,
11597                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11598         if (!ctx.progress_enabled)
11599                 fprintf(stderr, "checking fs roots\n");
11600         ret = check_fs_roots(root, &root_cache);
11601         if (ret)
11602                 goto out;
11603
11604         fprintf(stderr, "checking csums\n");
11605         ret = check_csums(root);
11606         if (ret)
11607                 goto out;
11608
11609         fprintf(stderr, "checking root refs\n");
11610         ret = check_root_refs(root, &root_cache);
11611         if (ret)
11612                 goto out;
11613
11614         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11615                 struct extent_buffer *eb;
11616
11617                 eb = list_first_entry(&root->fs_info->recow_ebs,
11618                                       struct extent_buffer, recow);
11619                 list_del_init(&eb->recow);
11620                 ret = recow_extent_buffer(root, eb);
11621                 if (ret)
11622                         break;
11623         }
11624
11625         while (!list_empty(&delete_items)) {
11626                 struct bad_item *bad;
11627
11628                 bad = list_first_entry(&delete_items, struct bad_item, list);
11629                 list_del_init(&bad->list);
11630                 if (repair)
11631                         ret = delete_bad_item(root, bad);
11632                 free(bad);
11633         }
11634
11635         if (info->quota_enabled) {
11636                 int err;
11637                 fprintf(stderr, "checking quota groups\n");
11638                 err = qgroup_verify_all(info);
11639                 if (err)
11640                         goto out;
11641                 report_qgroups(0);
11642                 err = repair_qgroups(info, &qgroups_repaired);
11643                 if (err)
11644                         goto out;
11645         }
11646
11647         if (!list_empty(&root->fs_info->recow_ebs)) {
11648                 error("transid errors in file system");
11649                 ret = 1;
11650         }
11651 out:
11652         /* Don't override original ret */
11653         if (!ret && qgroups_repaired)
11654                 ret = qgroups_repaired;
11655
11656         if (found_old_backref) { /*
11657                  * there was a disk format change when mixed
11658                  * backref was in testing tree. The old format
11659                  * existed about one week.
11660                  */
11661                 printf("\n * Found old mixed backref format. "
11662                        "The old format is not supported! *"
11663                        "\n * Please mount the FS in readonly mode, "
11664                        "backup data and re-format the FS. *\n\n");
11665                 ret = 1;
11666         }
11667         printf("found %llu bytes used err is %d\n",
11668                (unsigned long long)bytes_used, ret);
11669         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11670         printf("total tree bytes: %llu\n",
11671                (unsigned long long)total_btree_bytes);
11672         printf("total fs tree bytes: %llu\n",
11673                (unsigned long long)total_fs_tree_bytes);
11674         printf("total extent tree bytes: %llu\n",
11675                (unsigned long long)total_extent_tree_bytes);
11676         printf("btree space waste bytes: %llu\n",
11677                (unsigned long long)btree_space_waste);
11678         printf("file data blocks allocated: %llu\n referenced %llu\n",
11679                 (unsigned long long)data_bytes_allocated,
11680                 (unsigned long long)data_bytes_referenced);
11681
11682         free_qgroup_counts();
11683         free_root_recs_tree(&root_cache);
11684 close_out:
11685         close_ctree(root);
11686 err_out:
11687         if (ctx.progress_enabled)
11688                 task_deinit(ctx.info);
11689
11690         return ret;
11691 }