btrfs-progs: check: use on-stack path buffer in try_repair_inode
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phases reported by the progress indicator.  The value is used directly as
 * an index into the message table in print_status_check(), so the order of
 * the first three entries must match that table.  TASK_NOTHING has no
 * message; print_status_check() returns immediately when it sees it.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
51
/* State handed to the progress indicator callbacks (print_status_check()). */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output was requested; not read in this part of the file */
        int progress_enabled;
        /* Phase whose message print_status_check() displays */
        enum task_position tp;

        /* Handle passed to task_period_start() / task_period_wait() */
        struct task_info *info;
};
58
/*
 * File-scope checker state.
 *
 * NOTE(review): none of these objects is read or written in this part of
 * the file; their exact meaning has to be confirmed against the users
 * further down.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Check implementations selectable by name, see parse_check_mode():
 * "lowmem" maps to CHECK_MODE_LOWMEM, "orig" and "original" map to
 * CHECK_MODE_ORIGINAL, any other string yields CHECK_MODE_UNKNOWN.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
/*
 * Common header of a back reference.  It is embedded as the 'node' member
 * of both struct data_backref and struct tree_backref and linked through
 * 'list'.
 */
struct extent_backref {
        struct list_head list;
        /* NOTE(review): presumably tells data_backref from tree_backref; confirm with the users */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
/*
 * Back reference record for a data extent.  The generic header is embedded
 * as 'node'; to_data_backref() converts a header pointer back to this type.
 */
struct data_backref {
        struct extent_backref node;
        /* 'parent' and 'root' share storage; only one of them is meaningful */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
/*
 * Much like data_backref, with the undetermined members removed and
 * linked through a plain list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;   /* printed as "inode" by print_orphan_data_extents() */
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
134
/*
 * Back reference record that carries only the generic header plus a
 * parent/root identifier; to_tree_backref() converts a header pointer back
 * to this type.
 */
struct tree_backref {
        struct extent_backref node;
        /* 'parent' and 'root' share storage; only one of them is meaningful */
        union {
                u64 parent;
                u64 root;
        };
};
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
/*
 * Explicit "not determined yet" value for
 * extent_record::flag_block_full_backref.  That member is a 2-bit field,
 * which is what allows it to hold the value 2 besides 0 and 1.
 */
enum { FLAG_UNSET = 2 };
150
/*
 * Per-extent bookkeeping record.  It can be linked through 'list' (see
 * to_extent_record()) and embeds a cache_extent.
 * NOTE(review): the producers and consumers of the individual members are
 * outside this part of the file; the member notes below are limited to what
 * the declarations themselves show.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* Two bits wide so that it can also hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
183 struct inode_backref {
184         struct list_head list;
185         unsigned int found_dir_item:1;
186         unsigned int found_dir_index:1;
187         unsigned int found_inode_ref:1;
188         u8 filetype;
189         u8 ref_type;
190         int errors;
191         u64 dir;
192         u64 index;
193         u16 namelen;
194         char name[0];
195 };
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
/*
 * List-linked summary of a root item.
 * NOTE(review): filled and consumed outside this part of the file; the
 * member names suggest they mirror the on-disk root item (bytenr, level,
 * drop progress) - confirm with the users.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
212
/*
 * Error bits for back references.  They are OR-ed into the 'errors' member
 * of a backref (see maybe_free_inode_rec()) and decoded into text by
 * print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
226
/*
 * A byte range [start, start + len) not covered by any file extent.
 * Holes live in an rb-tree ordered by 'start' (see compare_hole()) and are
 * merged on insertion by add_file_extent_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
232
/*
 * Everything collected about one inode while checking a tree.  Records are
 * created by get_inode_rec(), shared by reference count and duplicated by
 * clone_inode_rec() before a shared record is modified.
 */
struct inode_record {
        /* List of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* Compared against nlink in can_free_inode_rec() */
        u32 found_link;
        u64 found_size;
        /* Initialised to (u64)-1 by get_inode_rec() */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* List of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* Reference count; free_inode_rec() releases the record at zero */
        u32 refs;
};
260
/*
 * Error bits stored in inode_record::errors and decoded into text by
 * print_inode_error().
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
276
277 struct root_backref {
278         struct list_head list;
279         unsigned int found_dir_item:1;
280         unsigned int found_dir_index:1;
281         unsigned int found_back_ref:1;
282         unsigned int found_forward_ref:1;
283         unsigned int reachable:1;
284         int errors;
285         u64 ref_root;
286         u64 dir;
287         u64 index;
288         u16 namelen;
289         char name[0];
290 };
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
/*
 * Per-root record that owns a list of back references and embeds a
 * cache_extent.
 * NOTE(review): users are outside this part of the file; 'backrefs'
 * presumably holds struct root_backref entries - confirm.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
304
/*
 * Cache tree entry carrying an opaque pointer.  get_inode_rec() uses it to
 * map an inode number (cache.start = ino, cache.size = 1) to its
 * struct inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
309
/*
 * Cache tree entry bundling a root cache, an inode cache and a current
 * inode record under a reference count.
 * NOTE(review): users are outside this part of the file; walk_control keeps
 * one pointer to this type per tree level.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
317
/*
 * Start/size pair describing a block.
 * NOTE(review): users and units are outside this part of the file.
 */
struct block_info {
        u64 start;
        u32 size;
};
322
/*
 * Tree walk state: a cache tree named 'shared' plus one shared_node slot
 * per tree level (BTRFS_MAX_LEVEL entries).
 * NOTE(review): users are outside this part of the file; 'active_node' is
 * presumably an index into 'nodes' - confirm.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
329
/*
 * List-linked (key, root id) pair.
 * NOTE(review): users are outside this part of the file; presumably queued
 * on the file-scope 'delete_items' list - confirm.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
335
/*
 * List-linked (bytenr, bytes) pair with two counters.
 * NOTE(review): users are outside this part of the file; the meaning of
 * 'count' and 'broken' has to be confirmed there.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
343
/*
 * Information gathered about one root, stored in a cache tree through the
 * embedded cache_extent.
 * NOTE(review): presumably kept in the file-scope 'roots_info_cache' -
 * confirm with the users.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
353
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet.  Just internal use for error
 * classification.
 *
 * Every flag owns its own bit.  CROSSING_STRIPE_BOUNDARY used to share
 * bit 4 with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable in a combined mask; it now uses the next free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Progress indicator completion hook: ends the status line that
 * print_status_check() keeps rewriting.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         struct rb_node *rb;
663         size_t size;
664         int ret;
665
666         rec = malloc(sizeof(*rec));
667         if (!rec)
668                 return ERR_PTR(-ENOMEM);
669         memcpy(rec, orig_rec, sizeof(*rec));
670         rec->refs = 1;
671         INIT_LIST_HEAD(&rec->backrefs);
672         INIT_LIST_HEAD(&rec->orphan_extents);
673         rec->holes = RB_ROOT;
674
675         list_for_each_entry(orig, &orig_rec->backrefs, list) {
676                 size = sizeof(*orig) + orig->namelen + 1;
677                 backref = malloc(size);
678                 if (!backref) {
679                         ret = -ENOMEM;
680                         goto cleanup;
681                 }
682                 memcpy(backref, orig, size);
683                 list_add_tail(&backref->list, &rec->backrefs);
684         }
685         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686                 dst_orphan = malloc(sizeof(*dst_orphan));
687                 if (!dst_orphan) {
688                         ret = -ENOMEM;
689                         goto cleanup;
690                 }
691                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
693         }
694         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
695         if (ret < 0)
696                 goto cleanup_rb;
697
698         return rec;
699
700 cleanup_rb:
701         rb = rb_first(&rec->holes);
702         while (rb) {
703                 struct file_extent_hole *hole;
704
705                 hole = rb_entry(rb, struct file_extent_hole, node);
706                 rb = rb_next(rb);
707                 free(hole);
708         }
709
710 cleanup:
711         if (!list_empty(&rec->backrefs))
712                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713                         list_del(&orig->list);
714                         free(orig);
715                 }
716
717         if (!list_empty(&rec->orphan_extents))
718                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719                         list_del(&orig->list);
720                         free(orig);
721                 }
722
723         free(rec);
724
725         return ERR_PTR(ret);
726 }
727
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
729                                       u64 objectid)
730 {
731         struct orphan_data_extent *orphan;
732
733         if (list_empty(orphan_extents))
734                 return;
735         printf("The following data extent is lost in tree %llu:\n",
736                objectid);
737         list_for_each_entry(orphan, orphan_extents, list) {
738                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
740                        orphan->disk_len);
741         }
742 }
743
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
745 {
746         u64 root_objectid = root->root_key.objectid;
747         int errors = rec->errors;
748
749         if (!errors)
750                 return;
751         /* reloc root errors, we print its corresponding fs root objectid*/
752         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753                 root_objectid = root->root_key.offset;
754                 fprintf(stderr, "reloc");
755         }
756         fprintf(stderr, "root %llu inode %llu errors %x",
757                 (unsigned long long) root_objectid,
758                 (unsigned long long) rec->ino, rec->errors);
759
760         if (errors & I_ERR_NO_INODE_ITEM)
761                 fprintf(stderr, ", no inode item");
762         if (errors & I_ERR_NO_ORPHAN_ITEM)
763                 fprintf(stderr, ", no orphan item");
764         if (errors & I_ERR_DUP_INODE_ITEM)
765                 fprintf(stderr, ", dup inode item");
766         if (errors & I_ERR_DUP_DIR_INDEX)
767                 fprintf(stderr, ", dup dir index");
768         if (errors & I_ERR_ODD_DIR_ITEM)
769                 fprintf(stderr, ", odd dir item");
770         if (errors & I_ERR_ODD_FILE_EXTENT)
771                 fprintf(stderr, ", odd file extent");
772         if (errors & I_ERR_BAD_FILE_EXTENT)
773                 fprintf(stderr, ", bad file extent");
774         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775                 fprintf(stderr, ", file extent overlap");
776         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777                 fprintf(stderr, ", file extent discount");
778         if (errors & I_ERR_DIR_ISIZE_WRONG)
779                 fprintf(stderr, ", dir isize wrong");
780         if (errors & I_ERR_FILE_NBYTES_WRONG)
781                 fprintf(stderr, ", nbytes wrong");
782         if (errors & I_ERR_ODD_CSUM_ITEM)
783                 fprintf(stderr, ", odd csum item");
784         if (errors & I_ERR_SOME_CSUM_MISSING)
785                 fprintf(stderr, ", some csum missing");
786         if (errors & I_ERR_LINK_COUNT_WRONG)
787                 fprintf(stderr, ", link count wrong");
788         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789                 fprintf(stderr, ", orphan file extent");
790         fprintf(stderr, "\n");
791         /* Print the orphan extents if needed */
792         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
794
795         /* Print the holes if needed */
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797                 struct file_extent_hole *hole;
798                 struct rb_node *node;
799                 int found = 0;
800
801                 node = rb_first(&rec->holes);
802                 fprintf(stderr, "Found file extent holes:\n");
803                 while (node) {
804                         found = 1;
805                         hole = rb_entry(node, struct file_extent_hole, node);
806                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
807                                 hole->start, hole->len);
808                         node = rb_next(node);
809                 }
810                 if (!found)
811                         fprintf(stderr, "\tstart: 0, len: %llu\n",
812                                 round_up(rec->isize, root->sectorsize));
813         }
814 }
815
816 static void print_ref_error(int errors)
817 {
818         if (errors & REF_ERR_NO_DIR_ITEM)
819                 fprintf(stderr, ", no dir item");
820         if (errors & REF_ERR_NO_DIR_INDEX)
821                 fprintf(stderr, ", no dir index");
822         if (errors & REF_ERR_NO_INODE_REF)
823                 fprintf(stderr, ", no inode ref");
824         if (errors & REF_ERR_DUP_DIR_ITEM)
825                 fprintf(stderr, ", dup dir item");
826         if (errors & REF_ERR_DUP_DIR_INDEX)
827                 fprintf(stderr, ", dup dir index");
828         if (errors & REF_ERR_DUP_INODE_REF)
829                 fprintf(stderr, ", dup inode ref");
830         if (errors & REF_ERR_INDEX_UNMATCH)
831                 fprintf(stderr, ", index mismatch");
832         if (errors & REF_ERR_FILETYPE_UNMATCH)
833                 fprintf(stderr, ", filetype mismatch");
834         if (errors & REF_ERR_NAME_TOO_LONG)
835                 fprintf(stderr, ", name too long");
836         if (errors & REF_ERR_NO_ROOT_REF)
837                 fprintf(stderr, ", no root ref");
838         if (errors & REF_ERR_NO_ROOT_BACKREF)
839                 fprintf(stderr, ", no root backref");
840         if (errors & REF_ERR_DUP_ROOT_REF)
841                 fprintf(stderr, ", dup root ref");
842         if (errors & REF_ERR_DUP_ROOT_BACKREF)
843                 fprintf(stderr, ", dup root backref");
844         fprintf(stderr, "\n");
845 }
846
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
848                                           u64 ino, int mod)
849 {
850         struct ptr_node *node;
851         struct cache_extent *cache;
852         struct inode_record *rec = NULL;
853         int ret;
854
855         cache = lookup_cache_extent(inode_cache, ino, 1);
856         if (cache) {
857                 node = container_of(cache, struct ptr_node, cache);
858                 rec = node->data;
859                 if (mod && rec->refs > 1) {
860                         node->data = clone_inode_rec(rec);
861                         if (IS_ERR(node->data))
862                                 return node->data;
863                         rec->refs--;
864                         rec = node->data;
865                 }
866         } else if (mod) {
867                 rec = calloc(1, sizeof(*rec));
868                 if (!rec)
869                         return ERR_PTR(-ENOMEM);
870                 rec->ino = ino;
871                 rec->extent_start = (u64)-1;
872                 rec->refs = 1;
873                 INIT_LIST_HEAD(&rec->backrefs);
874                 INIT_LIST_HEAD(&rec->orphan_extents);
875                 rec->holes = RB_ROOT;
876
877                 node = malloc(sizeof(*node));
878                 if (!node) {
879                         free(rec);
880                         return ERR_PTR(-ENOMEM);
881                 }
882                 node->cache.start = ino;
883                 node->cache.size = 1;
884                 node->data = rec;
885
886                 if (ino == BTRFS_FREE_INO_OBJECTID)
887                         rec->found_link = 1;
888
889                 ret = insert_cache_extent(inode_cache, &node->cache);
890                 if (ret)
891                         return ERR_PTR(-EEXIST);
892         }
893         return rec;
894 }
895
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
897 {
898         struct orphan_data_extent *orphan;
899
900         while (!list_empty(orphan_extents)) {
901                 orphan = list_entry(orphan_extents->next,
902                                     struct orphan_data_extent, list);
903                 list_del(&orphan->list);
904                 free(orphan);
905         }
906 }
907
908 static void free_inode_rec(struct inode_record *rec)
909 {
910         struct inode_backref *backref;
911
912         if (--rec->refs > 0)
913                 return;
914
915         while (!list_empty(&rec->backrefs)) {
916                 backref = to_inode_backref(rec->backrefs.next);
917                 list_del(&backref->list);
918                 free(backref);
919         }
920         free_orphan_data_extents(&rec->orphan_extents);
921         free_file_extent_holes(&rec->holes);
922         free(rec);
923 }
924
925 static int can_free_inode_rec(struct inode_record *rec)
926 {
927         if (!rec->errors && rec->checked && rec->found_inode_item &&
928             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
929                 return 1;
930         return 0;
931 }
932
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934                                  struct inode_record *rec)
935 {
936         struct cache_extent *cache;
937         struct inode_backref *tmp, *backref;
938         struct ptr_node *node;
939         u8 filetype;
940
941         if (!rec->found_inode_item)
942                 return;
943
944         filetype = imode_to_type(rec->imode);
945         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946                 if (backref->found_dir_item && backref->found_dir_index) {
947                         if (backref->filetype != filetype)
948                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949                         if (!backref->errors && backref->found_inode_ref &&
950                             rec->nlink == rec->found_link) {
951                                 list_del(&backref->list);
952                                 free(backref);
953                         }
954                 }
955         }
956
957         if (!rec->checked || rec->merging)
958                 return;
959
960         if (S_ISDIR(rec->imode)) {
961                 if (rec->found_size != rec->isize)
962                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963                 if (rec->found_file_extent)
964                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
965         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966                 if (rec->found_dir_item)
967                         rec->errors |= I_ERR_ODD_DIR_ITEM;
968                 if (rec->found_size != rec->nbytes)
969                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
970                 if (rec->nlink > 0 && !no_holes &&
971                     (rec->extent_end < rec->isize ||
972                      first_extent_gap(&rec->holes) < rec->isize))
973                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
974         }
975
976         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977                 if (rec->found_csum_item && rec->nodatasum)
978                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
979                 if (rec->some_csum_missing && !rec->nodatasum)
980                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
981         }
982
983         BUG_ON(rec->refs != 1);
984         if (can_free_inode_rec(rec)) {
985                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986                 node = container_of(cache, struct ptr_node, cache);
987                 BUG_ON(node->data != rec);
988                 remove_cache_extent(inode_cache, &node->cache);
989                 free(node);
990                 free_inode_rec(rec);
991         }
992 }
993
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
995 {
996         struct btrfs_path path;
997         struct btrfs_key key;
998         int ret;
999
1000         key.objectid = BTRFS_ORPHAN_OBJECTID;
1001         key.type = BTRFS_ORPHAN_ITEM_KEY;
1002         key.offset = ino;
1003
1004         btrfs_init_path(&path);
1005         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006         btrfs_release_path(&path);
1007         if (ret > 0)
1008                 ret = -ENOENT;
1009         return ret;
1010 }
1011
1012 static int process_inode_item(struct extent_buffer *eb,
1013                               int slot, struct btrfs_key *key,
1014                               struct shared_node *active_node)
1015 {
1016         struct inode_record *rec;
1017         struct btrfs_inode_item *item;
1018
1019         rec = active_node->current;
1020         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021         if (rec->found_inode_item) {
1022                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1023                 return 1;
1024         }
1025         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026         rec->nlink = btrfs_inode_nlink(eb, item);
1027         rec->isize = btrfs_inode_size(eb, item);
1028         rec->nbytes = btrfs_inode_nbytes(eb, item);
1029         rec->imode = btrfs_inode_mode(eb, item);
1030         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1031                 rec->nodatasum = 1;
1032         rec->found_inode_item = 1;
1033         if (rec->nlink == 0)
1034                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035         maybe_free_inode_rec(&active_node->inode_cache, rec);
1036         return 0;
1037 }
1038
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1040                                                 const char *name,
1041                                                 int namelen, u64 dir)
1042 {
1043         struct inode_backref *backref;
1044
1045         list_for_each_entry(backref, &rec->backrefs, list) {
1046                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1047                         break;
1048                 if (backref->dir != dir || backref->namelen != namelen)
1049                         continue;
1050                 if (memcmp(name, backref->name, namelen))
1051                         continue;
1052                 return backref;
1053         }
1054
1055         backref = malloc(sizeof(*backref) + namelen + 1);
1056         if (!backref)
1057                 return NULL;
1058         memset(backref, 0, sizeof(*backref));
1059         backref->dir = dir;
1060         backref->namelen = namelen;
1061         memcpy(backref->name, name, namelen);
1062         backref->name[namelen] = '\0';
1063         list_add_tail(&backref->list, &rec->backrefs);
1064         return backref;
1065 }
1066
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068                              u64 ino, u64 dir, u64 index,
1069                              const char *name, int namelen,
1070                              u8 filetype, u8 itemtype, int errors)
1071 {
1072         struct inode_record *rec;
1073         struct inode_backref *backref;
1074
1075         rec = get_inode_rec(inode_cache, ino, 1);
1076         BUG_ON(IS_ERR(rec));
1077         backref = get_inode_backref(rec, name, namelen, dir);
1078         BUG_ON(!backref);
1079         if (errors)
1080                 backref->errors |= errors;
1081         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082                 if (backref->found_dir_index)
1083                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084                 if (backref->found_inode_ref && backref->index != index)
1085                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1086                 if (backref->found_dir_item && backref->filetype != filetype)
1087                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1088
1089                 backref->index = index;
1090                 backref->filetype = filetype;
1091                 backref->found_dir_index = 1;
1092         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1093                 rec->found_link++;
1094                 if (backref->found_dir_item)
1095                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096                 if (backref->found_dir_index && backref->filetype != filetype)
1097                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1098
1099                 backref->filetype = filetype;
1100                 backref->found_dir_item = 1;
1101         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103                 if (backref->found_inode_ref)
1104                         backref->errors |= REF_ERR_DUP_INODE_REF;
1105                 if (backref->found_dir_index && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 else
1108                         backref->index = index;
1109
1110                 backref->ref_type = itemtype;
1111                 backref->found_inode_ref = 1;
1112         } else {
1113                 BUG_ON(1);
1114         }
1115
1116         maybe_free_inode_rec(inode_cache, rec);
1117         return 0;
1118 }
1119
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121                             struct cache_tree *dst_cache)
1122 {
1123         struct inode_backref *backref;
1124         u32 dir_count = 0;
1125         int ret = 0;
1126
1127         dst->merging = 1;
1128         list_for_each_entry(backref, &src->backrefs, list) {
1129                 if (backref->found_dir_index) {
1130                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1131                                         backref->index, backref->name,
1132                                         backref->namelen, backref->filetype,
1133                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1134                 }
1135                 if (backref->found_dir_item) {
1136                         dir_count++;
1137                         add_inode_backref(dst_cache, dst->ino,
1138                                         backref->dir, 0, backref->name,
1139                                         backref->namelen, backref->filetype,
1140                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1141                 }
1142                 if (backref->found_inode_ref) {
1143                         add_inode_backref(dst_cache, dst->ino,
1144                                         backref->dir, backref->index,
1145                                         backref->name, backref->namelen, 0,
1146                                         backref->ref_type, backref->errors);
1147                 }
1148         }
1149
1150         if (src->found_dir_item)
1151                 dst->found_dir_item = 1;
1152         if (src->found_file_extent)
1153                 dst->found_file_extent = 1;
1154         if (src->found_csum_item)
1155                 dst->found_csum_item = 1;
1156         if (src->some_csum_missing)
1157                 dst->some_csum_missing = 1;
1158         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1160                 if (ret < 0)
1161                         return ret;
1162         }
1163
1164         BUG_ON(src->found_link < dir_count);
1165         dst->found_link += src->found_link - dir_count;
1166         dst->found_size += src->found_size;
1167         if (src->extent_start != (u64)-1) {
1168                 if (dst->extent_start == (u64)-1) {
1169                         dst->extent_start = src->extent_start;
1170                         dst->extent_end = src->extent_end;
1171                 } else {
1172                         if (dst->extent_end > src->extent_start)
1173                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174                         else if (dst->extent_end < src->extent_start) {
1175                                 ret = add_file_extent_hole(&dst->holes,
1176                                         dst->extent_end,
1177                                         src->extent_start - dst->extent_end);
1178                         }
1179                         if (dst->extent_end < src->extent_end)
1180                                 dst->extent_end = src->extent_end;
1181                 }
1182         }
1183
1184         dst->errors |= src->errors;
1185         if (src->found_inode_item) {
1186                 if (!dst->found_inode_item) {
1187                         dst->nlink = src->nlink;
1188                         dst->isize = src->isize;
1189                         dst->nbytes = src->nbytes;
1190                         dst->imode = src->imode;
1191                         dst->nodatasum = src->nodatasum;
1192                         dst->found_inode_item = 1;
1193                 } else {
1194                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1195                 }
1196         }
1197         dst->merging = 0;
1198
1199         return 0;
1200 }
1201
1202 static int splice_shared_node(struct shared_node *src_node,
1203                               struct shared_node *dst_node)
1204 {
1205         struct cache_extent *cache;
1206         struct ptr_node *node, *ins;
1207         struct cache_tree *src, *dst;
1208         struct inode_record *rec, *conflict;
1209         u64 current_ino = 0;
1210         int splice = 0;
1211         int ret;
1212
1213         if (--src_node->refs == 0)
1214                 splice = 1;
1215         if (src_node->current)
1216                 current_ino = src_node->current->ino;
1217
1218         src = &src_node->root_cache;
1219         dst = &dst_node->root_cache;
1220 again:
1221         cache = search_cache_extent(src, 0);
1222         while (cache) {
1223                 node = container_of(cache, struct ptr_node, cache);
1224                 rec = node->data;
1225                 cache = next_cache_extent(cache);
1226
1227                 if (splice) {
1228                         remove_cache_extent(src, &node->cache);
1229                         ins = node;
1230                 } else {
1231                         ins = malloc(sizeof(*ins));
1232                         BUG_ON(!ins);
1233                         ins->cache.start = node->cache.start;
1234                         ins->cache.size = node->cache.size;
1235                         ins->data = rec;
1236                         rec->refs++;
1237                 }
1238                 ret = insert_cache_extent(dst, &ins->cache);
1239                 if (ret == -EEXIST) {
1240                         conflict = get_inode_rec(dst, rec->ino, 1);
1241                         BUG_ON(IS_ERR(conflict));
1242                         merge_inode_recs(rec, conflict, dst);
1243                         if (rec->checked) {
1244                                 conflict->checked = 1;
1245                                 if (dst_node->current == conflict)
1246                                         dst_node->current = NULL;
1247                         }
1248                         maybe_free_inode_rec(dst, conflict);
1249                         free_inode_rec(rec);
1250                         free(ins);
1251                 } else {
1252                         BUG_ON(ret);
1253                 }
1254         }
1255
1256         if (src == &src_node->root_cache) {
1257                 src = &src_node->inode_cache;
1258                 dst = &dst_node->inode_cache;
1259                 goto again;
1260         }
1261
1262         if (current_ino > 0 && (!dst_node->current ||
1263             current_ino > dst_node->current->ino)) {
1264                 if (dst_node->current) {
1265                         dst_node->current->checked = 1;
1266                         maybe_free_inode_rec(dst, dst_node->current);
1267                 }
1268                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269                 BUG_ON(IS_ERR(dst_node->current));
1270         }
1271         return 0;
1272 }
1273
1274 static void free_inode_ptr(struct cache_extent *cache)
1275 {
1276         struct ptr_node *node;
1277         struct inode_record *rec;
1278
1279         node = container_of(cache, struct ptr_node, cache);
1280         rec = node->data;
1281         free_inode_rec(rec);
1282         free(node);
1283 }
1284
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1286
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1288                                             u64 bytenr)
1289 {
1290         struct cache_extent *cache;
1291         struct shared_node *node;
1292
1293         cache = lookup_cache_extent(shared, bytenr, 1);
1294         if (cache) {
1295                 node = container_of(cache, struct shared_node, cache);
1296                 return node;
1297         }
1298         return NULL;
1299 }
1300
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1302 {
1303         int ret;
1304         struct shared_node *node;
1305
1306         node = calloc(1, sizeof(*node));
1307         if (!node)
1308                 return -ENOMEM;
1309         node->cache.start = bytenr;
1310         node->cache.size = 1;
1311         cache_tree_init(&node->root_cache);
1312         cache_tree_init(&node->inode_cache);
1313         node->refs = refs;
1314
1315         ret = insert_cache_extent(shared, &node->cache);
1316
1317         return ret;
1318 }
1319
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321                              struct walk_control *wc, int level)
1322 {
1323         struct shared_node *node;
1324         struct shared_node *dest;
1325         int ret;
1326
1327         if (level == wc->active_node)
1328                 return 0;
1329
1330         BUG_ON(wc->active_node <= level);
1331         node = find_shared_node(&wc->shared, bytenr);
1332         if (!node) {
1333                 ret = add_shared_node(&wc->shared, bytenr, refs);
1334                 BUG_ON(ret);
1335                 node = find_shared_node(&wc->shared, bytenr);
1336                 wc->nodes[level] = node;
1337                 wc->active_node = level;
1338                 return 0;
1339         }
1340
1341         if (wc->root_level == wc->active_node &&
1342             btrfs_root_refs(&root->root_item) == 0) {
1343                 if (--node->refs == 0) {
1344                         free_inode_recs_tree(&node->root_cache);
1345                         free_inode_recs_tree(&node->inode_cache);
1346                         remove_cache_extent(&wc->shared, &node->cache);
1347                         free(node);
1348                 }
1349                 return 1;
1350         }
1351
1352         dest = wc->nodes[wc->active_node];
1353         splice_shared_node(node, dest);
1354         if (node->refs == 0) {
1355                 remove_cache_extent(&wc->shared, &node->cache);
1356                 free(node);
1357         }
1358         return 1;
1359 }
1360
1361 static int leave_shared_node(struct btrfs_root *root,
1362                              struct walk_control *wc, int level)
1363 {
1364         struct shared_node *node;
1365         struct shared_node *dest;
1366         int i;
1367
1368         if (level == wc->root_level)
1369                 return 0;
1370
1371         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1372                 if (wc->nodes[i])
1373                         break;
1374         }
1375         BUG_ON(i >= BTRFS_MAX_LEVEL);
1376
1377         node = wc->nodes[wc->active_node];
1378         wc->nodes[wc->active_node] = NULL;
1379         wc->active_node = i;
1380
1381         dest = wc->nodes[wc->active_node];
1382         if (wc->active_node < wc->root_level ||
1383             btrfs_root_refs(&root->root_item) > 0) {
1384                 BUG_ON(node->refs <= 1);
1385                 splice_shared_node(node, dest);
1386         } else {
1387                 BUG_ON(node->refs < 2);
1388                 node->refs--;
1389         }
1390         return 0;
1391 }
1392
1393 /*
1394  * Returns:
1395  * < 0 - on error
1396  * 1   - if the root with id child_root_id is a child of root parent_root_id
1397  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1398  *       has other root(s) as parent(s)
1399  * 2   - if the root child_root_id doesn't have any parent roots
1400  */
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1402                          u64 child_root_id)
1403 {
1404         struct btrfs_path path;
1405         struct btrfs_key key;
1406         struct extent_buffer *leaf;
1407         int has_parent = 0;
1408         int ret;
1409
1410         btrfs_init_path(&path);
1411
1412         key.objectid = parent_root_id;
1413         key.type = BTRFS_ROOT_REF_KEY;
1414         key.offset = child_root_id;
1415         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1416                                 0, 0);
1417         if (ret < 0)
1418                 return ret;
1419         btrfs_release_path(&path);
1420         if (!ret)
1421                 return 1;
1422
1423         key.objectid = child_root_id;
1424         key.type = BTRFS_ROOT_BACKREF_KEY;
1425         key.offset = 0;
1426         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1427                                 0, 0);
1428         if (ret < 0)
1429                 goto out;
1430
1431         while (1) {
1432                 leaf = path.nodes[0];
1433                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1435                         if (ret)
1436                                 break;
1437                         leaf = path.nodes[0];
1438                 }
1439
1440                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1441                 if (key.objectid != child_root_id ||
1442                     key.type != BTRFS_ROOT_BACKREF_KEY)
1443                         break;
1444
1445                 has_parent = 1;
1446
1447                 if (key.offset == parent_root_id) {
1448                         btrfs_release_path(&path);
1449                         return 1;
1450                 }
1451
1452                 path.slots[0]++;
1453         }
1454 out:
1455         btrfs_release_path(&path);
1456         if (ret < 0)
1457                 return ret;
1458         return has_parent ? 0 : 2;
1459 }
1460
1461 static int process_dir_item(struct btrfs_root *root,
1462                             struct extent_buffer *eb,
1463                             int slot, struct btrfs_key *key,
1464                             struct shared_node *active_node)
1465 {
1466         u32 total;
1467         u32 cur = 0;
1468         u32 len;
1469         u32 name_len;
1470         u32 data_len;
1471         int error;
1472         int nritems = 0;
1473         u8 filetype;
1474         struct btrfs_dir_item *di;
1475         struct inode_record *rec;
1476         struct cache_tree *root_cache;
1477         struct cache_tree *inode_cache;
1478         struct btrfs_key location;
1479         char namebuf[BTRFS_NAME_LEN];
1480
1481         root_cache = &active_node->root_cache;
1482         inode_cache = &active_node->inode_cache;
1483         rec = active_node->current;
1484         rec->found_dir_item = 1;
1485
1486         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487         total = btrfs_item_size_nr(eb, slot);
1488         while (cur < total) {
1489                 nritems++;
1490                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491                 name_len = btrfs_dir_name_len(eb, di);
1492                 data_len = btrfs_dir_data_len(eb, di);
1493                 filetype = btrfs_dir_type(eb, di);
1494
1495                 rec->found_size += name_len;
1496                 if (name_len <= BTRFS_NAME_LEN) {
1497                         len = name_len;
1498                         error = 0;
1499                 } else {
1500                         len = BTRFS_NAME_LEN;
1501                         error = REF_ERR_NAME_TOO_LONG;
1502                 }
1503                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1504
1505                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506                         add_inode_backref(inode_cache, location.objectid,
1507                                           key->objectid, key->offset, namebuf,
1508                                           len, filetype, key->type, error);
1509                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510                         add_inode_backref(root_cache, location.objectid,
1511                                           key->objectid, key->offset,
1512                                           namebuf, len, filetype,
1513                                           key->type, error);
1514                 } else {
1515                         fprintf(stderr, "invalid location in dir item %u\n",
1516                                 location.type);
1517                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518                                           key->objectid, key->offset, namebuf,
1519                                           len, filetype, key->type, error);
1520                 }
1521
1522                 len = sizeof(*di) + name_len + data_len;
1523                 di = (struct btrfs_dir_item *)((char *)di + len);
1524                 cur += len;
1525         }
1526         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1528
1529         return 0;
1530 }
1531
1532 static int process_inode_ref(struct extent_buffer *eb,
1533                              int slot, struct btrfs_key *key,
1534                              struct shared_node *active_node)
1535 {
1536         u32 total;
1537         u32 cur = 0;
1538         u32 len;
1539         u32 name_len;
1540         u64 index;
1541         int error;
1542         struct cache_tree *inode_cache;
1543         struct btrfs_inode_ref *ref;
1544         char namebuf[BTRFS_NAME_LEN];
1545
1546         inode_cache = &active_node->inode_cache;
1547
1548         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549         total = btrfs_item_size_nr(eb, slot);
1550         while (cur < total) {
1551                 name_len = btrfs_inode_ref_name_len(eb, ref);
1552                 index = btrfs_inode_ref_index(eb, ref);
1553                 if (name_len <= BTRFS_NAME_LEN) {
1554                         len = name_len;
1555                         error = 0;
1556                 } else {
1557                         len = BTRFS_NAME_LEN;
1558                         error = REF_ERR_NAME_TOO_LONG;
1559                 }
1560                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561                 add_inode_backref(inode_cache, key->objectid, key->offset,
1562                                   index, namebuf, len, 0, key->type, error);
1563
1564                 len = sizeof(*ref) + name_len;
1565                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1566                 cur += len;
1567         }
1568         return 0;
1569 }
1570
1571 static int process_inode_extref(struct extent_buffer *eb,
1572                                 int slot, struct btrfs_key *key,
1573                                 struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u64 index;
1580         u64 parent;
1581         int error;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_inode_extref *extref;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         inode_cache = &active_node->inode_cache;
1587
1588         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589         total = btrfs_item_size_nr(eb, slot);
1590         while (cur < total) {
1591                 name_len = btrfs_inode_extref_name_len(eb, extref);
1592                 index = btrfs_inode_extref_index(eb, extref);
1593                 parent = btrfs_inode_extref_parent(eb, extref);
1594                 if (name_len <= BTRFS_NAME_LEN) {
1595                         len = name_len;
1596                         error = 0;
1597                 } else {
1598                         len = BTRFS_NAME_LEN;
1599                         error = REF_ERR_NAME_TOO_LONG;
1600                 }
1601                 read_extent_buffer(eb, namebuf,
1602                                    (unsigned long)(extref + 1), len);
1603                 add_inode_backref(inode_cache, key->objectid, parent,
1604                                   index, namebuf, len, 0, key->type, error);
1605
1606                 len = sizeof(*extref) + name_len;
1607                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1608                 cur += len;
1609         }
1610         return 0;
1611
1612 }
1613
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615                             u64 len, u64 *found)
1616 {
1617         struct btrfs_key key;
1618         struct btrfs_path path;
1619         struct extent_buffer *leaf;
1620         int ret;
1621         size_t size;
1622         *found = 0;
1623         u64 csum_end;
1624         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1625
1626         btrfs_init_path(&path);
1627
1628         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1629         key.offset = start;
1630         key.type = BTRFS_EXTENT_CSUM_KEY;
1631
1632         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1633                                 &key, &path, 0, 0);
1634         if (ret < 0)
1635                 goto out;
1636         if (ret > 0 && path.slots[0] > 0) {
1637                 leaf = path.nodes[0];
1638                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640                     key.type == BTRFS_EXTENT_CSUM_KEY)
1641                         path.slots[0]--;
1642         }
1643
1644         while (len > 0) {
1645                 leaf = path.nodes[0];
1646                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1648                         if (ret > 0)
1649                                 break;
1650                         else if (ret < 0)
1651                                 goto out;
1652                         leaf = path.nodes[0];
1653                 }
1654
1655                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657                     key.type != BTRFS_EXTENT_CSUM_KEY)
1658                         break;
1659
1660                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661                 if (key.offset >= start + len)
1662                         break;
1663
1664                 if (key.offset > start)
1665                         start = key.offset;
1666
1667                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669                 if (csum_end > start) {
1670                         size = min(csum_end - start, len);
1671                         len -= size;
1672                         start += size;
1673                         *found += size;
1674                 }
1675
1676                 path.slots[0]++;
1677         }
1678 out:
1679         btrfs_release_path(&path);
1680         if (ret < 0)
1681                 return ret;
1682         return 0;
1683 }
1684
1685 static int process_file_extent(struct btrfs_root *root,
1686                                 struct extent_buffer *eb,
1687                                 int slot, struct btrfs_key *key,
1688                                 struct shared_node *active_node)
1689 {
1690         struct inode_record *rec;
1691         struct btrfs_file_extent_item *fi;
1692         u64 num_bytes = 0;
1693         u64 disk_bytenr = 0;
1694         u64 extent_offset = 0;
1695         u64 mask = root->sectorsize - 1;
1696         int extent_type;
1697         int ret;
1698
1699         rec = active_node->current;
1700         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701         rec->found_file_extent = 1;
1702
1703         if (rec->extent_start == (u64)-1) {
1704                 rec->extent_start = key->offset;
1705                 rec->extent_end = key->offset;
1706         }
1707
1708         if (rec->extent_end > key->offset)
1709                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710         else if (rec->extent_end < key->offset) {
1711                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712                                            key->offset - rec->extent_end);
1713                 if (ret < 0)
1714                         return ret;
1715         }
1716
1717         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718         extent_type = btrfs_file_extent_type(eb, fi);
1719
1720         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1722                 if (num_bytes == 0)
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 rec->found_size += num_bytes;
1725                 num_bytes = (num_bytes + mask) & ~mask;
1726         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730                 extent_offset = btrfs_file_extent_offset(eb, fi);
1731                 if (num_bytes == 0 || (num_bytes & mask))
1732                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733                 if (num_bytes + extent_offset >
1734                     btrfs_file_extent_ram_bytes(eb, fi))
1735                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1736                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737                     (btrfs_file_extent_compression(eb, fi) ||
1738                      btrfs_file_extent_encryption(eb, fi) ||
1739                      btrfs_file_extent_other_encoding(eb, fi)))
1740                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1741                 if (disk_bytenr > 0)
1742                         rec->found_size += num_bytes;
1743         } else {
1744                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1745         }
1746         rec->extent_end = key->offset + num_bytes;
1747
1748         /*
1749          * The data reloc tree will copy full extents into its inode and then
1750          * copy the corresponding csums.  Because the extent it copied could be
1751          * a preallocated extent that hasn't been written to yet there may be no
1752          * csums to copy, ergo we won't have csums for our file extent.  This is
1753          * ok so just don't bother checking csums if the inode belongs to the
1754          * data reloc tree.
1755          */
1756         if (disk_bytenr > 0 &&
1757             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1758                 u64 found;
1759                 if (btrfs_file_extent_compression(eb, fi))
1760                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1761                 else
1762                         disk_bytenr += extent_offset;
1763
1764                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1765                 if (ret < 0)
1766                         return ret;
1767                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1768                         if (found > 0)
1769                                 rec->found_csum_item = 1;
1770                         if (found < num_bytes)
1771                                 rec->some_csum_missing = 1;
1772                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1773                         if (found > 0)
1774                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1775                 }
1776         }
1777         return 0;
1778 }
1779
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781                             struct walk_control *wc)
1782 {
1783         struct btrfs_key key;
1784         u32 nritems;
1785         int i;
1786         int ret = 0;
1787         struct cache_tree *inode_cache;
1788         struct shared_node *active_node;
1789
1790         if (wc->root_level == wc->active_node &&
1791             btrfs_root_refs(&root->root_item) == 0)
1792                 return 0;
1793
1794         active_node = wc->nodes[wc->active_node];
1795         inode_cache = &active_node->inode_cache;
1796         nritems = btrfs_header_nritems(eb);
1797         for (i = 0; i < nritems; i++) {
1798                 btrfs_item_key_to_cpu(eb, &key, i);
1799
1800                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1801                         continue;
1802                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1803                         continue;
1804
1805                 if (active_node->current == NULL ||
1806                     active_node->current->ino < key.objectid) {
1807                         if (active_node->current) {
1808                                 active_node->current->checked = 1;
1809                                 maybe_free_inode_rec(inode_cache,
1810                                                      active_node->current);
1811                         }
1812                         active_node->current = get_inode_rec(inode_cache,
1813                                                              key.objectid, 1);
1814                         BUG_ON(IS_ERR(active_node->current));
1815                 }
1816                 switch (key.type) {
1817                 case BTRFS_DIR_ITEM_KEY:
1818                 case BTRFS_DIR_INDEX_KEY:
1819                         ret = process_dir_item(root, eb, i, &key, active_node);
1820                         break;
1821                 case BTRFS_INODE_REF_KEY:
1822                         ret = process_inode_ref(eb, i, &key, active_node);
1823                         break;
1824                 case BTRFS_INODE_EXTREF_KEY:
1825                         ret = process_inode_extref(eb, i, &key, active_node);
1826                         break;
1827                 case BTRFS_INODE_ITEM_KEY:
1828                         ret = process_inode_item(eb, i, &key, active_node);
1829                         break;
1830                 case BTRFS_EXTENT_DATA_KEY:
1831                         ret = process_file_extent(root, eb, i, &key,
1832                                                   active_node);
1833                         break;
1834                 default:
1835                         break;
1836                 };
1837         }
1838         return ret;
1839 }
1840
1841 static void reada_walk_down(struct btrfs_root *root,
1842                             struct extent_buffer *node, int slot)
1843 {
1844         u64 bytenr;
1845         u64 ptr_gen;
1846         u32 nritems;
1847         u32 blocksize;
1848         int i;
1849         int level;
1850
1851         level = btrfs_header_level(node);
1852         if (level != 1)
1853                 return;
1854
1855         nritems = btrfs_header_nritems(node);
1856         blocksize = root->nodesize;
1857         for (i = slot; i < nritems; i++) {
1858                 bytenr = btrfs_node_blockptr(node, i);
1859                 ptr_gen = btrfs_node_ptr_generation(node, i);
1860                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1861         }
1862 }
1863
1864 /*
1865  * Check the child node/leaf by the following condition:
1866  * 1. the first item key of the node/leaf should be the same with the one
1867  *    in parent.
1868  * 2. block in parent node should match the child node/leaf.
1869  * 3. generation of parent node and child's header should be consistent.
1870  *
1871  * Or the child node/leaf pointed by the key in parent is not valid.
1872  *
1873  * We hope to check leaf owner too, but since subvol may share leaves,
1874  * which makes leaf owner check not so strong, key check should be
1875  * sufficient enough for that case.
1876  */
1877 static int check_child_node(struct btrfs_root *root,
1878                             struct extent_buffer *parent, int slot,
1879                             struct extent_buffer *child)
1880 {
1881         struct btrfs_key parent_key;
1882         struct btrfs_key child_key;
1883         int ret = 0;
1884
1885         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1886         if (btrfs_header_level(child) == 0)
1887                 btrfs_item_key_to_cpu(child, &child_key, 0);
1888         else
1889                 btrfs_node_key_to_cpu(child, &child_key, 0);
1890
1891         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1892                 ret = -EINVAL;
1893                 fprintf(stderr,
1894                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895                         parent_key.objectid, parent_key.type, parent_key.offset,
1896                         child_key.objectid, child_key.type, child_key.offset);
1897         }
1898         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1899                 ret = -EINVAL;
1900                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901                         btrfs_node_blockptr(parent, slot),
1902                         btrfs_header_bytenr(child));
1903         }
1904         if (btrfs_node_ptr_generation(parent, slot) !=
1905             btrfs_header_generation(child)) {
1906                 ret = -EINVAL;
1907                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908                         btrfs_header_generation(child),
1909                         btrfs_node_ptr_generation(parent, slot));
1910         }
1911         return ret;
1912 }
1913
1914 struct node_refs {
1915         u64 bytenr[BTRFS_MAX_LEVEL];
1916         u64 refs[BTRFS_MAX_LEVEL];
1917 };
1918
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920                           struct walk_control *wc, int *level,
1921                           struct node_refs *nrefs)
1922 {
1923         enum btrfs_tree_block_status status;
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         struct extent_buffer *next;
1927         struct extent_buffer *cur;
1928         u32 blocksize;
1929         int ret, err = 0;
1930         u64 refs;
1931
1932         WARN_ON(*level < 0);
1933         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1934
1935         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936                 refs = nrefs->refs[*level];
1937                 ret = 0;
1938         } else {
1939                 ret = btrfs_lookup_extent_info(NULL, root,
1940                                        path->nodes[*level]->start,
1941                                        *level, 1, &refs, NULL);
1942                 if (ret < 0) {
1943                         err = ret;
1944                         goto out;
1945                 }
1946                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947                 nrefs->refs[*level] = refs;
1948         }
1949
1950         if (refs > 1) {
1951                 ret = enter_shared_node(root, path->nodes[*level]->start,
1952                                         refs, wc, *level);
1953                 if (ret > 0) {
1954                         err = ret;
1955                         goto out;
1956                 }
1957         }
1958
1959         while (*level >= 0) {
1960                 WARN_ON(*level < 0);
1961                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962                 cur = path->nodes[*level];
1963
1964                 if (btrfs_header_level(cur) != *level)
1965                         WARN_ON(1);
1966
1967                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1968                         break;
1969                 if (*level == 0) {
1970                         ret = process_one_leaf(root, cur, wc);
1971                         if (ret < 0)
1972                                 err = ret;
1973                         break;
1974                 }
1975                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977                 blocksize = root->nodesize;
1978
1979                 if (bytenr == nrefs->bytenr[*level - 1]) {
1980                         refs = nrefs->refs[*level - 1];
1981                 } else {
1982                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983                                         *level - 1, 1, &refs, NULL);
1984                         if (ret < 0) {
1985                                 refs = 0;
1986                         } else {
1987                                 nrefs->bytenr[*level - 1] = bytenr;
1988                                 nrefs->refs[*level - 1] = refs;
1989                         }
1990                 }
1991
1992                 if (refs > 1) {
1993                         ret = enter_shared_node(root, bytenr, refs,
1994                                                 wc, *level - 1);
1995                         if (ret > 0) {
1996                                 path->slots[*level]++;
1997                                 continue;
1998                         }
1999                 }
2000
2001                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003                         free_extent_buffer(next);
2004                         reada_walk_down(root, cur, path->slots[*level]);
2005                         next = read_tree_block(root, bytenr, blocksize,
2006                                                ptr_gen);
2007                         if (!extent_buffer_uptodate(next)) {
2008                                 struct btrfs_key node_key;
2009
2010                                 btrfs_node_key_to_cpu(path->nodes[*level],
2011                                                       &node_key,
2012                                                       path->slots[*level]);
2013                                 btrfs_add_corrupt_extent_record(root->fs_info,
2014                                                 &node_key,
2015                                                 path->nodes[*level]->start,
2016                                                 root->nodesize, *level);
2017                                 err = -EIO;
2018                                 goto out;
2019                         }
2020                 }
2021
2022                 ret = check_child_node(root, cur, path->slots[*level], next);
2023                 if (ret) {
2024                         err = ret;
2025                         goto out;
2026                 }
2027
2028                 if (btrfs_is_leaf(next))
2029                         status = btrfs_check_leaf(root, NULL, next);
2030                 else
2031                         status = btrfs_check_node(root, NULL, next);
2032                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033                         free_extent_buffer(next);
2034                         err = -EIO;
2035                         goto out;
2036                 }
2037
2038                 *level = *level - 1;
2039                 free_extent_buffer(path->nodes[*level]);
2040                 path->nodes[*level] = next;
2041                 path->slots[*level] = 0;
2042         }
2043 out:
2044         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2045         return err;
2046 }
2047
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049                         struct walk_control *wc, int *level)
2050 {
2051         int i;
2052         struct extent_buffer *leaf;
2053
2054         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055                 leaf = path->nodes[i];
2056                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2057                         path->slots[i]++;
2058                         *level = i;
2059                         return 0;
2060                 } else {
2061                         free_extent_buffer(path->nodes[*level]);
2062                         path->nodes[*level] = NULL;
2063                         BUG_ON(*level > wc->active_node);
2064                         if (*level == wc->active_node)
2065                                 leave_shared_node(root, wc, *level);
2066                         *level = i + 1;
2067                 }
2068         }
2069         return 1;
2070 }
2071
2072 static int check_root_dir(struct inode_record *rec)
2073 {
2074         struct inode_backref *backref;
2075         int ret = -1;
2076
2077         if (!rec->found_inode_item || rec->errors)
2078                 goto out;
2079         if (rec->nlink != 1 || rec->found_link != 0)
2080                 goto out;
2081         if (list_empty(&rec->backrefs))
2082                 goto out;
2083         backref = to_inode_backref(rec->backrefs.next);
2084         if (!backref->found_inode_ref)
2085                 goto out;
2086         if (backref->index != 0 || backref->namelen != 2 ||
2087             memcmp(backref->name, "..", 2))
2088                 goto out;
2089         if (backref->found_dir_index || backref->found_dir_item)
2090                 goto out;
2091         ret = 0;
2092 out:
2093         return ret;
2094 }
2095
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097                               struct btrfs_root *root, struct btrfs_path *path,
2098                               struct inode_record *rec)
2099 {
2100         struct btrfs_inode_item *ei;
2101         struct btrfs_key key;
2102         int ret;
2103
2104         key.objectid = rec->ino;
2105         key.type = BTRFS_INODE_ITEM_KEY;
2106         key.offset = (u64)-1;
2107
2108         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2109         if (ret < 0)
2110                 goto out;
2111         if (ret) {
2112                 if (!path->slots[0]) {
2113                         ret = -ENOENT;
2114                         goto out;
2115                 }
2116                 path->slots[0]--;
2117                 ret = 0;
2118         }
2119         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120         if (key.objectid != rec->ino) {
2121                 ret = -ENOENT;
2122                 goto out;
2123         }
2124
2125         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126                             struct btrfs_inode_item);
2127         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128         btrfs_mark_buffer_dirty(path->nodes[0]);
2129         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131                root->root_key.objectid);
2132 out:
2133         btrfs_release_path(path);
2134         return ret;
2135 }
2136
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138                                     struct btrfs_root *root,
2139                                     struct btrfs_path *path,
2140                                     struct inode_record *rec)
2141 {
2142         int ret;
2143
2144         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145         btrfs_release_path(path);
2146         if (!ret)
2147                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2148         return ret;
2149 }
2150
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152                                struct btrfs_root *root,
2153                                struct btrfs_path *path,
2154                                struct inode_record *rec)
2155 {
2156         struct btrfs_inode_item *ei;
2157         struct btrfs_key key;
2158         int ret = 0;
2159
2160         key.objectid = rec->ino;
2161         key.type = BTRFS_INODE_ITEM_KEY;
2162         key.offset = 0;
2163
2164         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2165         if (ret) {
2166                 if (ret > 0)
2167                         ret = -ENOENT;
2168                 goto out;
2169         }
2170
2171         /* Since ret == 0, no need to check anything */
2172         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173                             struct btrfs_inode_item);
2174         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175         btrfs_mark_buffer_dirty(path->nodes[0]);
2176         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177         printf("reset nbytes for ino %llu root %llu\n",
2178                rec->ino, root->root_key.objectid);
2179 out:
2180         btrfs_release_path(path);
2181         return ret;
2182 }
2183
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185                                  struct cache_tree *inode_cache,
2186                                  struct inode_record *rec,
2187                                  struct inode_backref *backref)
2188 {
2189         struct btrfs_path path;
2190         struct btrfs_trans_handle *trans;
2191         struct btrfs_dir_item *dir_item;
2192         struct extent_buffer *leaf;
2193         struct btrfs_key key;
2194         struct btrfs_disk_key disk_key;
2195         struct inode_record *dir_rec;
2196         unsigned long name_ptr;
2197         u32 data_size = sizeof(*dir_item) + backref->namelen;
2198         int ret;
2199
2200         trans = btrfs_start_transaction(root, 1);
2201         if (IS_ERR(trans))
2202                 return PTR_ERR(trans);
2203
2204         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2205                 (unsigned long long)rec->ino);
2206
2207         btrfs_init_path(&path);
2208         key.objectid = backref->dir;
2209         key.type = BTRFS_DIR_INDEX_KEY;
2210         key.offset = backref->index;
2211         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2212         BUG_ON(ret);
2213
2214         leaf = path.nodes[0];
2215         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2216
2217         disk_key.objectid = cpu_to_le64(rec->ino);
2218         disk_key.type = BTRFS_INODE_ITEM_KEY;
2219         disk_key.offset = 0;
2220
2221         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2222         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2223         btrfs_set_dir_data_len(leaf, dir_item, 0);
2224         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2225         name_ptr = (unsigned long)(dir_item + 1);
2226         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2227         btrfs_mark_buffer_dirty(leaf);
2228         btrfs_release_path(&path);
2229         btrfs_commit_transaction(trans, root);
2230
2231         backref->found_dir_index = 1;
2232         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2233         BUG_ON(IS_ERR(dir_rec));
2234         if (!dir_rec)
2235                 return 0;
2236         dir_rec->found_size += backref->namelen;
2237         if (dir_rec->found_size == dir_rec->isize &&
2238             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2239                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2240         if (dir_rec->found_size != dir_rec->isize)
2241                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2242
2243         return 0;
2244 }
2245
2246 static int delete_dir_index(struct btrfs_root *root,
2247                             struct cache_tree *inode_cache,
2248                             struct inode_record *rec,
2249                             struct inode_backref *backref)
2250 {
2251         struct btrfs_trans_handle *trans;
2252         struct btrfs_dir_item *di;
2253         struct btrfs_path path;
2254         int ret = 0;
2255
2256         trans = btrfs_start_transaction(root, 1);
2257         if (IS_ERR(trans))
2258                 return PTR_ERR(trans);
2259
2260         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261                 (unsigned long long)backref->dir,
2262                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263                 (unsigned long long)root->objectid);
2264
2265         btrfs_init_path(&path);
2266         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2267                                     backref->name, backref->namelen,
2268                                     backref->index, -1);
2269         if (IS_ERR(di)) {
2270                 ret = PTR_ERR(di);
2271                 btrfs_release_path(&path);
2272                 btrfs_commit_transaction(trans, root);
2273                 if (ret == -ENOENT)
2274                         return 0;
2275                 return ret;
2276         }
2277
2278         if (!di)
2279                 ret = btrfs_del_item(trans, root, &path);
2280         else
2281                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2282         BUG_ON(ret);
2283         btrfs_release_path(&path);
2284         btrfs_commit_transaction(trans, root);
2285         return ret;
2286 }
2287
2288 static int create_inode_item(struct btrfs_root *root,
2289                              struct inode_record *rec,
2290                              struct inode_backref *backref, int root_dir)
2291 {
2292         struct btrfs_trans_handle *trans;
2293         struct btrfs_inode_item inode_item;
2294         time_t now = time(NULL);
2295         int ret;
2296
2297         trans = btrfs_start_transaction(root, 1);
2298         if (IS_ERR(trans)) {
2299                 ret = PTR_ERR(trans);
2300                 return ret;
2301         }
2302
2303         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2304                 "be incomplete, please check permissions and content after "
2305                 "the fsck completes.\n", (unsigned long long)root->objectid,
2306                 (unsigned long long)rec->ino);
2307
2308         memset(&inode_item, 0, sizeof(inode_item));
2309         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2310         if (root_dir)
2311                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2312         else
2313                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2314         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2315         if (rec->found_dir_item) {
2316                 if (rec->found_file_extent)
2317                         fprintf(stderr, "root %llu inode %llu has both a dir "
2318                                 "item and extents, unsure if it is a dir or a "
2319                                 "regular file so setting it as a directory\n",
2320                                 (unsigned long long)root->objectid,
2321                                 (unsigned long long)rec->ino);
2322                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2323                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2324         } else if (!rec->found_dir_item) {
2325                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2326                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2327         }
2328         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2329         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2330         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2331         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2332         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2333         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2334         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2335         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2336
2337         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2338         BUG_ON(ret);
2339         btrfs_commit_transaction(trans, root);
2340         return 0;
2341 }
2342
2343 static int repair_inode_backrefs(struct btrfs_root *root,
2344                                  struct inode_record *rec,
2345                                  struct cache_tree *inode_cache,
2346                                  int delete)
2347 {
2348         struct inode_backref *tmp, *backref;
2349         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2350         int ret = 0;
2351         int repaired = 0;
2352
2353         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354                 if (!delete && rec->ino == root_dirid) {
2355                         if (!rec->found_inode_item) {
2356                                 ret = create_inode_item(root, rec, backref, 1);
2357                                 if (ret)
2358                                         break;
2359                                 repaired++;
2360                         }
2361                 }
2362
2363                 /* Index 0 for root dir's are special, don't mess with it */
2364                 if (rec->ino == root_dirid && backref->index == 0)
2365                         continue;
2366
2367                 if (delete &&
2368                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2369                      (backref->found_dir_index && backref->found_inode_ref &&
2370                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2371                         ret = delete_dir_index(root, inode_cache, rec, backref);
2372                         if (ret)
2373                                 break;
2374                         repaired++;
2375                         list_del(&backref->list);
2376                         free(backref);
2377                 }
2378
2379                 if (!delete && !backref->found_dir_index &&
2380                     backref->found_dir_item && backref->found_inode_ref) {
2381                         ret = add_missing_dir_index(root, inode_cache, rec,
2382                                                     backref);
2383                         if (ret)
2384                                 break;
2385                         repaired++;
2386                         if (backref->found_dir_item &&
2387                             backref->found_dir_index &&
2388                             backref->found_dir_index) {
2389                                 if (!backref->errors &&
2390                                     backref->found_inode_ref) {
2391                                         list_del(&backref->list);
2392                                         free(backref);
2393                                 }
2394                         }
2395                 }
2396
2397                 if (!delete && (!backref->found_dir_index &&
2398                                 !backref->found_dir_item &&
2399                                 backref->found_inode_ref)) {
2400                         struct btrfs_trans_handle *trans;
2401                         struct btrfs_key location;
2402
2403                         ret = check_dir_conflict(root, backref->name,
2404                                                  backref->namelen,
2405                                                  backref->dir,
2406                                                  backref->index);
2407                         if (ret) {
2408                                 /*
2409                                  * let nlink fixing routine to handle it,
2410                                  * which can do it better.
2411                                  */
2412                                 ret = 0;
2413                                 break;
2414                         }
2415                         location.objectid = rec->ino;
2416                         location.type = BTRFS_INODE_ITEM_KEY;
2417                         location.offset = 0;
2418
2419                         trans = btrfs_start_transaction(root, 1);
2420                         if (IS_ERR(trans)) {
2421                                 ret = PTR_ERR(trans);
2422                                 break;
2423                         }
2424                         fprintf(stderr, "adding missing dir index/item pair "
2425                                 "for inode %llu\n",
2426                                 (unsigned long long)rec->ino);
2427                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2428                                                     backref->namelen,
2429                                                     backref->dir, &location,
2430                                                     imode_to_type(rec->imode),
2431                                                     backref->index);
2432                         BUG_ON(ret);
2433                         btrfs_commit_transaction(trans, root);
2434                         repaired++;
2435                 }
2436
2437                 if (!delete && (backref->found_inode_ref &&
2438                                 backref->found_dir_index &&
2439                                 backref->found_dir_item &&
2440                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2441                                 !rec->found_inode_item)) {
2442                         ret = create_inode_item(root, rec, backref, 0);
2443                         if (ret)
2444                                 break;
2445                         repaired++;
2446                 }
2447
2448         }
2449         return ret ? ret : repaired;
2450 }
2451
2452 /*
2453  * To determine the file type for nlink/inode_item repair
2454  *
2455  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2456  * Return -ENOENT if file type is not found.
2457  */
2458 static int find_file_type(struct inode_record *rec, u8 *type)
2459 {
2460         struct inode_backref *backref;
2461
2462         /* For inode item recovered case */
2463         if (rec->found_inode_item) {
2464                 *type = imode_to_type(rec->imode);
2465                 return 0;
2466         }
2467
2468         list_for_each_entry(backref, &rec->backrefs, list) {
2469                 if (backref->found_dir_index || backref->found_dir_item) {
2470                         *type = backref->filetype;
2471                         return 0;
2472                 }
2473         }
2474         return -ENOENT;
2475 }
2476
2477 /*
2478  * To determine the file name for nlink repair
2479  *
2480  * Return 0 if file name is found, set name and namelen.
2481  * Return -ENOENT if file name is not found.
2482  */
2483 static int find_file_name(struct inode_record *rec,
2484                           char *name, int *namelen)
2485 {
2486         struct inode_backref *backref;
2487
2488         list_for_each_entry(backref, &rec->backrefs, list) {
2489                 if (backref->found_dir_index || backref->found_dir_item ||
2490                     backref->found_inode_ref) {
2491                         memcpy(name, backref->name, backref->namelen);
2492                         *namelen = backref->namelen;
2493                         return 0;
2494                 }
2495         }
2496         return -ENOENT;
2497 }
2498
2499 /* Reset the nlink of the inode to the correct one */
2500 static int reset_nlink(struct btrfs_trans_handle *trans,
2501                        struct btrfs_root *root,
2502                        struct btrfs_path *path,
2503                        struct inode_record *rec)
2504 {
2505         struct inode_backref *backref;
2506         struct inode_backref *tmp;
2507         struct btrfs_key key;
2508         struct btrfs_inode_item *inode_item;
2509         int ret = 0;
2510
2511         /* We don't believe this either, reset it and iterate backref */
2512         rec->found_link = 0;
2513
2514         /* Remove all backref including the valid ones */
2515         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2516                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2517                                    backref->index, backref->name,
2518                                    backref->namelen, 0);
2519                 if (ret < 0)
2520                         goto out;
2521
2522                 /* remove invalid backref, so it won't be added back */
2523                 if (!(backref->found_dir_index &&
2524                       backref->found_dir_item &&
2525                       backref->found_inode_ref)) {
2526                         list_del(&backref->list);
2527                         free(backref);
2528                 } else {
2529                         rec->found_link++;
2530                 }
2531         }
2532
2533         /* Set nlink to 0 */
2534         key.objectid = rec->ino;
2535         key.type = BTRFS_INODE_ITEM_KEY;
2536         key.offset = 0;
2537         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2538         if (ret < 0)
2539                 goto out;
2540         if (ret > 0) {
2541                 ret = -ENOENT;
2542                 goto out;
2543         }
2544         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2545                                     struct btrfs_inode_item);
2546         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2547         btrfs_mark_buffer_dirty(path->nodes[0]);
2548         btrfs_release_path(path);
2549
2550         /*
2551          * Add back valid inode_ref/dir_item/dir_index,
2552          * add_link() will handle the nlink inc, so new nlink must be correct
2553          */
2554         list_for_each_entry(backref, &rec->backrefs, list) {
2555                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2556                                      backref->name, backref->namelen,
2557                                      backref->filetype, &backref->index, 1);
2558                 if (ret < 0)
2559                         goto out;
2560         }
2561 out:
2562         btrfs_release_path(path);
2563         return ret;
2564 }
2565
2566 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2567                                struct btrfs_root *root,
2568                                struct btrfs_path *path,
2569                                struct inode_record *rec)
2570 {
2571         char *dir_name = "lost+found";
2572         char namebuf[BTRFS_NAME_LEN] = {0};
2573         u64 lost_found_ino;
2574         u32 mode = 0700;
2575         u8 type = 0;
2576         int namelen = 0;
2577         int name_recovered = 0;
2578         int type_recovered = 0;
2579         int ret = 0;
2580
2581         /*
2582          * Get file name and type first before these invalid inode ref
2583          * are deleted by remove_all_invalid_backref()
2584          */
2585         name_recovered = !find_file_name(rec, namebuf, &namelen);
2586         type_recovered = !find_file_type(rec, &type);
2587
2588         if (!name_recovered) {
2589                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2590                        rec->ino, rec->ino);
2591                 namelen = count_digits(rec->ino);
2592                 sprintf(namebuf, "%llu", rec->ino);
2593                 name_recovered = 1;
2594         }
2595         if (!type_recovered) {
2596                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2597                        rec->ino);
2598                 type = BTRFS_FT_REG_FILE;
2599                 type_recovered = 1;
2600         }
2601
2602         ret = reset_nlink(trans, root, path, rec);
2603         if (ret < 0) {
2604                 fprintf(stderr,
2605                         "Failed to reset nlink for inode %llu: %s\n",
2606                         rec->ino, strerror(-ret));
2607                 goto out;
2608         }
2609
2610         if (rec->found_link == 0) {
2611                 lost_found_ino = root->highest_inode;
2612                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2613                         ret = -EOVERFLOW;
2614                         goto out;
2615                 }
2616                 lost_found_ino++;
2617                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2618                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2619                                   mode);
2620                 if (ret < 0) {
2621                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2622                                 dir_name, strerror(-ret));
2623                         goto out;
2624                 }
2625                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2626                                      namebuf, namelen, type, NULL, 1);
2627                 /*
2628                  * Add ".INO" suffix several times to handle case where
2629                  * "FILENAME.INO" is already taken by another file.
2630                  */
2631                 while (ret == -EEXIST) {
2632                         /*
2633                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2634                          */
2635                         if (namelen + count_digits(rec->ino) + 1 >
2636                             BTRFS_NAME_LEN) {
2637                                 ret = -EFBIG;
2638                                 goto out;
2639                         }
2640                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2641                                  ".%llu", rec->ino);
2642                         namelen += count_digits(rec->ino) + 1;
2643                         ret = btrfs_add_link(trans, root, rec->ino,
2644                                              lost_found_ino, namebuf,
2645                                              namelen, type, NULL, 1);
2646                 }
2647                 if (ret < 0) {
2648                         fprintf(stderr,
2649                                 "Failed to link the inode %llu to %s dir: %s\n",
2650                                 rec->ino, dir_name, strerror(-ret));
2651                         goto out;
2652                 }
2653                 /*
2654                  * Just increase the found_link, don't actually add the
2655                  * backref. This will make things easier and this inode
2656                  * record will be freed after the repair is done.
2657                  * So fsck will not report problem about this inode.
2658                  */
2659                 rec->found_link++;
2660                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2661                        namelen, namebuf, dir_name);
2662         }
2663         printf("Fixed the nlink of inode %llu\n", rec->ino);
2664 out:
2665         /*
2666          * Clear the flag anyway, or we will loop forever for the same inode
2667          * as it will not be removed from the bad inode list and the dead loop
2668          * happens.
2669          */
2670         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2671         btrfs_release_path(path);
2672         return ret;
2673 }
2674
2675 /*
2676  * Check if there is any normal(reg or prealloc) file extent for given
2677  * ino.
2678  * This is used to determine the file type when neither its dir_index/item or
2679  * inode_item exists.
2680  *
2681  * This will *NOT* report error, if any error happens, just consider it does
2682  * not have any normal file extent.
2683  */
2684 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2685 {
2686         struct btrfs_path path;
2687         struct btrfs_key key;
2688         struct btrfs_key found_key;
2689         struct btrfs_file_extent_item *fi;
2690         u8 type;
2691         int ret = 0;
2692
2693         btrfs_init_path(&path);
2694         key.objectid = ino;
2695         key.type = BTRFS_EXTENT_DATA_KEY;
2696         key.offset = 0;
2697
2698         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2699         if (ret < 0) {
2700                 ret = 0;
2701                 goto out;
2702         }
2703         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2704                 ret = btrfs_next_leaf(root, &path);
2705                 if (ret) {
2706                         ret = 0;
2707                         goto out;
2708                 }
2709         }
2710         while (1) {
2711                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2712                                       path.slots[0]);
2713                 if (found_key.objectid != ino ||
2714                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2715                         break;
2716                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2717                                     struct btrfs_file_extent_item);
2718                 type = btrfs_file_extent_type(path.nodes[0], fi);
2719                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2720                         ret = 1;
2721                         goto out;
2722                 }
2723         }
2724 out:
2725         btrfs_release_path(&path);
2726         return ret;
2727 }
2728
2729 static u32 btrfs_type_to_imode(u8 type)
2730 {
2731         static u32 imode_by_btrfs_type[] = {
2732                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2733                 [BTRFS_FT_DIR]          = S_IFDIR,
2734                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2735                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2736                 [BTRFS_FT_FIFO]         = S_IFIFO,
2737                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2738                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2739         };
2740
2741         return imode_by_btrfs_type[(type)];
2742 }
2743
2744 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2745                                 struct btrfs_root *root,
2746                                 struct btrfs_path *path,
2747                                 struct inode_record *rec)
2748 {
2749         u8 filetype;
2750         u32 mode = 0700;
2751         int type_recovered = 0;
2752         int ret = 0;
2753
2754         printf("Trying to rebuild inode:%llu\n", rec->ino);
2755
2756         type_recovered = !find_file_type(rec, &filetype);
2757
2758         /*
2759          * Try to determine inode type if type not found.
2760          *
2761          * For found regular file extent, it must be FILE.
2762          * For found dir_item/index, it must be DIR.
2763          *
2764          * For undetermined one, use FILE as fallback.
2765          *
2766          * TODO:
2767          * 1. If found backref(inode_index/item is already handled) to it,
2768          *    it must be DIR.
2769          *    Need new inode-inode ref structure to allow search for that.
2770          */
2771         if (!type_recovered) {
2772                 if (rec->found_file_extent &&
2773                     find_normal_file_extent(root, rec->ino)) {
2774                         type_recovered = 1;
2775                         filetype = BTRFS_FT_REG_FILE;
2776                 } else if (rec->found_dir_item) {
2777                         type_recovered = 1;
2778                         filetype = BTRFS_FT_DIR;
2779                 } else if (!list_empty(&rec->orphan_extents)) {
2780                         type_recovered = 1;
2781                         filetype = BTRFS_FT_REG_FILE;
2782                 } else{
2783                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2784                                rec->ino);
2785                         type_recovered = 1;
2786                         filetype = BTRFS_FT_REG_FILE;
2787                 }
2788         }
2789
2790         ret = btrfs_new_inode(trans, root, rec->ino,
2791                               mode | btrfs_type_to_imode(filetype));
2792         if (ret < 0)
2793                 goto out;
2794
2795         /*
2796          * Here inode rebuild is done, we only rebuild the inode item,
2797          * don't repair the nlink(like move to lost+found).
2798          * That is the job of nlink repair.
2799          *
2800          * We just fill the record and return
2801          */
2802         rec->found_dir_item = 1;
2803         rec->imode = mode | btrfs_type_to_imode(filetype);
2804         rec->nlink = 0;
2805         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2806         /* Ensure the inode_nlinks repair function will be called */
2807         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2808 out:
2809         return ret;
2810 }
2811
2812 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2813                                       struct btrfs_root *root,
2814                                       struct btrfs_path *path,
2815                                       struct inode_record *rec)
2816 {
2817         struct orphan_data_extent *orphan;
2818         struct orphan_data_extent *tmp;
2819         int ret = 0;
2820
2821         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2822                 /*
2823                  * Check for conflicting file extents
2824                  *
2825                  * Here we don't know whether the extents is compressed or not,
2826                  * so we can only assume it not compressed nor data offset,
2827                  * and use its disk_len as extent length.
2828                  */
2829                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2830                                        orphan->offset, orphan->disk_len, 0);
2831                 btrfs_release_path(path);
2832                 if (ret < 0)
2833                         goto out;
2834                 if (!ret) {
2835                         fprintf(stderr,
2836                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2837                                 orphan->disk_bytenr, orphan->disk_len);
2838                         ret = btrfs_free_extent(trans,
2839                                         root->fs_info->extent_root,
2840                                         orphan->disk_bytenr, orphan->disk_len,
2841                                         0, root->objectid, orphan->objectid,
2842                                         orphan->offset);
2843                         if (ret < 0)
2844                                 goto out;
2845                 }
2846                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2847                                 orphan->offset, orphan->disk_bytenr,
2848                                 orphan->disk_len, orphan->disk_len);
2849                 if (ret < 0)
2850                         goto out;
2851
2852                 /* Update file size info */
2853                 rec->found_size += orphan->disk_len;
2854                 if (rec->found_size == rec->nbytes)
2855                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2856
2857                 /* Update the file extent hole info too */
2858                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2859                                            orphan->disk_len);
2860                 if (ret < 0)
2861                         goto out;
2862                 if (RB_EMPTY_ROOT(&rec->holes))
2863                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2864
2865                 list_del(&orphan->list);
2866                 free(orphan);
2867         }
2868         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2869 out:
2870         return ret;
2871 }
2872
2873 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2874                                         struct btrfs_root *root,
2875                                         struct btrfs_path *path,
2876                                         struct inode_record *rec)
2877 {
2878         struct rb_node *node;
2879         struct file_extent_hole *hole;
2880         int found = 0;
2881         int ret = 0;
2882
2883         node = rb_first(&rec->holes);
2884
2885         while (node) {
2886                 found = 1;
2887                 hole = rb_entry(node, struct file_extent_hole, node);
2888                 ret = btrfs_punch_hole(trans, root, rec->ino,
2889                                        hole->start, hole->len);
2890                 if (ret < 0)
2891                         goto out;
2892                 ret = del_file_extent_hole(&rec->holes, hole->start,
2893                                            hole->len);
2894                 if (ret < 0)
2895                         goto out;
2896                 if (RB_EMPTY_ROOT(&rec->holes))
2897                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2898                 node = rb_first(&rec->holes);
2899         }
2900         /* special case for a file losing all its file extent */
2901         if (!found) {
2902                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2903                                        round_up(rec->isize, root->sectorsize));
2904                 if (ret < 0)
2905                         goto out;
2906         }
2907         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2908                rec->ino, root->objectid);
2909 out:
2910         return ret;
2911 }
2912
2913 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2914 {
2915         struct btrfs_trans_handle *trans;
2916         struct btrfs_path path;
2917         int ret = 0;
2918
2919         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2920                              I_ERR_NO_ORPHAN_ITEM |
2921                              I_ERR_LINK_COUNT_WRONG |
2922                              I_ERR_NO_INODE_ITEM |
2923                              I_ERR_FILE_EXTENT_ORPHAN |
2924                              I_ERR_FILE_EXTENT_DISCOUNT|
2925                              I_ERR_FILE_NBYTES_WRONG)))
2926                 return rec->errors;
2927
2928         /*
2929          * For nlink repair, it may create a dir and add link, so
2930          * 2 for parent(256)'s dir_index and dir_item
2931          * 2 for lost+found dir's inode_item and inode_ref
2932          * 1 for the new inode_ref of the file
2933          * 2 for lost+found dir's dir_index and dir_item for the file
2934          */
2935         trans = btrfs_start_transaction(root, 7);
2936         if (IS_ERR(trans))
2937                 return PTR_ERR(trans);
2938
2939         btrfs_init_path(&path);
2940         if (rec->errors & I_ERR_NO_INODE_ITEM)
2941                 ret = repair_inode_no_item(trans, root, &path, rec);
2942         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2943                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2944         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2945                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2946         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2947                 ret = repair_inode_isize(trans, root, &path, rec);
2948         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2949                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2950         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2951                 ret = repair_inode_nlinks(trans, root, &path, rec);
2952         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2953                 ret = repair_inode_nbytes(trans, root, &path, rec);
2954         btrfs_commit_transaction(trans, root);
2955         btrfs_release_path(&path);
2956         return ret;
2957 }
2958
/*
 * Check (and, when the file-level `repair` flag is set, try to repair) all
 * inode records collected in @inode_cache for @root.
 *
 * Steps:
 * 1. Skip roots whose root item has zero refs.
 * 2. Raise root->highest_inode to the largest inode number in the cache.
 * 3. With repair enabled, run repair_inode_backrefs() over every record in
 *    two stages (delete first, add second); if anything was repaired or
 *    failed, a third stage frees all records and the error is returned.
 * 4. Check the root directory record, recreating the root dir when it is
 *    missing and repair is enabled.
 * 5. Drain the cache: records that are clean are freed silently, the rest
 *    go through try_repair_inode() (repair only) and are printed.
 *
 * Every record taken out of the cache in step 5 is freed here.
 *
 * Return 0 if no error was counted, -1 if at least one was, -EAGAIN after
 * a backref repair or root dir recreation, or another negative value
 * propagated from the repair helpers / transaction start.
 */
static int check_inode_recs(struct btrfs_root *root,
			    struct cache_tree *inode_cache)
{
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;
	int stage = 0;
	int ret = 0;
	int err = 0;
	u64 error = 0;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);

	/* A root with zero refs is not checked; only warn about leftovers */
	if (btrfs_root_refs(&root->root_item) == 0) {
		if (!cache_tree_empty(inode_cache))
			fprintf(stderr, "warning line %d\n", __LINE__);
		return 0;
	}

	/*
	 * We need to record the highest inode number for later 'lost+found'
	 * dir creation.
	 * We must select an ino not used/referred by any existing inode, or
	 * 'lost+found' ino may be a missing ino in a corrupted leaf,
	 * this may cause 'lost+found' dir has wrong nlinks.
	 */
	cache = last_cache_extent(inode_cache);
	if (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		if (rec->ino > root->highest_inode)
			root->highest_inode = rec->ino;
	}

	/*
	 * We need to repair backrefs first because we could change some of the
	 * errors in the inode recs.
	 *
	 * We also need to go through and delete invalid backrefs first and then
	 * add the correct ones second.  We do this because we may get EEXIST
	 * when adding back the correct index because we hadn't yet deleted the
	 * invalid index.
	 *
	 * For example, if we were missing a dir index then the directories
	 * isize would be wrong, so if we fixed the isize to what we thought it
	 * would be and then fixed the backref we'd still have a invalid fs, so
	 * we need to add back the dir index and then check to see if the isize
	 * is still wrong.
	 */
	while (stage < 3) {
		stage++;
		/* Stage 3 (free everything) only runs if stage 1/2 set err */
		if (stage == 3 && !err)
			break;

		cache = search_cache_extent(inode_cache, 0);
		while (repair && cache) {
			node = container_of(cache, struct ptr_node, cache);
			rec = node->data;
			/* Advance first: the current node may be removed below */
			cache = next_cache_extent(cache);

			/* Need to free everything up and rescan */
			if (stage == 3) {
				remove_cache_extent(inode_cache, &node->cache);
				free(node);
				free_inode_rec(rec);
				continue;
			}

			if (list_empty(&rec->backrefs))
				continue;

			/* stage 1 passes delete=1, stage 2 passes delete=0 */
			ret = repair_inode_backrefs(root, rec, inode_cache,
						    stage == 1);
			if (ret < 0) {
				/* Hard failure: jump straight to the free stage */
				err = ret;
				stage = 2;
				break;
			} if (ret > 0) {
				/* Something was repaired: request a rescan */
				err = -EAGAIN;
			}
		}
	}
	if (err)
		return err;

	rec = get_inode_rec(inode_cache, root_dirid, 0);
	BUG_ON(IS_ERR(rec));
	if (rec) {
		ret = check_root_dir(rec);
		if (ret) {
			fprintf(stderr, "root %llu root dir %llu error\n",
				(unsigned long long)root->root_key.objectid,
				(unsigned long long)root_dirid);
			print_inode_error(root, rec);
			error++;
		}
	} else {
		if (repair) {
			struct btrfs_trans_handle *trans;

			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				err = PTR_ERR(trans);
				return err;
			}

			fprintf(stderr,
				"root %llu missing its root dir, recreating\n",
				(unsigned long long)root->objectid);

			ret = btrfs_make_root_dir(trans, root, root_dirid);
			BUG_ON(ret);

			btrfs_commit_transaction(trans, root);
			/* The tree changed, the caller is asked to start over */
			return -EAGAIN;
		}

		fprintf(stderr, "root %llu root dir %llu not found\n",
			(unsigned long long)root->root_key.objectid,
			(unsigned long long)root_dirid);
	}

	/* Drain the cache one record at a time, always taking the first */
	while (1) {
		cache = search_cache_extent(inode_cache, 0);
		if (!cache)
			break;
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		remove_cache_extent(inode_cache, &node->cache);
		free(node);
		/* The root dir was handled above; the orphan objectid is skipped */
		if (rec->ino == root_dirid ||
		    rec->ino == BTRFS_ORPHAN_OBJECTID) {
			free_inode_rec(rec);
			continue;
		}

		if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
			/* check_orphan_item() returning 0 clears the error bit */
			ret = check_orphan_item(root, rec->ino);
			if (ret == 0)
				rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
			if (can_free_inode_rec(rec)) {
				free_inode_rec(rec);
				continue;
			}
		}

		if (!rec->found_inode_item)
			rec->errors |= I_ERR_NO_INODE_ITEM;
		if (rec->found_link != rec->nlink)
			rec->errors |= I_ERR_LINK_COUNT_WRONG;
		if (repair) {
			ret = try_repair_inode(root, rec);
			if (ret == 0 && can_free_inode_rec(rec)) {
				free_inode_rec(rec);
				continue;
			}
			/*
			 * ret is reset, so in repair mode the check below
			 * never counts this record as an error.
			 */
			ret = 0;
		}

		if (!(repair && ret == 0))
			error++;
		print_inode_error(root, rec);
		list_for_each_entry(backref, &rec->backrefs, list) {
			/* Turn the missing pieces into error bits for printing */
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_inode_ref)
				backref->errors |= REF_ERR_NO_INODE_REF;
			fprintf(stderr, "\tunresolved ref dir %llu index %llu"
				" namelen %u name %s filetype %d errors %x",
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
				backref->filetype, backref->errors);
			print_ref_error(backref->errors);
		}
		free_inode_rec(rec);
	}
	return (error > 0) ? -1 : 0;
}
3140
3141 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3142                                         u64 objectid)
3143 {
3144         struct cache_extent *cache;
3145         struct root_record *rec = NULL;
3146         int ret;
3147
3148         cache = lookup_cache_extent(root_cache, objectid, 1);
3149         if (cache) {
3150                 rec = container_of(cache, struct root_record, cache);
3151         } else {
3152                 rec = calloc(1, sizeof(*rec));
3153                 if (!rec)
3154                         return ERR_PTR(-ENOMEM);
3155                 rec->objectid = objectid;
3156                 INIT_LIST_HEAD(&rec->backrefs);
3157                 rec->cache.start = objectid;
3158                 rec->cache.size = 1;
3159
3160                 ret = insert_cache_extent(root_cache, &rec->cache);
3161                 if (ret)
3162                         return ERR_PTR(-EEXIST);
3163         }
3164         return rec;
3165 }
3166
3167 static struct root_backref *get_root_backref(struct root_record *rec,
3168                                              u64 ref_root, u64 dir, u64 index,
3169                                              const char *name, int namelen)
3170 {
3171         struct root_backref *backref;
3172
3173         list_for_each_entry(backref, &rec->backrefs, list) {
3174                 if (backref->ref_root != ref_root || backref->dir != dir ||
3175                     backref->namelen != namelen)
3176                         continue;
3177                 if (memcmp(name, backref->name, namelen))
3178                         continue;
3179                 return backref;
3180         }
3181
3182         backref = calloc(1, sizeof(*backref) + namelen + 1);
3183         if (!backref)
3184                 return NULL;
3185         backref->ref_root = ref_root;
3186         backref->dir = dir;
3187         backref->index = index;
3188         backref->namelen = namelen;
3189         memcpy(backref->name, name, namelen);
3190         backref->name[namelen] = '\0';
3191         list_add_tail(&backref->list, &rec->backrefs);
3192         return backref;
3193 }
3194
3195 static void free_root_record(struct cache_extent *cache)
3196 {
3197         struct root_record *rec;
3198         struct root_backref *backref;
3199
3200         rec = container_of(cache, struct root_record, cache);
3201         while (!list_empty(&rec->backrefs)) {
3202                 backref = to_root_backref(rec->backrefs.next);
3203                 list_del(&backref->list);
3204                 free(backref);
3205         }
3206
3207         free(rec);
3208 }
3209
3210 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3211
3212 static int add_root_backref(struct cache_tree *root_cache,
3213                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3214                             const char *name, int namelen,
3215                             int item_type, int errors)
3216 {
3217         struct root_record *rec;
3218         struct root_backref *backref;
3219
3220         rec = get_root_rec(root_cache, root_id);
3221         BUG_ON(IS_ERR(rec));
3222         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3223         BUG_ON(!backref);
3224
3225         backref->errors |= errors;
3226
3227         if (item_type != BTRFS_DIR_ITEM_KEY) {
3228                 if (backref->found_dir_index || backref->found_back_ref ||
3229                     backref->found_forward_ref) {
3230                         if (backref->index != index)
3231                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3232                 } else {
3233                         backref->index = index;
3234                 }
3235         }
3236
3237         if (item_type == BTRFS_DIR_ITEM_KEY) {
3238                 if (backref->found_forward_ref)
3239                         rec->found_ref++;
3240                 backref->found_dir_item = 1;
3241         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3242                 backref->found_dir_index = 1;
3243         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3244                 if (backref->found_forward_ref)
3245                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3246                 else if (backref->found_dir_item)
3247                         rec->found_ref++;
3248                 backref->found_forward_ref = 1;
3249         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3250                 if (backref->found_back_ref)
3251                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3252                 backref->found_back_ref = 1;
3253         } else {
3254                 BUG_ON(1);
3255         }
3256
3257         if (backref->found_forward_ref && backref->found_dir_item)
3258                 backref->reachable = 1;
3259         return 0;
3260 }
3261
3262 static int merge_root_recs(struct btrfs_root *root,
3263                            struct cache_tree *src_cache,
3264                            struct cache_tree *dst_cache)
3265 {
3266         struct cache_extent *cache;
3267         struct ptr_node *node;
3268         struct inode_record *rec;
3269         struct inode_backref *backref;
3270         int ret = 0;
3271
3272         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3273                 free_inode_recs_tree(src_cache);
3274                 return 0;
3275         }
3276
3277         while (1) {
3278                 cache = search_cache_extent(src_cache, 0);
3279                 if (!cache)
3280                         break;
3281                 node = container_of(cache, struct ptr_node, cache);
3282                 rec = node->data;
3283                 remove_cache_extent(src_cache, &node->cache);
3284                 free(node);
3285
3286                 ret = is_child_root(root, root->objectid, rec->ino);
3287                 if (ret < 0)
3288                         break;
3289                 else if (ret == 0)
3290                         goto skip;
3291
3292                 list_for_each_entry(backref, &rec->backrefs, list) {
3293                         BUG_ON(backref->found_inode_ref);
3294                         if (backref->found_dir_item)
3295                                 add_root_backref(dst_cache, rec->ino,
3296                                         root->root_key.objectid, backref->dir,
3297                                         backref->index, backref->name,
3298                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3299                                         backref->errors);
3300                         if (backref->found_dir_index)
3301                                 add_root_backref(dst_cache, rec->ino,
3302                                         root->root_key.objectid, backref->dir,
3303                                         backref->index, backref->name,
3304                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3305                                         backref->errors);
3306                 }
3307 skip:
3308                 free_inode_rec(rec);
3309         }
3310         if (ret < 0)
3311                 return ret;
3312         return 0;
3313 }
3314
3315 static int check_root_refs(struct btrfs_root *root,
3316                            struct cache_tree *root_cache)
3317 {
3318         struct root_record *rec;
3319         struct root_record *ref_root;
3320         struct root_backref *backref;
3321         struct cache_extent *cache;
3322         int loop = 1;
3323         int ret;
3324         int error;
3325         int errors = 0;
3326
3327         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3328         BUG_ON(IS_ERR(rec));
3329         rec->found_ref = 1;
3330
3331         /* fixme: this can not detect circular references */
3332         while (loop) {
3333                 loop = 0;
3334                 cache = search_cache_extent(root_cache, 0);
3335                 while (1) {
3336                         if (!cache)
3337                                 break;
3338                         rec = container_of(cache, struct root_record, cache);
3339                         cache = next_cache_extent(cache);
3340
3341                         if (rec->found_ref == 0)
3342                                 continue;
3343
3344                         list_for_each_entry(backref, &rec->backrefs, list) {
3345                                 if (!backref->reachable)
3346                                         continue;
3347
3348                                 ref_root = get_root_rec(root_cache,
3349                                                         backref->ref_root);
3350                                 BUG_ON(IS_ERR(ref_root));
3351                                 if (ref_root->found_ref > 0)
3352                                         continue;
3353
3354                                 backref->reachable = 0;
3355                                 rec->found_ref--;
3356                                 if (rec->found_ref == 0)
3357                                         loop = 1;
3358                         }
3359                 }
3360         }
3361
3362         cache = search_cache_extent(root_cache, 0);
3363         while (1) {
3364                 if (!cache)
3365                         break;
3366                 rec = container_of(cache, struct root_record, cache);
3367                 cache = next_cache_extent(cache);
3368
3369                 if (rec->found_ref == 0 &&
3370                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3371                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3372                         ret = check_orphan_item(root->fs_info->tree_root,
3373                                                 rec->objectid);
3374                         if (ret == 0)
3375                                 continue;
3376
3377                         /*
3378                          * If we don't have a root item then we likely just have
3379                          * a dir item in a snapshot for this root but no actual
3380                          * ref key or anything so it's meaningless.
3381                          */
3382                         if (!rec->found_root_item)
3383                                 continue;
3384                         errors++;
3385                         fprintf(stderr, "fs tree %llu not referenced\n",
3386                                 (unsigned long long)rec->objectid);
3387                 }
3388
3389                 error = 0;
3390                 if (rec->found_ref > 0 && !rec->found_root_item)
3391                         error = 1;
3392                 list_for_each_entry(backref, &rec->backrefs, list) {
3393                         if (!backref->found_dir_item)
3394                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3395                         if (!backref->found_dir_index)
3396                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3397                         if (!backref->found_back_ref)
3398                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3399                         if (!backref->found_forward_ref)
3400                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3401                         if (backref->reachable && backref->errors)
3402                                 error = 1;
3403                 }
3404                 if (!error)
3405                         continue;
3406
3407                 errors++;
3408                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3409                         (unsigned long long)rec->objectid, rec->found_ref,
3410                          rec->found_root_item ? "" : "not found");
3411
3412                 list_for_each_entry(backref, &rec->backrefs, list) {
3413                         if (!backref->reachable)
3414                                 continue;
3415                         if (!backref->errors && rec->found_root_item)
3416                                 continue;
3417                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3418                                 " index %llu namelen %u name %s errors %x\n",
3419                                 (unsigned long long)backref->ref_root,
3420                                 (unsigned long long)backref->dir,
3421                                 (unsigned long long)backref->index,
3422                                 backref->namelen, backref->name,
3423                                 backref->errors);
3424                         print_ref_error(backref->errors);
3425                 }
3426         }
3427         return errors > 0 ? 1 : 0;
3428 }
3429
3430 static int process_root_ref(struct extent_buffer *eb, int slot,
3431                             struct btrfs_key *key,
3432                             struct cache_tree *root_cache)
3433 {
3434         u64 dirid;
3435         u64 index;
3436         u32 len;
3437         u32 name_len;
3438         struct btrfs_root_ref *ref;
3439         char namebuf[BTRFS_NAME_LEN];
3440         int error;
3441
3442         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3443
3444         dirid = btrfs_root_ref_dirid(eb, ref);
3445         index = btrfs_root_ref_sequence(eb, ref);
3446         name_len = btrfs_root_ref_name_len(eb, ref);
3447
3448         if (name_len <= BTRFS_NAME_LEN) {
3449                 len = name_len;
3450                 error = 0;
3451         } else {
3452                 len = BTRFS_NAME_LEN;
3453                 error = REF_ERR_NAME_TOO_LONG;
3454         }
3455         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3456
3457         if (key->type == BTRFS_ROOT_REF_KEY) {
3458                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3459                                  index, namebuf, len, key->type, error);
3460         } else {
3461                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3462                                  index, namebuf, len, key->type, error);
3463         }
3464         return 0;
3465 }
3466
3467 static void free_corrupt_block(struct cache_extent *cache)
3468 {
3469         struct btrfs_corrupt_block *corrupt;
3470
3471         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3472         free(corrupt);
3473 }
3474
3475 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3476
3477 /*
3478  * Repair the btree of the given root.
3479  *
3480  * The fix is to remove the node key in corrupt_blocks cache_tree.
3481  * and rebalance the tree.
3482  * After the fix, the btree should be writeable.
3483  */
3484 static int repair_btree(struct btrfs_root *root,
3485                         struct cache_tree *corrupt_blocks)
3486 {
3487         struct btrfs_trans_handle *trans;
3488         struct btrfs_path *path;
3489         struct btrfs_corrupt_block *corrupt;
3490         struct cache_extent *cache;
3491         struct btrfs_key key;
3492         u64 offset;
3493         int level;
3494         int ret = 0;
3495
3496         if (cache_tree_empty(corrupt_blocks))
3497                 return 0;
3498
3499         path = btrfs_alloc_path();
3500         if (!path)
3501                 return -ENOMEM;
3502
3503         trans = btrfs_start_transaction(root, 1);
3504         if (IS_ERR(trans)) {
3505                 ret = PTR_ERR(trans);
3506                 fprintf(stderr, "Error starting transaction: %s\n",
3507                         strerror(-ret));
3508                 goto out_free_path;
3509         }
3510         cache = first_cache_extent(corrupt_blocks);
3511         while (cache) {
3512                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3513                                        cache);
3514                 level = corrupt->level;
3515                 path->lowest_level = level;
3516                 key.objectid = corrupt->key.objectid;
3517                 key.type = corrupt->key.type;
3518                 key.offset = corrupt->key.offset;
3519
3520                 /*
3521                  * Here we don't want to do any tree balance, since it may
3522                  * cause a balance with corrupted brother leaf/node,
3523                  * so ins_len set to 0 here.
3524                  * Balance will be done after all corrupt node/leaf is deleted.
3525                  */
3526                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3527                 if (ret < 0)
3528                         goto out;
3529                 offset = btrfs_node_blockptr(path->nodes[level],
3530                                              path->slots[level]);
3531
3532                 /* Remove the ptr */
3533                 ret = btrfs_del_ptr(trans, root, path, level,
3534                                     path->slots[level]);
3535                 if (ret < 0)
3536                         goto out;
3537                 /*
3538                  * Remove the corresponding extent
3539                  * return value is not concerned.
3540                  */
3541                 btrfs_release_path(path);
3542                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3543                                         0, root->root_key.objectid,
3544                                         level - 1, 0);
3545                 cache = next_cache_extent(cache);
3546         }
3547
3548         /* Balance the btree using btrfs_search_slot() */
3549         cache = first_cache_extent(corrupt_blocks);
3550         while (cache) {
3551                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3552                                        cache);
3553                 memcpy(&key, &corrupt->key, sizeof(key));
3554                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3555                 if (ret < 0)
3556                         goto out;
3557                 /* return will always >0 since it won't find the item */
3558                 ret = 0;
3559                 btrfs_release_path(path);
3560                 cache = next_cache_extent(cache);
3561         }
3562 out:
3563         btrfs_commit_transaction(trans, root);
3564 out_free_path:
3565         btrfs_free_path(path);
3566         return ret;
3567 }
3568
3569 static int check_fs_root(struct btrfs_root *root,
3570                          struct cache_tree *root_cache,
3571                          struct walk_control *wc)
3572 {
3573         int ret = 0;
3574         int err = 0;
3575         int wret;
3576         int level;
3577         struct btrfs_path path;
3578         struct shared_node root_node;
3579         struct root_record *rec;
3580         struct btrfs_root_item *root_item = &root->root_item;
3581         struct cache_tree corrupt_blocks;
3582         struct orphan_data_extent *orphan;
3583         struct orphan_data_extent *tmp;
3584         enum btrfs_tree_block_status status;
3585         struct node_refs nrefs;
3586
3587         /*
3588          * Reuse the corrupt_block cache tree to record corrupted tree block
3589          *
3590          * Unlike the usage in extent tree check, here we do it in a per
3591          * fs/subvol tree base.
3592          */
3593         cache_tree_init(&corrupt_blocks);
3594         root->fs_info->corrupt_blocks = &corrupt_blocks;
3595
3596         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3597                 rec = get_root_rec(root_cache, root->root_key.objectid);
3598                 BUG_ON(IS_ERR(rec));
3599                 if (btrfs_root_refs(root_item) > 0)
3600                         rec->found_root_item = 1;
3601         }
3602
3603         btrfs_init_path(&path);
3604         memset(&root_node, 0, sizeof(root_node));
3605         cache_tree_init(&root_node.root_cache);
3606         cache_tree_init(&root_node.inode_cache);
3607         memset(&nrefs, 0, sizeof(nrefs));
3608
3609         /* Move the orphan extent record to corresponding inode_record */
3610         list_for_each_entry_safe(orphan, tmp,
3611                                  &root->orphan_data_extents, list) {
3612                 struct inode_record *inode;
3613
3614                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3615                                       1);
3616                 BUG_ON(IS_ERR(inode));
3617                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3618                 list_move(&orphan->list, &inode->orphan_extents);
3619         }
3620
3621         level = btrfs_header_level(root->node);
3622         memset(wc->nodes, 0, sizeof(wc->nodes));
3623         wc->nodes[level] = &root_node;
3624         wc->active_node = level;
3625         wc->root_level = level;
3626
3627         /* We may not have checked the root block, lets do that now */
3628         if (btrfs_is_leaf(root->node))
3629                 status = btrfs_check_leaf(root, NULL, root->node);
3630         else
3631                 status = btrfs_check_node(root, NULL, root->node);
3632         if (status != BTRFS_TREE_BLOCK_CLEAN)
3633                 return -EIO;
3634
3635         if (btrfs_root_refs(root_item) > 0 ||
3636             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3637                 path.nodes[level] = root->node;
3638                 extent_buffer_get(root->node);
3639                 path.slots[level] = 0;
3640         } else {
3641                 struct btrfs_key key;
3642                 struct btrfs_disk_key found_key;
3643
3644                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3645                 level = root_item->drop_level;
3646                 path.lowest_level = level;
3647                 if (level > btrfs_header_level(root->node) ||
3648                     level >= BTRFS_MAX_LEVEL) {
3649                         error("ignoring invalid drop level: %u", level);
3650                         goto skip_walking;
3651                 }
3652                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3653                 if (wret < 0)
3654                         goto skip_walking;
3655                 btrfs_node_key(path.nodes[level], &found_key,
3656                                 path.slots[level]);
3657                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3658                                         sizeof(found_key)));
3659         }
3660
3661         while (1) {
3662                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3663                 if (wret < 0)
3664                         ret = wret;
3665                 if (wret != 0)
3666                         break;
3667
3668                 wret = walk_up_tree(root, &path, wc, &level);
3669                 if (wret < 0)
3670                         ret = wret;
3671                 if (wret != 0)
3672                         break;
3673         }
3674 skip_walking:
3675         btrfs_release_path(&path);
3676
3677         if (!cache_tree_empty(&corrupt_blocks)) {
3678                 struct cache_extent *cache;
3679                 struct btrfs_corrupt_block *corrupt;
3680
3681                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3682                        root->root_key.objectid);
3683                 cache = first_cache_extent(&corrupt_blocks);
3684                 while (cache) {
3685                         corrupt = container_of(cache,
3686                                                struct btrfs_corrupt_block,
3687                                                cache);
3688                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3689                                cache->start, corrupt->level,
3690                                corrupt->key.objectid, corrupt->key.type,
3691                                corrupt->key.offset);
3692                         cache = next_cache_extent(cache);
3693                 }
3694                 if (repair) {
3695                         printf("Try to repair the btree for root %llu\n",
3696                                root->root_key.objectid);
3697                         ret = repair_btree(root, &corrupt_blocks);
3698                         if (ret < 0)
3699                                 fprintf(stderr, "Failed to repair btree: %s\n",
3700                                         strerror(-ret));
3701                         if (!ret)
3702                                 printf("Btree for root %llu is fixed\n",
3703                                        root->root_key.objectid);
3704                 }
3705         }
3706
3707         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3708         if (err < 0)
3709                 ret = err;
3710
3711         if (root_node.current) {
3712                 root_node.current->checked = 1;
3713                 maybe_free_inode_rec(&root_node.inode_cache,
3714                                 root_node.current);
3715         }
3716
3717         err = check_inode_recs(root, &root_node.inode_cache);
3718         if (!ret)
3719                 ret = err;
3720
3721         free_corrupt_blocks_tree(&corrupt_blocks);
3722         root->fs_info->corrupt_blocks = NULL;
3723         free_orphan_data_extents(&root->orphan_data_extents);
3724         return ret;
3725 }
3726
3727 static int fs_root_objectid(u64 objectid)
3728 {
3729         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3730             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3731                 return 1;
3732         return is_fstree(objectid);
3733 }
3734
3735 static int check_fs_roots(struct btrfs_root *root,
3736                           struct cache_tree *root_cache)
3737 {
3738         struct btrfs_path path;
3739         struct btrfs_key key;
3740         struct walk_control wc;
3741         struct extent_buffer *leaf, *tree_node;
3742         struct btrfs_root *tmp_root;
3743         struct btrfs_root *tree_root = root->fs_info->tree_root;
3744         int ret;
3745         int err = 0;
3746
3747         if (ctx.progress_enabled) {
3748                 ctx.tp = TASK_FS_ROOTS;
3749                 task_start(ctx.info);
3750         }
3751
3752         /*
3753          * Just in case we made any changes to the extent tree that weren't
3754          * reflected into the free space cache yet.
3755          */
3756         if (repair)
3757                 reset_cached_block_groups(root->fs_info);
3758         memset(&wc, 0, sizeof(wc));
3759         cache_tree_init(&wc.shared);
3760         btrfs_init_path(&path);
3761
3762 again:
3763         key.offset = 0;
3764         key.objectid = 0;
3765         key.type = BTRFS_ROOT_ITEM_KEY;
3766         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3767         if (ret < 0) {
3768                 err = 1;
3769                 goto out;
3770         }
3771         tree_node = tree_root->node;
3772         while (1) {
3773                 if (tree_node != tree_root->node) {
3774                         free_root_recs_tree(root_cache);
3775                         btrfs_release_path(&path);
3776                         goto again;
3777                 }
3778                 leaf = path.nodes[0];
3779                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3780                         ret = btrfs_next_leaf(tree_root, &path);
3781                         if (ret) {
3782                                 if (ret < 0)
3783                                         err = 1;
3784                                 break;
3785                         }
3786                         leaf = path.nodes[0];
3787                 }
3788                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3789                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3790                     fs_root_objectid(key.objectid)) {
3791                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3792                                 tmp_root = btrfs_read_fs_root_no_cache(
3793                                                 root->fs_info, &key);
3794                         } else {
3795                                 key.offset = (u64)-1;
3796                                 tmp_root = btrfs_read_fs_root(
3797                                                 root->fs_info, &key);
3798                         }
3799                         if (IS_ERR(tmp_root)) {
3800                                 err = 1;
3801                                 goto next;
3802                         }
3803                         ret = check_fs_root(tmp_root, root_cache, &wc);
3804                         if (ret == -EAGAIN) {
3805                                 free_root_recs_tree(root_cache);
3806                                 btrfs_release_path(&path);
3807                                 goto again;
3808                         }
3809                         if (ret)
3810                                 err = 1;
3811                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3812                                 btrfs_free_fs_root(tmp_root);
3813                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3814                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3815                         process_root_ref(leaf, path.slots[0], &key,
3816                                          root_cache);
3817                 }
3818 next:
3819                 path.slots[0]++;
3820         }
3821 out:
3822         btrfs_release_path(&path);
3823         if (err)
3824                 free_extent_cache_tree(&wc.shared);
3825         if (!cache_tree_empty(&wc.shared))
3826                 fprintf(stderr, "warning line %d\n", __LINE__);
3827
3828         task_stop(ctx.info);
3829
3830         return err;
3831 }
3832
3833 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3834 {
3835         struct list_head *cur = rec->backrefs.next;
3836         struct extent_backref *back;
3837         struct tree_backref *tback;
3838         struct data_backref *dback;
3839         u64 found = 0;
3840         int err = 0;
3841
3842         while(cur != &rec->backrefs) {
3843                 back = to_extent_backref(cur);
3844                 cur = cur->next;
3845                 if (!back->found_extent_tree) {
3846                         err = 1;
3847                         if (!print_errs)
3848                                 goto out;
3849                         if (back->is_data) {
3850                                 dback = to_data_backref(back);
3851                                 fprintf(stderr, "Backref %llu %s %llu"
3852                                         " owner %llu offset %llu num_refs %lu"
3853                                         " not found in extent tree\n",
3854                                         (unsigned long long)rec->start,
3855                                         back->full_backref ?
3856                                         "parent" : "root",
3857                                         back->full_backref ?
3858                                         (unsigned long long)dback->parent:
3859                                         (unsigned long long)dback->root,
3860                                         (unsigned long long)dback->owner,
3861                                         (unsigned long long)dback->offset,
3862                                         (unsigned long)dback->num_refs);
3863                         } else {
3864                                 tback = to_tree_backref(back);
3865                                 fprintf(stderr, "Backref %llu parent %llu"
3866                                         " root %llu not found in extent tree\n",
3867                                         (unsigned long long)rec->start,
3868                                         (unsigned long long)tback->parent,
3869                                         (unsigned long long)tback->root);
3870                         }
3871                 }
3872                 if (!back->is_data && !back->found_ref) {
3873                         err = 1;
3874                         if (!print_errs)
3875                                 goto out;
3876                         tback = to_tree_backref(back);
3877                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3878                                 (unsigned long long)rec->start,
3879                                 back->full_backref ? "parent" : "root",
3880                                 back->full_backref ?
3881                                 (unsigned long long)tback->parent :
3882                                 (unsigned long long)tback->root, back);
3883                 }
3884                 if (back->is_data) {
3885                         dback = to_data_backref(back);
3886                         if (dback->found_ref != dback->num_refs) {
3887                                 err = 1;
3888                                 if (!print_errs)
3889                                         goto out;
3890                                 fprintf(stderr, "Incorrect local backref count"
3891                                         " on %llu %s %llu owner %llu"
3892                                         " offset %llu found %u wanted %u back %p\n",
3893                                         (unsigned long long)rec->start,
3894                                         back->full_backref ?
3895                                         "parent" : "root",
3896                                         back->full_backref ?
3897                                         (unsigned long long)dback->parent:
3898                                         (unsigned long long)dback->root,
3899                                         (unsigned long long)dback->owner,
3900                                         (unsigned long long)dback->offset,
3901                                         dback->found_ref, dback->num_refs, back);
3902                         }
3903                         if (dback->disk_bytenr != rec->start) {
3904                                 err = 1;
3905                                 if (!print_errs)
3906                                         goto out;
3907                                 fprintf(stderr, "Backref disk bytenr does not"
3908                                         " match extent record, bytenr=%llu, "
3909                                         "ref bytenr=%llu\n",
3910                                         (unsigned long long)rec->start,
3911                                         (unsigned long long)dback->disk_bytenr);
3912                         }
3913
3914                         if (dback->bytes != rec->nr) {
3915                                 err = 1;
3916                                 if (!print_errs)
3917                                         goto out;
3918                                 fprintf(stderr, "Backref bytes do not match "
3919                                         "extent backref, bytenr=%llu, ref "
3920                                         "bytes=%llu, backref bytes=%llu\n",
3921                                         (unsigned long long)rec->start,
3922                                         (unsigned long long)rec->nr,
3923                                         (unsigned long long)dback->bytes);
3924                         }
3925                 }
3926                 if (!back->is_data) {
3927                         found += 1;
3928                 } else {
3929                         dback = to_data_backref(back);
3930                         found += dback->found_ref;
3931                 }
3932         }
3933         if (found != rec->refs) {
3934                 err = 1;
3935                 if (!print_errs)
3936                         goto out;
3937                 fprintf(stderr, "Incorrect global backref count "
3938                         "on %llu found %llu wanted %llu\n",
3939                         (unsigned long long)rec->start,
3940                         (unsigned long long)found,
3941                         (unsigned long long)rec->refs);
3942         }
3943 out:
3944         return err;
3945 }
3946
3947 static int free_all_extent_backrefs(struct extent_record *rec)
3948 {
3949         struct extent_backref *back;
3950         struct list_head *cur;
3951         while (!list_empty(&rec->backrefs)) {
3952                 cur = rec->backrefs.next;
3953                 back = to_extent_backref(cur);
3954                 list_del(cur);
3955                 free(back);
3956         }
3957         return 0;
3958 }
3959
3960 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3961                                      struct cache_tree *extent_cache)
3962 {
3963         struct cache_extent *cache;
3964         struct extent_record *rec;
3965
3966         while (1) {
3967                 cache = first_cache_extent(extent_cache);
3968                 if (!cache)
3969                         break;
3970                 rec = container_of(cache, struct extent_record, cache);
3971                 remove_cache_extent(extent_cache, cache);
3972                 free_all_extent_backrefs(rec);
3973                 free(rec);
3974         }
3975 }
3976
3977 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3978                                  struct extent_record *rec)
3979 {
3980         if (rec->content_checked && rec->owner_ref_checked &&
3981             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3982             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3983             !rec->bad_full_backref && !rec->crossing_stripes &&
3984             !rec->wrong_chunk_type) {
3985                 remove_cache_extent(extent_cache, &rec->cache);
3986                 free_all_extent_backrefs(rec);
3987                 list_del_init(&rec->list);
3988                 free(rec);
3989         }
3990         return 0;
3991 }
3992
3993 static int check_owner_ref(struct btrfs_root *root,
3994                             struct extent_record *rec,
3995                             struct extent_buffer *buf)
3996 {
3997         struct extent_backref *node;
3998         struct tree_backref *back;
3999         struct btrfs_root *ref_root;
4000         struct btrfs_key key;
4001         struct btrfs_path path;
4002         struct extent_buffer *parent;
4003         int level;
4004         int found = 0;
4005         int ret;
4006
4007         list_for_each_entry(node, &rec->backrefs, list) {
4008                 if (node->is_data)
4009                         continue;
4010                 if (!node->found_ref)
4011                         continue;
4012                 if (node->full_backref)
4013                         continue;
4014                 back = to_tree_backref(node);
4015                 if (btrfs_header_owner(buf) == back->root)
4016                         return 0;
4017         }
4018         BUG_ON(rec->is_root);
4019
4020         /* try to find the block by search corresponding fs tree */
4021         key.objectid = btrfs_header_owner(buf);
4022         key.type = BTRFS_ROOT_ITEM_KEY;
4023         key.offset = (u64)-1;
4024
4025         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4026         if (IS_ERR(ref_root))
4027                 return 1;
4028
4029         level = btrfs_header_level(buf);
4030         if (level == 0)
4031                 btrfs_item_key_to_cpu(buf, &key, 0);
4032         else
4033                 btrfs_node_key_to_cpu(buf, &key, 0);
4034
4035         btrfs_init_path(&path);
4036         path.lowest_level = level + 1;
4037         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4038         if (ret < 0)
4039                 return 0;
4040
4041         parent = path.nodes[level + 1];
4042         if (parent && buf->start == btrfs_node_blockptr(parent,
4043                                                         path.slots[level + 1]))
4044                 found = 1;
4045
4046         btrfs_release_path(&path);
4047         return found ? 0 : 1;
4048 }
4049
4050 static int is_extent_tree_record(struct extent_record *rec)
4051 {
4052         struct list_head *cur = rec->backrefs.next;
4053         struct extent_backref *node;
4054         struct tree_backref *back;
4055         int is_extent = 0;
4056
4057         while(cur != &rec->backrefs) {
4058                 node = to_extent_backref(cur);
4059                 cur = cur->next;
4060                 if (node->is_data)
4061                         return 0;
4062                 back = to_tree_backref(node);
4063                 if (node->full_backref)
4064                         return 0;
4065                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4066                         is_extent = 1;
4067         }
4068         return is_extent;
4069 }
4070
4071
4072 static int record_bad_block_io(struct btrfs_fs_info *info,
4073                                struct cache_tree *extent_cache,
4074                                u64 start, u64 len)
4075 {
4076         struct extent_record *rec;
4077         struct cache_extent *cache;
4078         struct btrfs_key key;
4079
4080         cache = lookup_cache_extent(extent_cache, start, len);
4081         if (!cache)
4082                 return 0;
4083
4084         rec = container_of(cache, struct extent_record, cache);
4085         if (!is_extent_tree_record(rec))
4086                 return 0;
4087
4088         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4089         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4090 }
4091
4092 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4093                        struct extent_buffer *buf, int slot)
4094 {
4095         if (btrfs_header_level(buf)) {
4096                 struct btrfs_key_ptr ptr1, ptr2;
4097
4098                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4099                                    sizeof(struct btrfs_key_ptr));
4100                 read_extent_buffer(buf, &ptr2,
4101                                    btrfs_node_key_ptr_offset(slot + 1),
4102                                    sizeof(struct btrfs_key_ptr));
4103                 write_extent_buffer(buf, &ptr1,
4104                                     btrfs_node_key_ptr_offset(slot + 1),
4105                                     sizeof(struct btrfs_key_ptr));
4106                 write_extent_buffer(buf, &ptr2,
4107                                     btrfs_node_key_ptr_offset(slot),
4108                                     sizeof(struct btrfs_key_ptr));
4109                 if (slot == 0) {
4110                         struct btrfs_disk_key key;
4111                         btrfs_node_key(buf, &key, 0);
4112                         btrfs_fixup_low_keys(root, path, &key,
4113                                              btrfs_header_level(buf) + 1);
4114                 }
4115         } else {
4116                 struct btrfs_item *item1, *item2;
4117                 struct btrfs_key k1, k2;
4118                 char *item1_data, *item2_data;
4119                 u32 item1_offset, item2_offset, item1_size, item2_size;
4120
4121                 item1 = btrfs_item_nr(slot);
4122                 item2 = btrfs_item_nr(slot + 1);
4123                 btrfs_item_key_to_cpu(buf, &k1, slot);
4124                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4125                 item1_offset = btrfs_item_offset(buf, item1);
4126                 item2_offset = btrfs_item_offset(buf, item2);
4127                 item1_size = btrfs_item_size(buf, item1);
4128                 item2_size = btrfs_item_size(buf, item2);
4129
4130                 item1_data = malloc(item1_size);
4131                 if (!item1_data)
4132                         return -ENOMEM;
4133                 item2_data = malloc(item2_size);
4134                 if (!item2_data) {
4135                         free(item1_data);
4136                         return -ENOMEM;
4137                 }
4138
4139                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4140                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4141
4142                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4143                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4144                 free(item1_data);
4145                 free(item2_data);
4146
4147                 btrfs_set_item_offset(buf, item1, item2_offset);
4148                 btrfs_set_item_offset(buf, item2, item1_offset);
4149                 btrfs_set_item_size(buf, item1, item2_size);
4150                 btrfs_set_item_size(buf, item2, item1_size);
4151
4152                 path->slots[0] = slot;
4153                 btrfs_set_item_key_unsafe(root, path, &k2);
4154                 path->slots[0] = slot + 1;
4155                 btrfs_set_item_key_unsafe(root, path, &k1);
4156         }
4157         return 0;
4158 }
4159
4160 static int fix_key_order(struct btrfs_trans_handle *trans,
4161                          struct btrfs_root *root,
4162                          struct btrfs_path *path)
4163 {
4164         struct extent_buffer *buf;
4165         struct btrfs_key k1, k2;
4166         int i;
4167         int level = path->lowest_level;
4168         int ret = -EIO;
4169
4170         buf = path->nodes[level];
4171         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4172                 if (level) {
4173                         btrfs_node_key_to_cpu(buf, &k1, i);
4174                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4175                 } else {
4176                         btrfs_item_key_to_cpu(buf, &k1, i);
4177                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4178                 }
4179                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4180                         continue;
4181                 ret = swap_values(root, path, buf, i);
4182                 if (ret)
4183                         break;
4184                 btrfs_mark_buffer_dirty(buf);
4185                 i = 0;
4186         }
4187         return ret;
4188 }
4189
4190 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4191                              struct btrfs_root *root,
4192                              struct btrfs_path *path,
4193                              struct extent_buffer *buf, int slot)
4194 {
4195         struct btrfs_key key;
4196         int nritems = btrfs_header_nritems(buf);
4197
4198         btrfs_item_key_to_cpu(buf, &key, slot);
4199
4200         /* These are all the keys we can deal with missing. */
4201         if (key.type != BTRFS_DIR_INDEX_KEY &&
4202             key.type != BTRFS_EXTENT_ITEM_KEY &&
4203             key.type != BTRFS_METADATA_ITEM_KEY &&
4204             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4205             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4206                 return -1;
4207
4208         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4209                (unsigned long long)key.objectid, key.type,
4210                (unsigned long long)key.offset, slot, buf->start);
4211         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4212                               btrfs_item_nr_offset(slot + 1),
4213                               sizeof(struct btrfs_item) *
4214                               (nritems - slot - 1));
4215         btrfs_set_header_nritems(buf, nritems - 1);
4216         if (slot == 0) {
4217                 struct btrfs_disk_key disk_key;
4218
4219                 btrfs_item_key(buf, &disk_key, 0);
4220                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4221         }
4222         btrfs_mark_buffer_dirty(buf);
4223         return 0;
4224 }
4225
4226 static int fix_item_offset(struct btrfs_trans_handle *trans,
4227                            struct btrfs_root *root,
4228                            struct btrfs_path *path)
4229 {
4230         struct extent_buffer *buf;
4231         int i;
4232         int ret = 0;
4233
4234         /* We should only get this for leaves */
4235         BUG_ON(path->lowest_level);
4236         buf = path->nodes[0];
4237 again:
4238         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4239                 unsigned int shift = 0, offset;
4240
4241                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4242                     BTRFS_LEAF_DATA_SIZE(root)) {
4243                         if (btrfs_item_end_nr(buf, i) >
4244                             BTRFS_LEAF_DATA_SIZE(root)) {
4245                                 ret = delete_bogus_item(trans, root, path,
4246                                                         buf, i);
4247                                 if (!ret)
4248                                         goto again;
4249                                 fprintf(stderr, "item is off the end of the "
4250                                         "leaf, can't fix\n");
4251                                 ret = -EIO;
4252                                 break;
4253                         }
4254                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4255                                 btrfs_item_end_nr(buf, i);
4256                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4257                            btrfs_item_offset_nr(buf, i - 1)) {
4258                         if (btrfs_item_end_nr(buf, i) >
4259                             btrfs_item_offset_nr(buf, i - 1)) {
4260                                 ret = delete_bogus_item(trans, root, path,
4261                                                         buf, i);
4262                                 if (!ret)
4263                                         goto again;
4264                                 fprintf(stderr, "items overlap, can't fix\n");
4265                                 ret = -EIO;
4266                                 break;
4267                         }
4268                         shift = btrfs_item_offset_nr(buf, i - 1) -
4269                                 btrfs_item_end_nr(buf, i);
4270                 }
4271                 if (!shift)
4272                         continue;
4273
4274                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4275                        i, shift, (unsigned long long)buf->start);
4276                 offset = btrfs_item_offset_nr(buf, i);
4277                 memmove_extent_buffer(buf,
4278                                       btrfs_leaf_data(buf) + offset + shift,
4279                                       btrfs_leaf_data(buf) + offset,
4280                                       btrfs_item_size_nr(buf, i));
4281                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4282                                       offset + shift);
4283                 btrfs_mark_buffer_dirty(buf);
4284         }
4285
4286         /*
4287          * We may have moved things, in which case we want to exit so we don't
4288          * write those changes out.  Once we have proper abort functionality in
4289          * progs this can be changed to something nicer.
4290          */
4291         BUG_ON(ret);
4292         return ret;
4293 }
4294
4295 /*
4296  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4297  * then just return -EIO.
4298  */
4299 static int try_to_fix_bad_block(struct btrfs_root *root,
4300                                 struct extent_buffer *buf,
4301                                 enum btrfs_tree_block_status status)
4302 {
4303         struct btrfs_trans_handle *trans;
4304         struct ulist *roots;
4305         struct ulist_node *node;
4306         struct btrfs_root *search_root;
4307         struct btrfs_path *path;
4308         struct ulist_iterator iter;
4309         struct btrfs_key root_key, key;
4310         int ret;
4311
4312         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4313             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4314                 return -EIO;
4315
4316         path = btrfs_alloc_path();
4317         if (!path)
4318                 return -EIO;
4319
4320         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4321                                    0, &roots);
4322         if (ret) {
4323                 btrfs_free_path(path);
4324                 return -EIO;
4325         }
4326
4327         ULIST_ITER_INIT(&iter);
4328         while ((node = ulist_next(roots, &iter))) {
4329                 root_key.objectid = node->val;
4330                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4331                 root_key.offset = (u64)-1;
4332
4333                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4334                 if (IS_ERR(root)) {
4335                         ret = -EIO;
4336                         break;
4337                 }
4338
4339
4340                 trans = btrfs_start_transaction(search_root, 0);
4341                 if (IS_ERR(trans)) {
4342                         ret = PTR_ERR(trans);
4343                         break;
4344                 }
4345
4346                 path->lowest_level = btrfs_header_level(buf);
4347                 path->skip_check_block = 1;
4348                 if (path->lowest_level)
4349                         btrfs_node_key_to_cpu(buf, &key, 0);
4350                 else
4351                         btrfs_item_key_to_cpu(buf, &key, 0);
4352                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4353                 if (ret) {
4354                         ret = -EIO;
4355                         btrfs_commit_transaction(trans, search_root);
4356                         break;
4357                 }
4358                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4359                         ret = fix_key_order(trans, search_root, path);
4360                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4361                         ret = fix_item_offset(trans, search_root, path);
4362                 if (ret) {
4363                         btrfs_commit_transaction(trans, search_root);
4364                         break;
4365                 }
4366                 btrfs_release_path(path);
4367                 btrfs_commit_transaction(trans, search_root);
4368         }
4369         ulist_free(roots);
4370         btrfs_free_path(path);
4371         return ret;
4372 }
4373
4374 static int check_block(struct btrfs_root *root,
4375                        struct cache_tree *extent_cache,
4376                        struct extent_buffer *buf, u64 flags)
4377 {
4378         struct extent_record *rec;
4379         struct cache_extent *cache;
4380         struct btrfs_key key;
4381         enum btrfs_tree_block_status status;
4382         int ret = 0;
4383         int level;
4384
4385         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4386         if (!cache)
4387                 return 1;
4388         rec = container_of(cache, struct extent_record, cache);
4389         rec->generation = btrfs_header_generation(buf);
4390
4391         level = btrfs_header_level(buf);
4392         if (btrfs_header_nritems(buf) > 0) {
4393
4394                 if (level == 0)
4395                         btrfs_item_key_to_cpu(buf, &key, 0);
4396                 else
4397                         btrfs_node_key_to_cpu(buf, &key, 0);
4398
4399                 rec->info_objectid = key.objectid;
4400         }
4401         rec->info_level = level;
4402
4403         if (btrfs_is_leaf(buf))
4404                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4405         else
4406                 status = btrfs_check_node(root, &rec->parent_key, buf);
4407
4408         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4409                 if (repair)
4410                         status = try_to_fix_bad_block(root, buf, status);
4411                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4412                         ret = -EIO;
4413                         fprintf(stderr, "bad block %llu\n",
4414                                 (unsigned long long)buf->start);
4415                 } else {
4416                         /*
4417                          * Signal to callers we need to start the scan over
4418                          * again since we'll have cowed blocks.
4419                          */
4420                         ret = -EAGAIN;
4421                 }
4422         } else {
4423                 rec->content_checked = 1;
4424                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4425                         rec->owner_ref_checked = 1;
4426                 else {
4427                         ret = check_owner_ref(root, rec, buf);
4428                         if (!ret)
4429                                 rec->owner_ref_checked = 1;
4430                 }
4431         }
4432         if (!ret)
4433                 maybe_free_extent_rec(extent_cache, rec);
4434         return ret;
4435 }
4436
4437 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4438                                                 u64 parent, u64 root)
4439 {
4440         struct list_head *cur = rec->backrefs.next;
4441         struct extent_backref *node;
4442         struct tree_backref *back;
4443
4444         while(cur != &rec->backrefs) {
4445                 node = to_extent_backref(cur);
4446                 cur = cur->next;
4447                 if (node->is_data)
4448                         continue;
4449                 back = to_tree_backref(node);
4450                 if (parent > 0) {
4451                         if (!node->full_backref)
4452                                 continue;
4453                         if (parent == back->parent)
4454                                 return back;
4455                 } else {
4456                         if (node->full_backref)
4457                                 continue;
4458                         if (back->root == root)
4459                                 return back;
4460                 }
4461         }
4462         return NULL;
4463 }
4464
4465 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4466                                                 u64 parent, u64 root)
4467 {
4468         struct tree_backref *ref = malloc(sizeof(*ref));
4469
4470         if (!ref)
4471                 return NULL;
4472         memset(&ref->node, 0, sizeof(ref->node));
4473         if (parent > 0) {
4474                 ref->parent = parent;
4475                 ref->node.full_backref = 1;
4476         } else {
4477                 ref->root = root;
4478                 ref->node.full_backref = 0;
4479         }
4480         list_add_tail(&ref->node.list, &rec->backrefs);
4481
4482         return ref;
4483 }
4484
4485 static struct data_backref *find_data_backref(struct extent_record *rec,
4486                                                 u64 parent, u64 root,
4487                                                 u64 owner, u64 offset,
4488                                                 int found_ref,
4489                                                 u64 disk_bytenr, u64 bytes)
4490 {
4491         struct list_head *cur = rec->backrefs.next;
4492         struct extent_backref *node;
4493         struct data_backref *back;
4494
4495         while(cur != &rec->backrefs) {
4496                 node = to_extent_backref(cur);
4497                 cur = cur->next;
4498                 if (!node->is_data)
4499                         continue;
4500                 back = to_data_backref(node);
4501                 if (parent > 0) {
4502                         if (!node->full_backref)
4503                                 continue;
4504                         if (parent == back->parent)
4505                                 return back;
4506                 } else {
4507                         if (node->full_backref)
4508                                 continue;
4509                         if (back->root == root && back->owner == owner &&
4510                             back->offset == offset) {
4511                                 if (found_ref && node->found_ref &&
4512                                     (back->bytes != bytes ||
4513                                     back->disk_bytenr != disk_bytenr))
4514                                         continue;
4515                                 return back;
4516                         }
4517                 }
4518         }
4519         return NULL;
4520 }
4521
4522 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4523                                                 u64 parent, u64 root,
4524                                                 u64 owner, u64 offset,
4525                                                 u64 max_size)
4526 {
4527         struct data_backref *ref = malloc(sizeof(*ref));
4528
4529         if (!ref)
4530                 return NULL;
4531         memset(&ref->node, 0, sizeof(ref->node));
4532         ref->node.is_data = 1;
4533
4534         if (parent > 0) {
4535                 ref->parent = parent;
4536                 ref->owner = 0;
4537                 ref->offset = 0;
4538                 ref->node.full_backref = 1;
4539         } else {
4540                 ref->root = root;
4541                 ref->owner = owner;
4542                 ref->offset = offset;
4543                 ref->node.full_backref = 0;
4544         }
4545         ref->bytes = max_size;
4546         ref->found_ref = 0;
4547         ref->num_refs = 0;
4548         list_add_tail(&ref->node.list, &rec->backrefs);
4549         if (max_size > rec->max_size)
4550                 rec->max_size = max_size;
4551         return ref;
4552 }
4553
4554 /* Check if the type of extent matches with its chunk */
4555 static void check_extent_type(struct extent_record *rec)
4556 {
4557         struct btrfs_block_group_cache *bg_cache;
4558
4559         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4560         if (!bg_cache)
4561                 return;
4562
4563         /* data extent, check chunk directly*/
4564         if (!rec->metadata) {
4565                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4566                         rec->wrong_chunk_type = 1;
4567                 return;
4568         }
4569
4570         /* metadata extent, check the obvious case first */
4571         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4572                                  BTRFS_BLOCK_GROUP_METADATA))) {
4573                 rec->wrong_chunk_type = 1;
4574                 return;
4575         }
4576
4577         /*
4578          * Check SYSTEM extent, as it's also marked as metadata, we can only
4579          * make sure it's a SYSTEM extent by its backref
4580          */
4581         if (!list_empty(&rec->backrefs)) {
4582                 struct extent_backref *node;
4583                 struct tree_backref *tback;
4584                 u64 bg_type;
4585
4586                 node = to_extent_backref(rec->backrefs.next);
4587                 if (node->is_data) {
4588                         /* tree block shouldn't have data backref */
4589                         rec->wrong_chunk_type = 1;
4590                         return;
4591                 }
4592                 tback = container_of(node, struct tree_backref, node);
4593
4594                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4595                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4596                 else
4597                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4598                 if (!(bg_cache->flags & bg_type))
4599                         rec->wrong_chunk_type = 1;
4600         }
4601 }
4602
4603 /*
4604  * Allocate a new extent record, fill default values from @tmpl and insert int
4605  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4606  * the cache, otherwise it fails.
4607  */
4608 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4609                 struct extent_record *tmpl)
4610 {
4611         struct extent_record *rec;
4612         int ret = 0;
4613
4614         rec = malloc(sizeof(*rec));
4615         if (!rec)
4616                 return -ENOMEM;
4617         rec->start = tmpl->start;
4618         rec->max_size = tmpl->max_size;
4619         rec->nr = max(tmpl->nr, tmpl->max_size);
4620         rec->found_rec = tmpl->found_rec;
4621         rec->content_checked = tmpl->content_checked;
4622         rec->owner_ref_checked = tmpl->owner_ref_checked;
4623         rec->num_duplicates = 0;
4624         rec->metadata = tmpl->metadata;
4625         rec->flag_block_full_backref = FLAG_UNSET;
4626         rec->bad_full_backref = 0;
4627         rec->crossing_stripes = 0;
4628         rec->wrong_chunk_type = 0;
4629         rec->is_root = tmpl->is_root;
4630         rec->refs = tmpl->refs;
4631         rec->extent_item_refs = tmpl->extent_item_refs;
4632         rec->parent_generation = tmpl->parent_generation;
4633         INIT_LIST_HEAD(&rec->backrefs);
4634         INIT_LIST_HEAD(&rec->dups);
4635         INIT_LIST_HEAD(&rec->list);
4636         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4637         rec->cache.start = tmpl->start;
4638         rec->cache.size = tmpl->nr;
4639         ret = insert_cache_extent(extent_cache, &rec->cache);
4640         if (ret) {
4641                 free(rec);
4642                 return ret;
4643         }
4644         bytes_used += rec->nr;
4645
4646         if (tmpl->metadata)
4647                 rec->crossing_stripes = check_crossing_stripes(global_info,
4648                                 rec->start, global_info->tree_root->nodesize);
4649         check_extent_type(rec);
4650         return ret;
4651 }
4652
4653 /*
4654  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4655  * some are hints:
4656  * - refs              - if found, increase refs
4657  * - is_root           - if found, set
4658  * - content_checked   - if found, set
4659  * - owner_ref_checked - if found, set
4660  *
4661  * If not found, create a new one, initialize and insert.
4662  */
4663 static int add_extent_rec(struct cache_tree *extent_cache,
4664                 struct extent_record *tmpl)
4665 {
4666         struct extent_record *rec;
4667         struct cache_extent *cache;
4668         int ret = 0;
4669         int dup = 0;
4670
4671         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4672         if (cache) {
4673                 rec = container_of(cache, struct extent_record, cache);
4674                 if (tmpl->refs)
4675                         rec->refs++;
4676                 if (rec->nr == 1)
4677                         rec->nr = max(tmpl->nr, tmpl->max_size);
4678
4679                 /*
4680                  * We need to make sure to reset nr to whatever the extent
4681                  * record says was the real size, this way we can compare it to
4682                  * the backrefs.
4683                  */
4684                 if (tmpl->found_rec) {
4685                         if (tmpl->start != rec->start || rec->found_rec) {
4686                                 struct extent_record *tmp;
4687
4688                                 dup = 1;
4689                                 if (list_empty(&rec->list))
4690                                         list_add_tail(&rec->list,
4691                                                       &duplicate_extents);
4692
4693                                 /*
4694                                  * We have to do this song and dance in case we
4695                                  * find an extent record that falls inside of
4696                                  * our current extent record but does not have
4697                                  * the same objectid.
4698                                  */
4699                                 tmp = malloc(sizeof(*tmp));
4700                                 if (!tmp)
4701                                         return -ENOMEM;
4702                                 tmp->start = tmpl->start;
4703                                 tmp->max_size = tmpl->max_size;
4704                                 tmp->nr = tmpl->nr;
4705                                 tmp->found_rec = 1;
4706                                 tmp->metadata = tmpl->metadata;
4707                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4708                                 INIT_LIST_HEAD(&tmp->list);
4709                                 list_add_tail(&tmp->list, &rec->dups);
4710                                 rec->num_duplicates++;
4711                         } else {
4712                                 rec->nr = tmpl->nr;
4713                                 rec->found_rec = 1;
4714                         }
4715                 }
4716
4717                 if (tmpl->extent_item_refs && !dup) {
4718                         if (rec->extent_item_refs) {
4719                                 fprintf(stderr, "block %llu rec "
4720                                         "extent_item_refs %llu, passed %llu\n",
4721                                         (unsigned long long)tmpl->start,
4722                                         (unsigned long long)
4723                                                         rec->extent_item_refs,
4724                                         (unsigned long long)tmpl->extent_item_refs);
4725                         }
4726                         rec->extent_item_refs = tmpl->extent_item_refs;
4727                 }
4728                 if (tmpl->is_root)
4729                         rec->is_root = 1;
4730                 if (tmpl->content_checked)
4731                         rec->content_checked = 1;
4732                 if (tmpl->owner_ref_checked)
4733                         rec->owner_ref_checked = 1;
4734                 memcpy(&rec->parent_key, &tmpl->parent_key,
4735                                 sizeof(tmpl->parent_key));
4736                 if (tmpl->parent_generation)
4737                         rec->parent_generation = tmpl->parent_generation;
4738                 if (rec->max_size < tmpl->max_size)
4739                         rec->max_size = tmpl->max_size;
4740
4741                 /*
4742                  * A metadata extent can't cross stripe_len boundary, otherwise
4743                  * kernel scrub won't be able to handle it.
4744                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4745                  * it.
4746                  */
4747                 if (tmpl->metadata)
4748                         rec->crossing_stripes = check_crossing_stripes(
4749                                         global_info, rec->start,
4750                                         global_info->tree_root->nodesize);
4751                 check_extent_type(rec);
4752                 maybe_free_extent_rec(extent_cache, rec);
4753                 return ret;
4754         }
4755
4756         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4757
4758         return ret;
4759 }
4760
4761 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4762                             u64 parent, u64 root, int found_ref)
4763 {
4764         struct extent_record *rec;
4765         struct tree_backref *back;
4766         struct cache_extent *cache;
4767         int ret;
4768
4769         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4770         if (!cache) {
4771                 struct extent_record tmpl;
4772
4773                 memset(&tmpl, 0, sizeof(tmpl));
4774                 tmpl.start = bytenr;
4775                 tmpl.nr = 1;
4776                 tmpl.metadata = 1;
4777
4778                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4779                 if (ret)
4780                         return ret;
4781
4782                 /* really a bug in cache_extent implement now */
4783                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4784                 if (!cache)
4785                         return -ENOENT;
4786         }
4787
4788         rec = container_of(cache, struct extent_record, cache);
4789         if (rec->start != bytenr) {
4790                 /*
4791                  * Several cause, from unaligned bytenr to over lapping extents
4792                  */
4793                 return -EEXIST;
4794         }
4795
4796         back = find_tree_backref(rec, parent, root);
4797         if (!back) {
4798                 back = alloc_tree_backref(rec, parent, root);
4799                 if (!back)
4800                         return -ENOMEM;
4801         }
4802
4803         if (found_ref) {
4804                 if (back->node.found_ref) {
4805                         fprintf(stderr, "Extent back ref already exists "
4806                                 "for %llu parent %llu root %llu \n",
4807                                 (unsigned long long)bytenr,
4808                                 (unsigned long long)parent,
4809                                 (unsigned long long)root);
4810                 }
4811                 back->node.found_ref = 1;
4812         } else {
4813                 if (back->node.found_extent_tree) {
4814                         fprintf(stderr, "Extent back ref already exists "
4815                                 "for %llu parent %llu root %llu \n",
4816                                 (unsigned long long)bytenr,
4817                                 (unsigned long long)parent,
4818                                 (unsigned long long)root);
4819                 }
4820                 back->node.found_extent_tree = 1;
4821         }
4822         check_extent_type(rec);
4823         maybe_free_extent_rec(extent_cache, rec);
4824         return 0;
4825 }
4826
4827 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4828                             u64 parent, u64 root, u64 owner, u64 offset,
4829                             u32 num_refs, int found_ref, u64 max_size)
4830 {
4831         struct extent_record *rec;
4832         struct data_backref *back;
4833         struct cache_extent *cache;
4834         int ret;
4835
4836         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4837         if (!cache) {
4838                 struct extent_record tmpl;
4839
4840                 memset(&tmpl, 0, sizeof(tmpl));
4841                 tmpl.start = bytenr;
4842                 tmpl.nr = 1;
4843                 tmpl.max_size = max_size;
4844
4845                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4846                 if (ret)
4847                         return ret;
4848
4849                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4850                 if (!cache)
4851                         abort();
4852         }
4853
4854         rec = container_of(cache, struct extent_record, cache);
4855         if (rec->max_size < max_size)
4856                 rec->max_size = max_size;
4857
4858         /*
4859          * If found_ref is set then max_size is the real size and must match the
4860          * existing refs.  So if we have already found a ref then we need to
4861          * make sure that this ref matches the existing one, otherwise we need
4862          * to add a new backref so we can notice that the backrefs don't match
4863          * and we need to figure out who is telling the truth.  This is to
4864          * account for that awful fsync bug I introduced where we'd end up with
4865          * a btrfs_file_extent_item that would have its length include multiple
4866          * prealloc extents or point inside of a prealloc extent.
4867          */
4868         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4869                                  bytenr, max_size);
4870         if (!back) {
4871                 back = alloc_data_backref(rec, parent, root, owner, offset,
4872                                           max_size);
4873                 BUG_ON(!back);
4874         }
4875
4876         if (found_ref) {
4877                 BUG_ON(num_refs != 1);
4878                 if (back->node.found_ref)
4879                         BUG_ON(back->bytes != max_size);
4880                 back->node.found_ref = 1;
4881                 back->found_ref += 1;
4882                 back->bytes = max_size;
4883                 back->disk_bytenr = bytenr;
4884                 rec->refs += 1;
4885                 rec->content_checked = 1;
4886                 rec->owner_ref_checked = 1;
4887         } else {
4888                 if (back->node.found_extent_tree) {
4889                         fprintf(stderr, "Extent back ref already exists "
4890                                 "for %llu parent %llu root %llu "
4891                                 "owner %llu offset %llu num_refs %lu\n",
4892                                 (unsigned long long)bytenr,
4893                                 (unsigned long long)parent,
4894                                 (unsigned long long)root,
4895                                 (unsigned long long)owner,
4896                                 (unsigned long long)offset,
4897                                 (unsigned long)num_refs);
4898                 }
4899                 back->num_refs = num_refs;
4900                 back->node.found_extent_tree = 1;
4901         }
4902         maybe_free_extent_rec(extent_cache, rec);
4903         return 0;
4904 }
4905
4906 static int add_pending(struct cache_tree *pending,
4907                        struct cache_tree *seen, u64 bytenr, u32 size)
4908 {
4909         int ret;
4910         ret = add_cache_extent(seen, bytenr, size);
4911         if (ret)
4912                 return ret;
4913         add_cache_extent(pending, bytenr, size);
4914         return 0;
4915 }
4916
4917 static int pick_next_pending(struct cache_tree *pending,
4918                         struct cache_tree *reada,
4919                         struct cache_tree *nodes,
4920                         u64 last, struct block_info *bits, int bits_nr,
4921                         int *reada_bits)
4922 {
4923         unsigned long node_start = last;
4924         struct cache_extent *cache;
4925         int ret;
4926
4927         cache = search_cache_extent(reada, 0);
4928         if (cache) {
4929                 bits[0].start = cache->start;
4930                 bits[0].size = cache->size;
4931                 *reada_bits = 1;
4932                 return 1;
4933         }
4934         *reada_bits = 0;
4935         if (node_start > 32768)
4936                 node_start -= 32768;
4937
4938         cache = search_cache_extent(nodes, node_start);
4939         if (!cache)
4940                 cache = search_cache_extent(nodes, 0);
4941
4942         if (!cache) {
4943                  cache = search_cache_extent(pending, 0);
4944                  if (!cache)
4945                          return 0;
4946                  ret = 0;
4947                  do {
4948                          bits[ret].start = cache->start;
4949                          bits[ret].size = cache->size;
4950                          cache = next_cache_extent(cache);
4951                          ret++;
4952                  } while (cache && ret < bits_nr);
4953                  return ret;
4954         }
4955
4956         ret = 0;
4957         do {
4958                 bits[ret].start = cache->start;
4959                 bits[ret].size = cache->size;
4960                 cache = next_cache_extent(cache);
4961                 ret++;
4962         } while (cache && ret < bits_nr);
4963
4964         if (bits_nr - ret > 8) {
4965                 u64 lookup = bits[0].start + bits[0].size;
4966                 struct cache_extent *next;
4967                 next = search_cache_extent(pending, lookup);
4968                 while(next) {
4969                         if (next->start - lookup > 32768)
4970                                 break;
4971                         bits[ret].start = next->start;
4972                         bits[ret].size = next->size;
4973                         lookup = next->start + next->size;
4974                         ret++;
4975                         if (ret == bits_nr)
4976                                 break;
4977                         next = next_cache_extent(next);
4978                         if (!next)
4979                                 break;
4980                 }
4981         }
4982         return ret;
4983 }
4984
4985 static void free_chunk_record(struct cache_extent *cache)
4986 {
4987         struct chunk_record *rec;
4988
4989         rec = container_of(cache, struct chunk_record, cache);
4990         list_del_init(&rec->list);
4991         list_del_init(&rec->dextents);
4992         free(rec);
4993 }
4994
4995 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4996 {
4997         cache_tree_free_extents(chunk_cache, free_chunk_record);
4998 }
4999
5000 static void free_device_record(struct rb_node *node)
5001 {
5002         struct device_record *rec;
5003
5004         rec = container_of(node, struct device_record, node);
5005         free(rec);
5006 }
5007
5008 FREE_RB_BASED_TREE(device_cache, free_device_record);
5009
5010 int insert_block_group_record(struct block_group_tree *tree,
5011                               struct block_group_record *bg_rec)
5012 {
5013         int ret;
5014
5015         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5016         if (ret)
5017                 return ret;
5018
5019         list_add_tail(&bg_rec->list, &tree->block_groups);
5020         return 0;
5021 }
5022
5023 static void free_block_group_record(struct cache_extent *cache)
5024 {
5025         struct block_group_record *rec;
5026
5027         rec = container_of(cache, struct block_group_record, cache);
5028         list_del_init(&rec->list);
5029         free(rec);
5030 }
5031
5032 void free_block_group_tree(struct block_group_tree *tree)
5033 {
5034         cache_tree_free_extents(&tree->tree, free_block_group_record);
5035 }
5036
5037 int insert_device_extent_record(struct device_extent_tree *tree,
5038                                 struct device_extent_record *de_rec)
5039 {
5040         int ret;
5041
5042         /*
5043          * Device extent is a bit different from the other extents, because
5044          * the extents which belong to the different devices may have the
5045          * same start and size, so we need use the special extent cache
5046          * search/insert functions.
5047          */
5048         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5049         if (ret)
5050                 return ret;
5051
5052         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5053         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5054         return 0;
5055 }
5056
5057 static void free_device_extent_record(struct cache_extent *cache)
5058 {
5059         struct device_extent_record *rec;
5060
5061         rec = container_of(cache, struct device_extent_record, cache);
5062         if (!list_empty(&rec->chunk_list))
5063                 list_del_init(&rec->chunk_list);
5064         if (!list_empty(&rec->device_list))
5065                 list_del_init(&rec->device_list);
5066         free(rec);
5067 }
5068
5069 void free_device_extent_tree(struct device_extent_tree *tree)
5070 {
5071         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5072 }
5073
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache. key.objectid is used as the extent bytenr and key.offset as
 * the parent. Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID become
 * tree backrefs, all others become data backrefs carrying the item's ref
 * count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                                0, 0);
}
#endif
5094
5095 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5096                                             struct btrfs_key *key,
5097                                             int slot)
5098 {
5099         struct btrfs_chunk *ptr;
5100         struct chunk_record *rec;
5101         int num_stripes, i;
5102
5103         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5104         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5105
5106         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5107         if (!rec) {
5108                 fprintf(stderr, "memory allocation failed\n");
5109                 exit(-1);
5110         }
5111
5112         INIT_LIST_HEAD(&rec->list);
5113         INIT_LIST_HEAD(&rec->dextents);
5114         rec->bg_rec = NULL;
5115
5116         rec->cache.start = key->offset;
5117         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5118
5119         rec->generation = btrfs_header_generation(leaf);
5120
5121         rec->objectid = key->objectid;
5122         rec->type = key->type;
5123         rec->offset = key->offset;
5124
5125         rec->length = rec->cache.size;
5126         rec->owner = btrfs_chunk_owner(leaf, ptr);
5127         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5128         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5129         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5130         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5131         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5132         rec->num_stripes = num_stripes;
5133         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5134
5135         for (i = 0; i < rec->num_stripes; ++i) {
5136                 rec->stripes[i].devid =
5137                         btrfs_stripe_devid_nr(leaf, ptr, i);
5138                 rec->stripes[i].offset =
5139                         btrfs_stripe_offset_nr(leaf, ptr, i);
5140                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5141                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5142                                 BTRFS_UUID_SIZE);
5143         }
5144
5145         return rec;
5146 }
5147
5148 static int process_chunk_item(struct cache_tree *chunk_cache,
5149                               struct btrfs_key *key, struct extent_buffer *eb,
5150                               int slot)
5151 {
5152         struct chunk_record *rec;
5153         struct btrfs_chunk *chunk;
5154         int ret = 0;
5155
5156         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5157         /*
5158          * Do extra check for this chunk item,
5159          *
5160          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5161          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5162          * and owner<->key_type check.
5163          */
5164         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5165                                       key->offset);
5166         if (ret < 0) {
5167                 error("chunk(%llu, %llu) is not valid, ignore it",
5168                       key->offset, btrfs_chunk_length(eb, chunk));
5169                 return 0;
5170         }
5171         rec = btrfs_new_chunk_record(eb, key, slot);
5172         ret = insert_cache_extent(chunk_cache, &rec->cache);
5173         if (ret) {
5174                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5175                         rec->offset, rec->length);
5176                 free(rec);
5177         }
5178
5179         return ret;
5180 }
5181
5182 static int process_device_item(struct rb_root *dev_cache,
5183                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5184 {
5185         struct btrfs_dev_item *ptr;
5186         struct device_record *rec;
5187         int ret = 0;
5188
5189         ptr = btrfs_item_ptr(eb,
5190                 slot, struct btrfs_dev_item);
5191
5192         rec = malloc(sizeof(*rec));
5193         if (!rec) {
5194                 fprintf(stderr, "memory allocation failed\n");
5195                 return -ENOMEM;
5196         }
5197
5198         rec->devid = key->offset;
5199         rec->generation = btrfs_header_generation(eb);
5200
5201         rec->objectid = key->objectid;
5202         rec->type = key->type;
5203         rec->offset = key->offset;
5204
5205         rec->devid = btrfs_device_id(eb, ptr);
5206         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5207         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5208
5209         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5210         if (ret) {
5211                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5212                 free(rec);
5213         }
5214
5215         return ret;
5216 }
5217
5218 struct block_group_record *
5219 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5220                              int slot)
5221 {
5222         struct btrfs_block_group_item *ptr;
5223         struct block_group_record *rec;
5224
5225         rec = calloc(1, sizeof(*rec));
5226         if (!rec) {
5227                 fprintf(stderr, "memory allocation failed\n");
5228                 exit(-1);
5229         }
5230
5231         rec->cache.start = key->objectid;
5232         rec->cache.size = key->offset;
5233
5234         rec->generation = btrfs_header_generation(leaf);
5235
5236         rec->objectid = key->objectid;
5237         rec->type = key->type;
5238         rec->offset = key->offset;
5239
5240         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5241         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5242
5243         INIT_LIST_HEAD(&rec->list);
5244
5245         return rec;
5246 }
5247
5248 static int process_block_group_item(struct block_group_tree *block_group_cache,
5249                                     struct btrfs_key *key,
5250                                     struct extent_buffer *eb, int slot)
5251 {
5252         struct block_group_record *rec;
5253         int ret = 0;
5254
5255         rec = btrfs_new_block_group_record(eb, key, slot);
5256         ret = insert_block_group_record(block_group_cache, rec);
5257         if (ret) {
5258                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5259                         rec->objectid, rec->offset);
5260                 free(rec);
5261         }
5262
5263         return ret;
5264 }
5265
5266 struct device_extent_record *
5267 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5268                                struct btrfs_key *key, int slot)
5269 {
5270         struct device_extent_record *rec;
5271         struct btrfs_dev_extent *ptr;
5272
5273         rec = calloc(1, sizeof(*rec));
5274         if (!rec) {
5275                 fprintf(stderr, "memory allocation failed\n");
5276                 exit(-1);
5277         }
5278
5279         rec->cache.objectid = key->objectid;
5280         rec->cache.start = key->offset;
5281
5282         rec->generation = btrfs_header_generation(leaf);
5283
5284         rec->objectid = key->objectid;
5285         rec->type = key->type;
5286         rec->offset = key->offset;
5287
5288         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5289         rec->chunk_objecteid =
5290                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5291         rec->chunk_offset =
5292                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5293         rec->length = btrfs_dev_extent_length(leaf, ptr);
5294         rec->cache.size = rec->length;
5295
5296         INIT_LIST_HEAD(&rec->chunk_list);
5297         INIT_LIST_HEAD(&rec->device_list);
5298
5299         return rec;
5300 }
5301
5302 static int
5303 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5304                            struct btrfs_key *key, struct extent_buffer *eb,
5305                            int slot)
5306 {
5307         struct device_extent_record *rec;
5308         int ret;
5309
5310         rec = btrfs_new_device_extent_record(eb, key, slot);
5311         ret = insert_device_extent_record(dev_extent_cache, rec);
5312         if (ret) {
5313                 fprintf(stderr,
5314                         "Device extent[%llu, %llu, %llu] existed.\n",
5315                         rec->objectid, rec->offset, rec->length);
5316                 free(rec);
5317         }
5318
5319         return ret;
5320 }
5321
5322 static int process_extent_item(struct btrfs_root *root,
5323                                struct cache_tree *extent_cache,
5324                                struct extent_buffer *eb, int slot)
5325 {
5326         struct btrfs_extent_item *ei;
5327         struct btrfs_extent_inline_ref *iref;
5328         struct btrfs_extent_data_ref *dref;
5329         struct btrfs_shared_data_ref *sref;
5330         struct btrfs_key key;
5331         struct extent_record tmpl;
5332         unsigned long end;
5333         unsigned long ptr;
5334         int ret;
5335         int type;
5336         u32 item_size = btrfs_item_size_nr(eb, slot);
5337         u64 refs = 0;
5338         u64 offset;
5339         u64 num_bytes;
5340         int metadata = 0;
5341
5342         btrfs_item_key_to_cpu(eb, &key, slot);
5343
5344         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5345                 metadata = 1;
5346                 num_bytes = root->nodesize;
5347         } else {
5348                 num_bytes = key.offset;
5349         }
5350
5351         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5352                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5353                       key.objectid, root->sectorsize);
5354                 return -EIO;
5355         }
5356         if (item_size < sizeof(*ei)) {
5357 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5358                 struct btrfs_extent_item_v0 *ei0;
5359                 BUG_ON(item_size != sizeof(*ei0));
5360                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5361                 refs = btrfs_extent_refs_v0(eb, ei0);
5362 #else
5363                 BUG();
5364 #endif
5365                 memset(&tmpl, 0, sizeof(tmpl));
5366                 tmpl.start = key.objectid;
5367                 tmpl.nr = num_bytes;
5368                 tmpl.extent_item_refs = refs;
5369                 tmpl.metadata = metadata;
5370                 tmpl.found_rec = 1;
5371                 tmpl.max_size = num_bytes;
5372
5373                 return add_extent_rec(extent_cache, &tmpl);
5374         }
5375
5376         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5377         refs = btrfs_extent_refs(eb, ei);
5378         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5379                 metadata = 1;
5380         else
5381                 metadata = 0;
5382         if (metadata && num_bytes != root->nodesize) {
5383                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5384                       num_bytes, root->nodesize);
5385                 return -EIO;
5386         }
5387         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5388                 error("ignore invalid data extent, length %llu is not aligned to %u",
5389                       num_bytes, root->sectorsize);
5390                 return -EIO;
5391         }
5392
5393         memset(&tmpl, 0, sizeof(tmpl));
5394         tmpl.start = key.objectid;
5395         tmpl.nr = num_bytes;
5396         tmpl.extent_item_refs = refs;
5397         tmpl.metadata = metadata;
5398         tmpl.found_rec = 1;
5399         tmpl.max_size = num_bytes;
5400         add_extent_rec(extent_cache, &tmpl);
5401
5402         ptr = (unsigned long)(ei + 1);
5403         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5404             key.type == BTRFS_EXTENT_ITEM_KEY)
5405                 ptr += sizeof(struct btrfs_tree_block_info);
5406
5407         end = (unsigned long)ei + item_size;
5408         while (ptr < end) {
5409                 iref = (struct btrfs_extent_inline_ref *)ptr;
5410                 type = btrfs_extent_inline_ref_type(eb, iref);
5411                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5412                 switch (type) {
5413                 case BTRFS_TREE_BLOCK_REF_KEY:
5414                         ret = add_tree_backref(extent_cache, key.objectid,
5415                                         0, offset, 0);
5416                         if (ret < 0)
5417                                 error("add_tree_backref failed: %s",
5418                                       strerror(-ret));
5419                         break;
5420                 case BTRFS_SHARED_BLOCK_REF_KEY:
5421                         ret = add_tree_backref(extent_cache, key.objectid,
5422                                         offset, 0, 0);
5423                         if (ret < 0)
5424                                 error("add_tree_backref failed: %s",
5425                                       strerror(-ret));
5426                         break;
5427                 case BTRFS_EXTENT_DATA_REF_KEY:
5428                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5429                         add_data_backref(extent_cache, key.objectid, 0,
5430                                         btrfs_extent_data_ref_root(eb, dref),
5431                                         btrfs_extent_data_ref_objectid(eb,
5432                                                                        dref),
5433                                         btrfs_extent_data_ref_offset(eb, dref),
5434                                         btrfs_extent_data_ref_count(eb, dref),
5435                                         0, num_bytes);
5436                         break;
5437                 case BTRFS_SHARED_DATA_REF_KEY:
5438                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5439                         add_data_backref(extent_cache, key.objectid, offset,
5440                                         0, 0, 0,
5441                                         btrfs_shared_data_ref_count(eb, sref),
5442                                         0, num_bytes);
5443                         break;
5444                 default:
5445                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5446                                 key.objectid, key.type, num_bytes);
5447                         goto out;
5448                 }
5449                 ptr += btrfs_extent_inline_ref_size(type);
5450         }
5451         WARN_ON(ptr > end);
5452 out:
5453         return 0;
5454 }
5455
5456 static int check_cache_range(struct btrfs_root *root,
5457                              struct btrfs_block_group_cache *cache,
5458                              u64 offset, u64 bytes)
5459 {
5460         struct btrfs_free_space *entry;
5461         u64 *logical;
5462         u64 bytenr;
5463         int stripe_len;
5464         int i, nr, ret;
5465
5466         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5467                 bytenr = btrfs_sb_offset(i);
5468                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5469                                        cache->key.objectid, bytenr, 0,
5470                                        &logical, &nr, &stripe_len);
5471                 if (ret)
5472                         return ret;
5473
5474                 while (nr--) {
5475                         if (logical[nr] + stripe_len <= offset)
5476                                 continue;
5477                         if (offset + bytes <= logical[nr])
5478                                 continue;
5479                         if (logical[nr] == offset) {
5480                                 if (stripe_len >= bytes) {
5481                                         free(logical);
5482                                         return 0;
5483                                 }
5484                                 bytes -= stripe_len;
5485                                 offset += stripe_len;
5486                         } else if (logical[nr] < offset) {
5487                                 if (logical[nr] + stripe_len >=
5488                                     offset + bytes) {
5489                                         free(logical);
5490                                         return 0;
5491                                 }
5492                                 bytes = (offset + bytes) -
5493                                         (logical[nr] + stripe_len);
5494                                 offset = logical[nr] + stripe_len;
5495                         } else {
5496                                 /*
5497                                  * Could be tricky, the super may land in the
5498                                  * middle of the area we're checking.  First
5499                                  * check the easiest case, it's at the end.
5500                                  */
5501                                 if (logical[nr] + stripe_len >=
5502                                     bytes + offset) {
5503                                         bytes = logical[nr] - offset;
5504                                         continue;
5505                                 }
5506
5507                                 /* Check the left side */
5508                                 ret = check_cache_range(root, cache,
5509                                                         offset,
5510                                                         logical[nr] - offset);
5511                                 if (ret) {
5512                                         free(logical);
5513                                         return ret;
5514                                 }
5515
5516                                 /* Now we continue with the right side */
5517                                 bytes = (offset + bytes) -
5518                                         (logical[nr] + stripe_len);
5519                                 offset = logical[nr] + stripe_len;
5520                         }
5521                 }
5522
5523                 free(logical);
5524         }
5525
5526         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5527         if (!entry) {
5528                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5529                         offset, offset+bytes);
5530                 return -EINVAL;
5531         }
5532
5533         if (entry->offset != offset) {
5534                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5535                         entry->offset);
5536                 return -EINVAL;
5537         }
5538
5539         if (entry->bytes != bytes) {
5540                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5541                         bytes, entry->bytes, offset);
5542                 return -EINVAL;
5543         }
5544
5545         unlink_free_space(cache->free_space_ctl, entry);
5546         free(entry);
5547         return 0;
5548 }
5549
5550 static int verify_space_cache(struct btrfs_root *root,
5551                               struct btrfs_block_group_cache *cache)
5552 {
5553         struct btrfs_path *path;
5554         struct extent_buffer *leaf;
5555         struct btrfs_key key;
5556         u64 last;
5557         int ret = 0;
5558
5559         path = btrfs_alloc_path();
5560         if (!path)
5561                 return -ENOMEM;
5562
5563         root = root->fs_info->extent_root;
5564
5565         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5566
5567         key.objectid = last;
5568         key.offset = 0;
5569         key.type = BTRFS_EXTENT_ITEM_KEY;
5570
5571         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5572         if (ret < 0)
5573                 goto out;
5574         ret = 0;
5575         while (1) {
5576                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5577                         ret = btrfs_next_leaf(root, path);
5578                         if (ret < 0)
5579                                 goto out;
5580                         if (ret > 0) {
5581                                 ret = 0;
5582                                 break;
5583                         }
5584                 }
5585                 leaf = path->nodes[0];
5586                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5587                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5588                         break;
5589                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5590                     key.type != BTRFS_METADATA_ITEM_KEY) {
5591                         path->slots[0]++;
5592                         continue;
5593                 }
5594
5595                 if (last == key.objectid) {
5596                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5597                                 last = key.objectid + key.offset;
5598                         else
5599                                 last = key.objectid + root->nodesize;
5600                         path->slots[0]++;
5601                         continue;
5602                 }
5603
5604                 ret = check_cache_range(root, cache, last,
5605                                         key.objectid - last);
5606                 if (ret)
5607                         break;
5608                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5609                         last = key.objectid + key.offset;
5610                 else
5611                         last = key.objectid + root->nodesize;
5612                 path->slots[0]++;
5613         }
5614
5615         if (last < cache->key.objectid + cache->key.offset)
5616                 ret = check_cache_range(root, cache, last,
5617                                         cache->key.objectid +
5618                                         cache->key.offset - last);
5619
5620 out:
5621         btrfs_free_path(path);
5622
5623         if (!ret &&
5624             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5625                 fprintf(stderr, "There are still entries left in the space "
5626                         "cache\n");
5627                 ret = -EINVAL;
5628         }
5629
5630         return ret;
5631 }
5632
5633 static int check_space_cache(struct btrfs_root *root)
5634 {
5635         struct btrfs_block_group_cache *cache;
5636         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5637         int ret;
5638         int error = 0;
5639
5640         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5641             btrfs_super_generation(root->fs_info->super_copy) !=
5642             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5643                 printf("cache and super generation don't match, space cache "
5644                        "will be invalidated\n");
5645                 return 0;
5646         }
5647
5648         if (ctx.progress_enabled) {
5649                 ctx.tp = TASK_FREE_SPACE;
5650                 task_start(ctx.info);
5651         }
5652
5653         while (1) {
5654                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5655                 if (!cache)
5656                         break;
5657
5658                 start = cache->key.objectid + cache->key.offset;
5659                 if (!cache->free_space_ctl) {
5660                         if (btrfs_init_free_space_ctl(cache,
5661                                                       root->sectorsize)) {
5662                                 ret = -ENOMEM;
5663                                 break;
5664                         }
5665                 } else {
5666                         btrfs_remove_free_space_cache(cache);
5667                 }
5668
5669                 if (btrfs_fs_compat_ro(root->fs_info,
5670                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5671                         ret = exclude_super_stripes(root, cache);
5672                         if (ret) {
5673                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5674                                         strerror(-ret));
5675                                 error++;
5676                                 continue;
5677                         }
5678                         ret = load_free_space_tree(root->fs_info, cache);
5679                         free_excluded_extents(root, cache);
5680                         if (ret < 0) {
5681                                 fprintf(stderr, "could not load free space tree: %s\n",
5682                                         strerror(-ret));
5683                                 error++;
5684                                 continue;
5685                         }
5686                         error += ret;
5687                 } else {
5688                         ret = load_free_space_cache(root->fs_info, cache);
5689                         if (!ret)
5690                                 continue;
5691                 }
5692
5693                 ret = verify_space_cache(root, cache);
5694                 if (ret) {
5695                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5696                                 cache->key.objectid);
5697                         error++;
5698                 }
5699         }
5700
5701         task_stop(ctx.info);
5702
5703         return error ? -EINVAL : 0;
5704 }
5705
5706 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5707                         u64 num_bytes, unsigned long leaf_offset,
5708                         struct extent_buffer *eb) {
5709
5710         u64 offset = 0;
5711         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5712         char *data;
5713         unsigned long csum_offset;
5714         u32 csum;
5715         u32 csum_expected;
5716         u64 read_len;
5717         u64 data_checked = 0;
5718         u64 tmp;
5719         int ret = 0;
5720         int mirror;
5721         int num_copies;
5722
5723         if (num_bytes % root->sectorsize)
5724                 return -EINVAL;
5725
5726         data = malloc(num_bytes);
5727         if (!data)
5728                 return -ENOMEM;
5729
5730         while (offset < num_bytes) {
5731                 mirror = 0;
5732 again:
5733                 read_len = num_bytes - offset;
5734                 /* read as much space once a time */
5735                 ret = read_extent_data(root, data + offset,
5736                                 bytenr + offset, &read_len, mirror);
5737                 if (ret)
5738                         goto out;
5739                 data_checked = 0;
5740                 /* verify every 4k data's checksum */
5741                 while (data_checked < read_len) {
5742                         csum = ~(u32)0;
5743                         tmp = offset + data_checked;
5744
5745                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5746                                                csum, root->sectorsize);
5747                         btrfs_csum_final(csum, (u8 *)&csum);
5748
5749                         csum_offset = leaf_offset +
5750                                  tmp / root->sectorsize * csum_size;
5751                         read_extent_buffer(eb, (char *)&csum_expected,
5752                                            csum_offset, csum_size);
5753                         /* try another mirror */
5754                         if (csum != csum_expected) {
5755                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5756                                                 mirror, bytenr + tmp,
5757                                                 csum, csum_expected);
5758                                 num_copies = btrfs_num_copies(
5759                                                 &root->fs_info->mapping_tree,
5760                                                 bytenr, num_bytes);
5761                                 if (mirror < num_copies - 1) {
5762                                         mirror += 1;
5763                                         goto again;
5764                                 }
5765                         }
5766                         data_checked += root->sectorsize;
5767                 }
5768                 offset += read_len;
5769         }
5770 out:
5771         free(data);
5772         return ret;
5773 }
5774
5775 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5776                                u64 num_bytes)
5777 {
5778         struct btrfs_path *path;
5779         struct extent_buffer *leaf;
5780         struct btrfs_key key;
5781         int ret;
5782
5783         path = btrfs_alloc_path();
5784         if (!path) {
5785                 fprintf(stderr, "Error allocating path\n");
5786                 return -ENOMEM;
5787         }
5788
5789         key.objectid = bytenr;
5790         key.type = BTRFS_EXTENT_ITEM_KEY;
5791         key.offset = (u64)-1;
5792
5793 again:
5794         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5795                                 0, 0);
5796         if (ret < 0) {
5797                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5798                 btrfs_free_path(path);
5799                 return ret;
5800         } else if (ret) {
5801                 if (path->slots[0] > 0) {
5802                         path->slots[0]--;
5803                 } else {
5804                         ret = btrfs_prev_leaf(root, path);
5805                         if (ret < 0) {
5806                                 goto out;
5807                         } else if (ret > 0) {
5808                                 ret = 0;
5809                                 goto out;
5810                         }
5811                 }
5812         }
5813
5814         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5815
5816         /*
5817          * Block group items come before extent items if they have the same
5818          * bytenr, so walk back one more just in case.  Dear future traveller,
5819          * first congrats on mastering time travel.  Now if it's not too much
5820          * trouble could you go back to 2006 and tell Chris to make the
5821          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5822          * EXTENT_ITEM_KEY please?
5823          */
5824         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5825                 if (path->slots[0] > 0) {
5826                         path->slots[0]--;
5827                 } else {
5828                         ret = btrfs_prev_leaf(root, path);
5829                         if (ret < 0) {
5830                                 goto out;
5831                         } else if (ret > 0) {
5832                                 ret = 0;
5833                                 goto out;
5834                         }
5835                 }
5836                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5837         }
5838
5839         while (num_bytes) {
5840                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5841                         ret = btrfs_next_leaf(root, path);
5842                         if (ret < 0) {
5843                                 fprintf(stderr, "Error going to next leaf "
5844                                         "%d\n", ret);
5845                                 btrfs_free_path(path);
5846                                 return ret;
5847                         } else if (ret) {
5848                                 break;
5849                         }
5850                 }
5851                 leaf = path->nodes[0];
5852                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5853                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5854                         path->slots[0]++;
5855                         continue;
5856                 }
5857                 if (key.objectid + key.offset < bytenr) {
5858                         path->slots[0]++;
5859                         continue;
5860                 }
5861                 if (key.objectid > bytenr + num_bytes)
5862                         break;
5863
5864                 if (key.objectid == bytenr) {
5865                         if (key.offset >= num_bytes) {
5866                                 num_bytes = 0;
5867                                 break;
5868                         }
5869                         num_bytes -= key.offset;
5870                         bytenr += key.offset;
5871                 } else if (key.objectid < bytenr) {
5872                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5873                                 num_bytes = 0;
5874                                 break;
5875                         }
5876                         num_bytes = (bytenr + num_bytes) -
5877                                 (key.objectid + key.offset);
5878                         bytenr = key.objectid + key.offset;
5879                 } else {
5880                         if (key.objectid + key.offset < bytenr + num_bytes) {
5881                                 u64 new_start = key.objectid + key.offset;
5882                                 u64 new_bytes = bytenr + num_bytes - new_start;
5883
5884                                 /*
5885                                  * Weird case, the extent is in the middle of
5886                                  * our range, we'll have to search one side
5887                                  * and then the other.  Not sure if this happens
5888                                  * in real life, but no harm in coding it up
5889                                  * anyway just in case.
5890                                  */
5891                                 btrfs_release_path(path);
5892                                 ret = check_extent_exists(root, new_start,
5893                                                           new_bytes);
5894                                 if (ret) {
5895                                         fprintf(stderr, "Right section didn't "
5896                                                 "have a record\n");
5897                                         break;
5898                                 }
5899                                 num_bytes = key.objectid - bytenr;
5900                                 goto again;
5901                         }
5902                         num_bytes = key.objectid - bytenr;
5903                 }
5904                 path->slots[0]++;
5905         }
5906         ret = 0;
5907
5908 out:
5909         if (num_bytes && !ret) {
5910                 fprintf(stderr, "There are no extents for csum range "
5911                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5912                 ret = 1;
5913         }
5914
5915         btrfs_free_path(path);
5916         return ret;
5917 }
5918
5919 static int check_csums(struct btrfs_root *root)
5920 {
5921         struct btrfs_path *path;
5922         struct extent_buffer *leaf;
5923         struct btrfs_key key;
5924         u64 offset = 0, num_bytes = 0;
5925         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5926         int errors = 0;
5927         int ret;
5928         u64 data_len;
5929         unsigned long leaf_offset;
5930
5931         root = root->fs_info->csum_root;
5932         if (!extent_buffer_uptodate(root->node)) {
5933                 fprintf(stderr, "No valid csum tree found\n");
5934                 return -ENOENT;
5935         }
5936
5937         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5938         key.type = BTRFS_EXTENT_CSUM_KEY;
5939         key.offset = 0;
5940
5941         path = btrfs_alloc_path();
5942         if (!path)
5943                 return -ENOMEM;
5944
5945         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5946         if (ret < 0) {
5947                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5948                 btrfs_free_path(path);
5949                 return ret;
5950         }
5951
5952         if (ret > 0 && path->slots[0])
5953                 path->slots[0]--;
5954         ret = 0;
5955
5956         while (1) {
5957                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5958                         ret = btrfs_next_leaf(root, path);
5959                         if (ret < 0) {
5960                                 fprintf(stderr, "Error going to next leaf "
5961                                         "%d\n", ret);
5962                                 break;
5963                         }
5964                         if (ret)
5965                                 break;
5966                 }
5967                 leaf = path->nodes[0];
5968
5969                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5970                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5971                         path->slots[0]++;
5972                         continue;
5973                 }
5974
5975                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5976                               csum_size) * root->sectorsize;
5977                 if (!check_data_csum)
5978                         goto skip_csum_check;
5979                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5980                 ret = check_extent_csums(root, key.offset, data_len,
5981                                          leaf_offset, leaf);
5982                 if (ret)
5983                         break;
5984 skip_csum_check:
5985                 if (!num_bytes) {
5986                         offset = key.offset;
5987                 } else if (key.offset != offset + num_bytes) {
5988                         ret = check_extent_exists(root, offset, num_bytes);
5989                         if (ret) {
5990                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5991                                         "there is no extent record\n",
5992                                         offset, offset+num_bytes);
5993                                 errors++;
5994                         }
5995                         offset = key.offset;
5996                         num_bytes = 0;
5997                 }
5998                 num_bytes += data_len;
5999                 path->slots[0]++;
6000         }
6001
6002         btrfs_free_path(path);
6003         return errors;
6004 }
6005
6006 static int is_dropped_key(struct btrfs_key *key,
6007                           struct btrfs_key *drop_key) {
6008         if (key->objectid < drop_key->objectid)
6009                 return 1;
6010         else if (key->objectid == drop_key->objectid) {
6011                 if (key->type < drop_key->type)
6012                         return 1;
6013                 else if (key->type == drop_key->type) {
6014                         if (key->offset < drop_key->offset)
6015                                 return 1;
6016                 }
6017         }
6018         return 0;
6019 }
6020
6021 /*
6022  * Here are the rules for FULL_BACKREF.
6023  *
6024  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6025  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6026  *      FULL_BACKREF set.
6027  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6028  *    if it happened after the relocation occurred since we'll have dropped the
6029  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6030  *    have no real way to know for sure.
6031  *
6032  * We process the blocks one root at a time, and we start from the lowest root
6033  * objectid and go to the highest.  So we can just lookup the owner backref for
6034  * the record and if we don't find it then we know it doesn't exist and we have
6035  * a FULL BACKREF.
6036  *
6037  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6038  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6039  * be set or not and then we can check later once we've gathered all the refs.
6040  */
6041 static int calc_extent_flag(struct btrfs_root *root,
6042                            struct cache_tree *extent_cache,
6043                            struct extent_buffer *buf,
6044                            struct root_item_record *ri,
6045                            u64 *flags)
6046 {
6047         struct extent_record *rec;
6048         struct cache_extent *cache;
6049         struct tree_backref *tback;
6050         u64 owner = 0;
6051
6052         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6053         /* we have added this extent before */
6054         if (!cache)
6055                 return -ENOENT;
6056
6057         rec = container_of(cache, struct extent_record, cache);
6058
6059         /*
6060          * Except file/reloc tree, we can not have
6061          * FULL BACKREF MODE
6062          */
6063         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6064                 goto normal;
6065         /*
6066          * root node
6067          */
6068         if (buf->start == ri->bytenr)
6069                 goto normal;
6070
6071         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6072                 goto full_backref;
6073
6074         owner = btrfs_header_owner(buf);
6075         if (owner == ri->objectid)
6076                 goto normal;
6077
6078         tback = find_tree_backref(rec, 0, owner);
6079         if (!tback)
6080                 goto full_backref;
6081 normal:
6082         *flags = 0;
6083         if (rec->flag_block_full_backref != FLAG_UNSET &&
6084             rec->flag_block_full_backref != 0)
6085                 rec->bad_full_backref = 1;
6086         return 0;
6087 full_backref:
6088         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6089         if (rec->flag_block_full_backref != FLAG_UNSET &&
6090             rec->flag_block_full_backref != 1)
6091                 rec->bad_full_backref = 1;
6092         return 0;
6093 }
6094
6095 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6096 {
6097         fprintf(stderr, "Invalid key type(");
6098         print_key_type(stderr, 0, key_type);
6099         fprintf(stderr, ") found in root(");
6100         print_objectid(stderr, rootid, 0);
6101         fprintf(stderr, ")\n");
6102 }
6103
6104 /*
6105  * Check if the key is valid with its extent buffer.
6106  *
6107  * This is a early check in case invalid key exists in a extent buffer
6108  * This is not comprehensive yet, but should prevent wrong key/item passed
6109  * further
6110  */
6111 static int check_type_with_root(u64 rootid, u8 key_type)
6112 {
6113         switch (key_type) {
6114         /* Only valid in chunk tree */
6115         case BTRFS_DEV_ITEM_KEY:
6116         case BTRFS_CHUNK_ITEM_KEY:
6117                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6118                         goto err;
6119                 break;
6120         /* valid in csum and log tree */
6121         case BTRFS_CSUM_TREE_OBJECTID:
6122                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6123                       is_fstree(rootid)))
6124                         goto err;
6125                 break;
6126         case BTRFS_EXTENT_ITEM_KEY:
6127         case BTRFS_METADATA_ITEM_KEY:
6128         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6129                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6130                         goto err;
6131                 break;
6132         case BTRFS_ROOT_ITEM_KEY:
6133                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6134                         goto err;
6135                 break;
6136         case BTRFS_DEV_EXTENT_KEY:
6137                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6138                         goto err;
6139                 break;
6140         }
6141         return 0;
6142 err:
6143         report_mismatch_key_root(key_type, rootid);
6144         return -EINVAL;
6145 }
6146
6147 static int run_next_block(struct btrfs_root *root,
6148                           struct block_info *bits,
6149                           int bits_nr,
6150                           u64 *last,
6151                           struct cache_tree *pending,
6152                           struct cache_tree *seen,
6153                           struct cache_tree *reada,
6154                           struct cache_tree *nodes,
6155                           struct cache_tree *extent_cache,
6156                           struct cache_tree *chunk_cache,
6157                           struct rb_root *dev_cache,
6158                           struct block_group_tree *block_group_cache,
6159                           struct device_extent_tree *dev_extent_cache,
6160                           struct root_item_record *ri)
6161 {
6162         struct extent_buffer *buf;
6163         struct extent_record *rec = NULL;
6164         u64 bytenr;
6165         u32 size;
6166         u64 parent;
6167         u64 owner;
6168         u64 flags;
6169         u64 ptr;
6170         u64 gen = 0;
6171         int ret = 0;
6172         int i;
6173         int nritems;
6174         struct btrfs_key key;
6175         struct cache_extent *cache;
6176         int reada_bits;
6177
6178         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6179                                     bits_nr, &reada_bits);
6180         if (nritems == 0)
6181                 return 1;
6182
6183         if (!reada_bits) {
6184                 for(i = 0; i < nritems; i++) {
6185                         ret = add_cache_extent(reada, bits[i].start,
6186                                                bits[i].size);
6187                         if (ret == -EEXIST)
6188                                 continue;
6189
6190                         /* fixme, get the parent transid */
6191                         readahead_tree_block(root, bits[i].start,
6192                                              bits[i].size, 0);
6193                 }
6194         }
6195         *last = bits[0].start;
6196         bytenr = bits[0].start;
6197         size = bits[0].size;
6198
6199         cache = lookup_cache_extent(pending, bytenr, size);
6200         if (cache) {
6201                 remove_cache_extent(pending, cache);
6202                 free(cache);
6203         }
6204         cache = lookup_cache_extent(reada, bytenr, size);
6205         if (cache) {
6206                 remove_cache_extent(reada, cache);
6207                 free(cache);
6208         }
6209         cache = lookup_cache_extent(nodes, bytenr, size);
6210         if (cache) {
6211                 remove_cache_extent(nodes, cache);
6212                 free(cache);
6213         }
6214         cache = lookup_cache_extent(extent_cache, bytenr, size);
6215         if (cache) {
6216                 rec = container_of(cache, struct extent_record, cache);
6217                 gen = rec->parent_generation;
6218         }
6219
6220         /* fixme, get the real parent transid */
6221         buf = read_tree_block(root, bytenr, size, gen);
6222         if (!extent_buffer_uptodate(buf)) {
6223                 record_bad_block_io(root->fs_info,
6224                                     extent_cache, bytenr, size);
6225                 goto out;
6226         }
6227
6228         nritems = btrfs_header_nritems(buf);
6229
6230         flags = 0;
6231         if (!init_extent_tree) {
6232                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6233                                        btrfs_header_level(buf), 1, NULL,
6234                                        &flags);
6235                 if (ret < 0) {
6236                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6237                         if (ret < 0) {
6238                                 fprintf(stderr, "Couldn't calc extent flags\n");
6239                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6240                         }
6241                 }
6242         } else {
6243                 flags = 0;
6244                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6245                 if (ret < 0) {
6246                         fprintf(stderr, "Couldn't calc extent flags\n");
6247                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6248                 }
6249         }
6250
6251         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6252                 if (ri != NULL &&
6253                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6254                     ri->objectid == btrfs_header_owner(buf)) {
6255                         /*
6256                          * Ok we got to this block from it's original owner and
6257                          * we have FULL_BACKREF set.  Relocation can leave
6258                          * converted blocks over so this is altogether possible,
6259                          * however it's not possible if the generation > the
6260                          * last snapshot, so check for this case.
6261                          */
6262                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6263                             btrfs_header_generation(buf) > ri->last_snapshot) {
6264                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6265                                 rec->bad_full_backref = 1;
6266                         }
6267                 }
6268         } else {
6269                 if (ri != NULL &&
6270                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6271                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6272                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6273                         rec->bad_full_backref = 1;
6274                 }
6275         }
6276
6277         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6278                 rec->flag_block_full_backref = 1;
6279                 parent = bytenr;
6280                 owner = 0;
6281         } else {
6282                 rec->flag_block_full_backref = 0;
6283                 parent = 0;
6284                 owner = btrfs_header_owner(buf);
6285         }
6286
6287         ret = check_block(root, extent_cache, buf, flags);
6288         if (ret)
6289                 goto out;
6290
6291         if (btrfs_is_leaf(buf)) {
6292                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6293                 for (i = 0; i < nritems; i++) {
6294                         struct btrfs_file_extent_item *fi;
6295                         btrfs_item_key_to_cpu(buf, &key, i);
6296                         /*
6297                          * Check key type against the leaf owner.
6298                          * Could filter quite a lot of early error if
6299                          * owner is correct
6300                          */
6301                         if (check_type_with_root(btrfs_header_owner(buf),
6302                                                  key.type)) {
6303                                 fprintf(stderr, "ignoring invalid key\n");
6304                                 continue;
6305                         }
6306                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6307                                 process_extent_item(root, extent_cache, buf,
6308                                                     i);
6309                                 continue;
6310                         }
6311                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6312                                 process_extent_item(root, extent_cache, buf,
6313                                                     i);
6314                                 continue;
6315                         }
6316                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6317                                 total_csum_bytes +=
6318                                         btrfs_item_size_nr(buf, i);
6319                                 continue;
6320                         }
6321                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6322                                 process_chunk_item(chunk_cache, &key, buf, i);
6323                                 continue;
6324                         }
6325                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6326                                 process_device_item(dev_cache, &key, buf, i);
6327                                 continue;
6328                         }
6329                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6330                                 process_block_group_item(block_group_cache,
6331                                         &key, buf, i);
6332                                 continue;
6333                         }
6334                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6335                                 process_device_extent_item(dev_extent_cache,
6336                                         &key, buf, i);
6337                                 continue;
6338
6339                         }
6340                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6341 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6342                                 process_extent_ref_v0(extent_cache, buf, i);
6343 #else
6344                                 BUG();
6345 #endif
6346                                 continue;
6347                         }
6348
6349                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6350                                 ret = add_tree_backref(extent_cache,
6351                                                 key.objectid, 0, key.offset, 0);
6352                                 if (ret < 0)
6353                                         error("add_tree_backref failed: %s",
6354                                               strerror(-ret));
6355                                 continue;
6356                         }
6357                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6358                                 ret = add_tree_backref(extent_cache,
6359                                                 key.objectid, key.offset, 0, 0);
6360                                 if (ret < 0)
6361                                         error("add_tree_backref failed: %s",
6362                                               strerror(-ret));
6363                                 continue;
6364                         }
6365                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6366                                 struct btrfs_extent_data_ref *ref;
6367                                 ref = btrfs_item_ptr(buf, i,
6368                                                 struct btrfs_extent_data_ref);
6369                                 add_data_backref(extent_cache,
6370                                         key.objectid, 0,
6371                                         btrfs_extent_data_ref_root(buf, ref),
6372                                         btrfs_extent_data_ref_objectid(buf,
6373                                                                        ref),
6374                                         btrfs_extent_data_ref_offset(buf, ref),
6375                                         btrfs_extent_data_ref_count(buf, ref),
6376                                         0, root->sectorsize);
6377                                 continue;
6378                         }
6379                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6380                                 struct btrfs_shared_data_ref *ref;
6381                                 ref = btrfs_item_ptr(buf, i,
6382                                                 struct btrfs_shared_data_ref);
6383                                 add_data_backref(extent_cache,
6384                                         key.objectid, key.offset, 0, 0, 0,
6385                                         btrfs_shared_data_ref_count(buf, ref),
6386                                         0, root->sectorsize);
6387                                 continue;
6388                         }
6389                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6390                                 struct bad_item *bad;
6391
6392                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6393                                         continue;
6394                                 if (!owner)
6395                                         continue;
6396                                 bad = malloc(sizeof(struct bad_item));
6397                                 if (!bad)
6398                                         continue;
6399                                 INIT_LIST_HEAD(&bad->list);
6400                                 memcpy(&bad->key, &key,
6401                                        sizeof(struct btrfs_key));
6402                                 bad->root_id = owner;
6403                                 list_add_tail(&bad->list, &delete_items);
6404                                 continue;
6405                         }
6406                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6407                                 continue;
6408                         fi = btrfs_item_ptr(buf, i,
6409                                             struct btrfs_file_extent_item);
6410                         if (btrfs_file_extent_type(buf, fi) ==
6411                             BTRFS_FILE_EXTENT_INLINE)
6412                                 continue;
6413                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6414                                 continue;
6415
6416                         data_bytes_allocated +=
6417                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6418                         if (data_bytes_allocated < root->sectorsize) {
6419                                 abort();
6420                         }
6421                         data_bytes_referenced +=
6422                                 btrfs_file_extent_num_bytes(buf, fi);
6423                         add_data_backref(extent_cache,
6424                                 btrfs_file_extent_disk_bytenr(buf, fi),
6425                                 parent, owner, key.objectid, key.offset -
6426                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6427                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6428                 }
6429         } else {
6430                 int level;
6431                 struct btrfs_key first_key;
6432
6433                 first_key.objectid = 0;
6434
6435                 if (nritems > 0)
6436                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6437                 level = btrfs_header_level(buf);
6438                 for (i = 0; i < nritems; i++) {
6439                         struct extent_record tmpl;
6440
6441                         ptr = btrfs_node_blockptr(buf, i);
6442                         size = root->nodesize;
6443                         btrfs_node_key_to_cpu(buf, &key, i);
6444                         if (ri != NULL) {
6445                                 if ((level == ri->drop_level)
6446                                     && is_dropped_key(&key, &ri->drop_key)) {
6447                                         continue;
6448                                 }
6449                         }
6450
6451                         memset(&tmpl, 0, sizeof(tmpl));
6452                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6453                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6454                         tmpl.start = ptr;
6455                         tmpl.nr = size;
6456                         tmpl.refs = 1;
6457                         tmpl.metadata = 1;
6458                         tmpl.max_size = size;
6459                         ret = add_extent_rec(extent_cache, &tmpl);
6460                         if (ret < 0)
6461                                 goto out;
6462
6463                         ret = add_tree_backref(extent_cache, ptr, parent,
6464                                         owner, 1);
6465                         if (ret < 0) {
6466                                 error("add_tree_backref failed: %s",
6467                                       strerror(-ret));
6468                                 continue;
6469                         }
6470
6471                         if (level > 1) {
6472                                 add_pending(nodes, seen, ptr, size);
6473                         } else {
6474                                 add_pending(pending, seen, ptr, size);
6475                         }
6476                 }
6477                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6478                                       nritems) * sizeof(struct btrfs_key_ptr);
6479         }
6480         total_btree_bytes += buf->len;
6481         if (fs_root_objectid(btrfs_header_owner(buf)))
6482                 total_fs_tree_bytes += buf->len;
6483         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6484                 total_extent_tree_bytes += buf->len;
6485         if (!found_old_backref &&
6486             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6487             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6488             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6489                 found_old_backref = 1;
6490 out:
6491         free_extent_buffer(buf);
6492         return ret;
6493 }
6494
6495 static int add_root_to_pending(struct extent_buffer *buf,
6496                                struct cache_tree *extent_cache,
6497                                struct cache_tree *pending,
6498                                struct cache_tree *seen,
6499                                struct cache_tree *nodes,
6500                                u64 objectid)
6501 {
6502         struct extent_record tmpl;
6503         int ret;
6504
6505         if (btrfs_header_level(buf) > 0)
6506                 add_pending(nodes, seen, buf->start, buf->len);
6507         else
6508                 add_pending(pending, seen, buf->start, buf->len);
6509
6510         memset(&tmpl, 0, sizeof(tmpl));
6511         tmpl.start = buf->start;
6512         tmpl.nr = buf->len;
6513         tmpl.is_root = 1;
6514         tmpl.refs = 1;
6515         tmpl.metadata = 1;
6516         tmpl.max_size = buf->len;
6517         add_extent_rec(extent_cache, &tmpl);
6518
6519         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6520             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6521                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6522                                 0, 1);
6523         else
6524                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6525                                 1);
6526         return ret;
6527 }
6528
6529 /* as we fix the tree, we might be deleting blocks that
6530  * we're tracking for repair.  This hook makes sure we
6531  * remove any backrefs for blocks as we are fixing them.
6532  */
6533 static int free_extent_hook(struct btrfs_trans_handle *trans,
6534                             struct btrfs_root *root,
6535                             u64 bytenr, u64 num_bytes, u64 parent,
6536                             u64 root_objectid, u64 owner, u64 offset,
6537                             int refs_to_drop)
6538 {
6539         struct extent_record *rec;
6540         struct cache_extent *cache;
6541         int is_data;
6542         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6543
6544         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6545         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6546         if (!cache)
6547                 return 0;
6548
6549         rec = container_of(cache, struct extent_record, cache);
6550         if (is_data) {
6551                 struct data_backref *back;
6552                 back = find_data_backref(rec, parent, root_objectid, owner,
6553                                          offset, 1, bytenr, num_bytes);
6554                 if (!back)
6555                         goto out;
6556                 if (back->node.found_ref) {
6557                         back->found_ref -= refs_to_drop;
6558                         if (rec->refs)
6559                                 rec->refs -= refs_to_drop;
6560                 }
6561                 if (back->node.found_extent_tree) {
6562                         back->num_refs -= refs_to_drop;
6563                         if (rec->extent_item_refs)
6564                                 rec->extent_item_refs -= refs_to_drop;
6565                 }
6566                 if (back->found_ref == 0)
6567                         back->node.found_ref = 0;
6568                 if (back->num_refs == 0)
6569                         back->node.found_extent_tree = 0;
6570
6571                 if (!back->node.found_extent_tree && back->node.found_ref) {
6572                         list_del(&back->node.list);
6573                         free(back);
6574                 }
6575         } else {
6576                 struct tree_backref *back;
6577                 back = find_tree_backref(rec, parent, root_objectid);
6578                 if (!back)
6579                         goto out;
6580                 if (back->node.found_ref) {
6581                         if (rec->refs)
6582                                 rec->refs--;
6583                         back->node.found_ref = 0;
6584                 }
6585                 if (back->node.found_extent_tree) {
6586                         if (rec->extent_item_refs)
6587                                 rec->extent_item_refs--;
6588                         back->node.found_extent_tree = 0;
6589                 }
6590                 if (!back->node.found_extent_tree && back->node.found_ref) {
6591                         list_del(&back->node.list);
6592                         free(back);
6593                 }
6594         }
6595         maybe_free_extent_rec(extent_cache, rec);
6596 out:
6597         return 0;
6598 }
6599
6600 static int delete_extent_records(struct btrfs_trans_handle *trans,
6601                                  struct btrfs_root *root,
6602                                  struct btrfs_path *path,
6603                                  u64 bytenr, u64 new_len)
6604 {
6605         struct btrfs_key key;
6606         struct btrfs_key found_key;
6607         struct extent_buffer *leaf;
6608         int ret;
6609         int slot;
6610
6611
6612         key.objectid = bytenr;
6613         key.type = (u8)-1;
6614         key.offset = (u64)-1;
6615
6616         while(1) {
6617                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6618                                         &key, path, 0, 1);
6619                 if (ret < 0)
6620                         break;
6621
6622                 if (ret > 0) {
6623                         ret = 0;
6624                         if (path->slots[0] == 0)
6625                                 break;
6626                         path->slots[0]--;
6627                 }
6628                 ret = 0;
6629
6630                 leaf = path->nodes[0];
6631                 slot = path->slots[0];
6632
6633                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6634                 if (found_key.objectid != bytenr)
6635                         break;
6636
6637                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6638                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6639                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6640                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6641                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6642                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6643                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6644                         btrfs_release_path(path);
6645                         if (found_key.type == 0) {
6646                                 if (found_key.offset == 0)
6647                                         break;
6648                                 key.offset = found_key.offset - 1;
6649                                 key.type = found_key.type;
6650                         }
6651                         key.type = found_key.type - 1;
6652                         key.offset = (u64)-1;
6653                         continue;
6654                 }
6655
6656                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6657                         found_key.objectid, found_key.type, found_key.offset);
6658
6659                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6660                 if (ret)
6661                         break;
6662                 btrfs_release_path(path);
6663
6664                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6665                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6666                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6667                                 found_key.offset : root->nodesize;
6668
6669                         ret = btrfs_update_block_group(trans, root, bytenr,
6670                                                        bytes, 0, 0);
6671                         if (ret)
6672                                 break;
6673                 }
6674         }
6675
6676         btrfs_release_path(path);
6677         return ret;
6678 }
6679
6680 /*
6681  * for a single backref, this will allocate a new extent
6682  * and add the backref to it.
6683  */
6684 static int record_extent(struct btrfs_trans_handle *trans,
6685                          struct btrfs_fs_info *info,
6686                          struct btrfs_path *path,
6687                          struct extent_record *rec,
6688                          struct extent_backref *back,
6689                          int allocated, u64 flags)
6690 {
6691         int ret;
6692         struct btrfs_root *extent_root = info->extent_root;
6693         struct extent_buffer *leaf;
6694         struct btrfs_key ins_key;
6695         struct btrfs_extent_item *ei;
6696         struct tree_backref *tback;
6697         struct data_backref *dback;
6698         struct btrfs_tree_block_info *bi;
6699
6700         if (!back->is_data)
6701                 rec->max_size = max_t(u64, rec->max_size,
6702                                     info->extent_root->nodesize);
6703
6704         if (!allocated) {
6705                 u32 item_size = sizeof(*ei);
6706
6707                 if (!back->is_data)
6708                         item_size += sizeof(*bi);
6709
6710                 ins_key.objectid = rec->start;
6711                 ins_key.offset = rec->max_size;
6712                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6713
6714                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6715                                         &ins_key, item_size);
6716                 if (ret)
6717                         goto fail;
6718
6719                 leaf = path->nodes[0];
6720                 ei = btrfs_item_ptr(leaf, path->slots[0],
6721                                     struct btrfs_extent_item);
6722
6723                 btrfs_set_extent_refs(leaf, ei, 0);
6724                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6725
6726                 if (back->is_data) {
6727                         btrfs_set_extent_flags(leaf, ei,
6728                                                BTRFS_EXTENT_FLAG_DATA);
6729                 } else {
6730                         struct btrfs_disk_key copy_key;;
6731
6732                         tback = to_tree_backref(back);
6733                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6734                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6735                                              sizeof(*bi));
6736
6737                         btrfs_set_disk_key_objectid(&copy_key,
6738                                                     rec->info_objectid);
6739                         btrfs_set_disk_key_type(&copy_key, 0);
6740                         btrfs_set_disk_key_offset(&copy_key, 0);
6741
6742                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6743                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6744
6745                         btrfs_set_extent_flags(leaf, ei,
6746                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6747                 }
6748
6749                 btrfs_mark_buffer_dirty(leaf);
6750                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6751                                                rec->max_size, 1, 0);
6752                 if (ret)
6753                         goto fail;
6754                 btrfs_release_path(path);
6755         }
6756
6757         if (back->is_data) {
6758                 u64 parent;
6759                 int i;
6760
6761                 dback = to_data_backref(back);
6762                 if (back->full_backref)
6763                         parent = dback->parent;
6764                 else
6765                         parent = 0;
6766
6767                 for (i = 0; i < dback->found_ref; i++) {
6768                         /* if parent != 0, we're doing a full backref
6769                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6770                          * just makes the backref allocator create a data
6771                          * backref
6772                          */
6773                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6774                                                    rec->start, rec->max_size,
6775                                                    parent,
6776                                                    dback->root,
6777                                                    parent ?
6778                                                    BTRFS_FIRST_FREE_OBJECTID :
6779                                                    dback->owner,
6780                                                    dback->offset);
6781                         if (ret)
6782                                 break;
6783                 }
6784                 fprintf(stderr, "adding new data backref"
6785                                 " on %llu %s %llu owner %llu"
6786                                 " offset %llu found %d\n",
6787                                 (unsigned long long)rec->start,
6788                                 back->full_backref ?
6789                                 "parent" : "root",
6790                                 back->full_backref ?
6791                                 (unsigned long long)parent :
6792                                 (unsigned long long)dback->root,
6793                                 (unsigned long long)dback->owner,
6794                                 (unsigned long long)dback->offset,
6795                                 dback->found_ref);
6796         } else {
6797                 u64 parent;
6798
6799                 tback = to_tree_backref(back);
6800                 if (back->full_backref)
6801                         parent = tback->parent;
6802                 else
6803                         parent = 0;
6804
6805                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6806                                            rec->start, rec->max_size,
6807                                            parent, tback->root, 0, 0);
6808                 fprintf(stderr, "adding new tree backref on "
6809                         "start %llu len %llu parent %llu root %llu\n",
6810                         rec->start, rec->max_size, parent, tback->root);
6811         }
6812 fail:
6813         btrfs_release_path(path);
6814         return ret;
6815 }
6816
6817 static struct extent_entry *find_entry(struct list_head *entries,
6818                                        u64 bytenr, u64 bytes)
6819 {
6820         struct extent_entry *entry = NULL;
6821
6822         list_for_each_entry(entry, entries, list) {
6823                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6824                         return entry;
6825         }
6826
6827         return NULL;
6828 }
6829
6830 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6831 {
6832         struct extent_entry *entry, *best = NULL, *prev = NULL;
6833
6834         list_for_each_entry(entry, entries, list) {
6835                 if (!prev) {
6836                         prev = entry;
6837                         continue;
6838                 }
6839
6840                 /*
6841                  * If there are as many broken entries as entries then we know
6842                  * not to trust this particular entry.
6843                  */
6844                 if (entry->broken == entry->count)
6845                         continue;
6846
6847                 /*
6848                  * If our current entry == best then we can't be sure our best
6849                  * is really the best, so we need to keep searching.
6850                  */
6851                 if (best && best->count == entry->count) {
6852                         prev = entry;
6853                         best = NULL;
6854                         continue;
6855                 }
6856
6857                 /* Prev == entry, not good enough, have to keep searching */
6858                 if (!prev->broken && prev->count == entry->count)
6859                         continue;
6860
6861                 if (!best)
6862                         best = (prev->count > entry->count) ? prev : entry;
6863                 else if (best->count < entry->count)
6864                         best = entry;
6865                 prev = entry;
6866         }
6867
6868         return best;
6869 }
6870
6871 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6872                       struct data_backref *dback, struct extent_entry *entry)
6873 {
6874         struct btrfs_trans_handle *trans;
6875         struct btrfs_root *root;
6876         struct btrfs_file_extent_item *fi;
6877         struct extent_buffer *leaf;
6878         struct btrfs_key key;
6879         u64 bytenr, bytes;
6880         int ret, err;
6881
6882         key.objectid = dback->root;
6883         key.type = BTRFS_ROOT_ITEM_KEY;
6884         key.offset = (u64)-1;
6885         root = btrfs_read_fs_root(info, &key);
6886         if (IS_ERR(root)) {
6887                 fprintf(stderr, "Couldn't find root for our ref\n");
6888                 return -EINVAL;
6889         }
6890
6891         /*
6892          * The backref points to the original offset of the extent if it was
6893          * split, so we need to search down to the offset we have and then walk
6894          * forward until we find the backref we're looking for.
6895          */
6896         key.objectid = dback->owner;
6897         key.type = BTRFS_EXTENT_DATA_KEY;
6898         key.offset = dback->offset;
6899         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6900         if (ret < 0) {
6901                 fprintf(stderr, "Error looking up ref %d\n", ret);
6902                 return ret;
6903         }
6904
6905         while (1) {
6906                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6907                         ret = btrfs_next_leaf(root, path);
6908                         if (ret) {
6909                                 fprintf(stderr, "Couldn't find our ref, next\n");
6910                                 return -EINVAL;
6911                         }
6912                 }
6913                 leaf = path->nodes[0];
6914                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6915                 if (key.objectid != dback->owner ||
6916                     key.type != BTRFS_EXTENT_DATA_KEY) {
6917                         fprintf(stderr, "Couldn't find our ref, search\n");
6918                         return -EINVAL;
6919                 }
6920                 fi = btrfs_item_ptr(leaf, path->slots[0],
6921                                     struct btrfs_file_extent_item);
6922                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6923                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6924
6925                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6926                         break;
6927                 path->slots[0]++;
6928         }
6929
6930         btrfs_release_path(path);
6931
6932         trans = btrfs_start_transaction(root, 1);
6933         if (IS_ERR(trans))
6934                 return PTR_ERR(trans);
6935
6936         /*
6937          * Ok we have the key of the file extent we want to fix, now we can cow
6938          * down to the thing and fix it.
6939          */
6940         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6941         if (ret < 0) {
6942                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6943                         key.objectid, key.type, key.offset, ret);
6944                 goto out;
6945         }
6946         if (ret > 0) {
6947                 fprintf(stderr, "Well that's odd, we just found this key "
6948                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6949                         key.offset);
6950                 ret = -EINVAL;
6951                 goto out;
6952         }
6953         leaf = path->nodes[0];
6954         fi = btrfs_item_ptr(leaf, path->slots[0],
6955                             struct btrfs_file_extent_item);
6956
6957         if (btrfs_file_extent_compression(leaf, fi) &&
6958             dback->disk_bytenr != entry->bytenr) {
6959                 fprintf(stderr, "Ref doesn't match the record start and is "
6960                         "compressed, please take a btrfs-image of this file "
6961                         "system and send it to a btrfs developer so they can "
6962                         "complete this functionality for bytenr %Lu\n",
6963                         dback->disk_bytenr);
6964                 ret = -EINVAL;
6965                 goto out;
6966         }
6967
6968         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6969                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6970         } else if (dback->disk_bytenr > entry->bytenr) {
6971                 u64 off_diff, offset;
6972
6973                 off_diff = dback->disk_bytenr - entry->bytenr;
6974                 offset = btrfs_file_extent_offset(leaf, fi);
6975                 if (dback->disk_bytenr + offset +
6976                     btrfs_file_extent_num_bytes(leaf, fi) >
6977                     entry->bytenr + entry->bytes) {
6978                         fprintf(stderr, "Ref is past the entry end, please "
6979                                 "take a btrfs-image of this file system and "
6980                                 "send it to a btrfs developer, ref %Lu\n",
6981                                 dback->disk_bytenr);
6982                         ret = -EINVAL;
6983                         goto out;
6984                 }
6985                 offset += off_diff;
6986                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6987                 btrfs_set_file_extent_offset(leaf, fi, offset);
6988         } else if (dback->disk_bytenr < entry->bytenr) {
6989                 u64 offset;
6990
6991                 offset = btrfs_file_extent_offset(leaf, fi);
6992                 if (dback->disk_bytenr + offset < entry->bytenr) {
6993                         fprintf(stderr, "Ref is before the entry start, please"
6994                                 " take a btrfs-image of this file system and "
6995                                 "send it to a btrfs developer, ref %Lu\n",
6996                                 dback->disk_bytenr);
6997                         ret = -EINVAL;
6998                         goto out;
6999                 }
7000
7001                 offset += dback->disk_bytenr;
7002                 offset -= entry->bytenr;
7003                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7004                 btrfs_set_file_extent_offset(leaf, fi, offset);
7005         }
7006
7007         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7008
7009         /*
7010          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7011          * only do this if we aren't using compression, otherwise it's a
7012          * trickier case.
7013          */
7014         if (!btrfs_file_extent_compression(leaf, fi))
7015                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7016         else
7017                 printf("ram bytes may be wrong?\n");
7018         btrfs_mark_buffer_dirty(leaf);
7019 out:
7020         err = btrfs_commit_transaction(trans, root);
7021         btrfs_release_path(path);
7022         return ret ? ret : err;
7023 }
7024
7025 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7026                            struct extent_record *rec)
7027 {
7028         struct extent_backref *back;
7029         struct data_backref *dback;
7030         struct extent_entry *entry, *best = NULL;
7031         LIST_HEAD(entries);
7032         int nr_entries = 0;
7033         int broken_entries = 0;
7034         int ret = 0;
7035         short mismatch = 0;
7036
7037         /*
7038          * Metadata is easy and the backrefs should always agree on bytenr and
7039          * size, if not we've got bigger issues.
7040          */
7041         if (rec->metadata)
7042                 return 0;
7043
7044         list_for_each_entry(back, &rec->backrefs, list) {
7045                 if (back->full_backref || !back->is_data)
7046                         continue;
7047
7048                 dback = to_data_backref(back);
7049
7050                 /*
7051                  * We only pay attention to backrefs that we found a real
7052                  * backref for.
7053                  */
7054                 if (dback->found_ref == 0)
7055                         continue;
7056
7057                 /*
7058                  * For now we only catch when the bytes don't match, not the
7059                  * bytenr.  We can easily do this at the same time, but I want
7060                  * to have a fs image to test on before we just add repair
7061                  * functionality willy-nilly so we know we won't screw up the
7062                  * repair.
7063                  */
7064
7065                 entry = find_entry(&entries, dback->disk_bytenr,
7066                                    dback->bytes);
7067                 if (!entry) {
7068                         entry = malloc(sizeof(struct extent_entry));
7069                         if (!entry) {
7070                                 ret = -ENOMEM;
7071                                 goto out;
7072                         }
7073                         memset(entry, 0, sizeof(*entry));
7074                         entry->bytenr = dback->disk_bytenr;
7075                         entry->bytes = dback->bytes;
7076                         list_add_tail(&entry->list, &entries);
7077                         nr_entries++;
7078                 }
7079
7080                 /*
7081                  * If we only have on entry we may think the entries agree when
7082                  * in reality they don't so we have to do some extra checking.
7083                  */
7084                 if (dback->disk_bytenr != rec->start ||
7085                     dback->bytes != rec->nr || back->broken)
7086                         mismatch = 1;
7087
7088                 if (back->broken) {
7089                         entry->broken++;
7090                         broken_entries++;
7091                 }
7092
7093                 entry->count++;
7094         }
7095
7096         /* Yay all the backrefs agree, carry on good sir */
7097         if (nr_entries <= 1 && !mismatch)
7098                 goto out;
7099
7100         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7101                 "%Lu\n", rec->start);
7102
7103         /*
7104          * First we want to see if the backrefs can agree amongst themselves who
7105          * is right, so figure out which one of the entries has the highest
7106          * count.
7107          */
7108         best = find_most_right_entry(&entries);
7109
7110         /*
7111          * Ok so we may have an even split between what the backrefs think, so
7112          * this is where we use the extent ref to see what it thinks.
7113          */
7114         if (!best) {
7115                 entry = find_entry(&entries, rec->start, rec->nr);
7116                 if (!entry && (!broken_entries || !rec->found_rec)) {
7117                         fprintf(stderr, "Backrefs don't agree with each other "
7118                                 "and extent record doesn't agree with anybody,"
7119                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7120                                 rec->start, rec->nr);
7121                         ret = -EINVAL;
7122                         goto out;
7123                 } else if (!entry) {
7124                         /*
7125                          * Ok our backrefs were broken, we'll assume this is the
7126                          * correct value and add an entry for this range.
7127                          */
7128                         entry = malloc(sizeof(struct extent_entry));
7129                         if (!entry) {
7130                                 ret = -ENOMEM;
7131                                 goto out;
7132                         }
7133                         memset(entry, 0, sizeof(*entry));
7134                         entry->bytenr = rec->start;
7135                         entry->bytes = rec->nr;
7136                         list_add_tail(&entry->list, &entries);
7137                         nr_entries++;
7138                 }
7139                 entry->count++;
7140                 best = find_most_right_entry(&entries);
7141                 if (!best) {
7142                         fprintf(stderr, "Backrefs and extent record evenly "
7143                                 "split on who is right, this is going to "
7144                                 "require user input to fix bytenr %Lu bytes "
7145                                 "%Lu\n", rec->start, rec->nr);
7146                         ret = -EINVAL;
7147                         goto out;
7148                 }
7149         }
7150
7151         /*
7152          * I don't think this can happen currently as we'll abort() if we catch
7153          * this case higher up, but in case somebody removes that we still can't
7154          * deal with it properly here yet, so just bail out of that's the case.
7155          */
7156         if (best->bytenr != rec->start) {
7157                 fprintf(stderr, "Extent start and backref starts don't match, "
7158                         "please use btrfs-image on this file system and send "
7159                         "it to a btrfs developer so they can make fsck fix "
7160                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7161                         rec->start, rec->nr);
7162                 ret = -EINVAL;
7163                 goto out;
7164         }
7165
7166         /*
7167          * Ok great we all agreed on an extent record, let's go find the real
7168          * references and fix up the ones that don't match.
7169          */
7170         list_for_each_entry(back, &rec->backrefs, list) {
7171                 if (back->full_backref || !back->is_data)
7172                         continue;
7173
7174                 dback = to_data_backref(back);
7175
7176                 /*
7177                  * Still ignoring backrefs that don't have a real ref attached
7178                  * to them.
7179                  */
7180                 if (dback->found_ref == 0)
7181                         continue;
7182
7183                 if (dback->bytes == best->bytes &&
7184                     dback->disk_bytenr == best->bytenr)
7185                         continue;
7186
7187                 ret = repair_ref(info, path, dback, best);
7188                 if (ret)
7189                         goto out;
7190         }
7191
7192         /*
7193          * Ok we messed with the actual refs, which means we need to drop our
7194          * entire cache and go back and rescan.  I know this is a huge pain and
7195          * adds a lot of extra work, but it's the only way to be safe.  Once all
7196          * the backrefs agree we may not need to do anything to the extent
7197          * record itself.
7198          */
7199         ret = -EAGAIN;
7200 out:
7201         while (!list_empty(&entries)) {
7202                 entry = list_entry(entries.next, struct extent_entry, list);
7203                 list_del_init(&entry->list);
7204                 free(entry);
7205         }
7206         return ret;
7207 }
7208
7209 static int process_duplicates(struct btrfs_root *root,
7210                               struct cache_tree *extent_cache,
7211                               struct extent_record *rec)
7212 {
7213         struct extent_record *good, *tmp;
7214         struct cache_extent *cache;
7215         int ret;
7216
7217         /*
7218          * If we found a extent record for this extent then return, or if we
7219          * have more than one duplicate we are likely going to need to delete
7220          * something.
7221          */
7222         if (rec->found_rec || rec->num_duplicates > 1)
7223                 return 0;
7224
7225         /* Shouldn't happen but just in case */
7226         BUG_ON(!rec->num_duplicates);
7227
7228         /*
7229          * So this happens if we end up with a backref that doesn't match the
7230          * actual extent entry.  So either the backref is bad or the extent
7231          * entry is bad.  Either way we want to have the extent_record actually
7232          * reflect what we found in the extent_tree, so we need to take the
7233          * duplicate out and use that as the extent_record since the only way we
7234          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7235          */
7236         remove_cache_extent(extent_cache, &rec->cache);
7237
7238         good = to_extent_record(rec->dups.next);
7239         list_del_init(&good->list);
7240         INIT_LIST_HEAD(&good->backrefs);
7241         INIT_LIST_HEAD(&good->dups);
7242         good->cache.start = good->start;
7243         good->cache.size = good->nr;
7244         good->content_checked = 0;
7245         good->owner_ref_checked = 0;
7246         good->num_duplicates = 0;
7247         good->refs = rec->refs;
7248         list_splice_init(&rec->backrefs, &good->backrefs);
7249         while (1) {
7250                 cache = lookup_cache_extent(extent_cache, good->start,
7251                                             good->nr);
7252                 if (!cache)
7253                         break;
7254                 tmp = container_of(cache, struct extent_record, cache);
7255
7256                 /*
7257                  * If we find another overlapping extent and it's found_rec is
7258                  * set then it's a duplicate and we need to try and delete
7259                  * something.
7260                  */
7261                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7262                         if (list_empty(&good->list))
7263                                 list_add_tail(&good->list,
7264                                               &duplicate_extents);
7265                         good->num_duplicates += tmp->num_duplicates + 1;
7266                         list_splice_init(&tmp->dups, &good->dups);
7267                         list_del_init(&tmp->list);
7268                         list_add_tail(&tmp->list, &good->dups);
7269                         remove_cache_extent(extent_cache, &tmp->cache);
7270                         continue;
7271                 }
7272
7273                 /*
7274                  * Ok we have another non extent item backed extent rec, so lets
7275                  * just add it to this extent and carry on like we did above.
7276                  */
7277                 good->refs += tmp->refs;
7278                 list_splice_init(&tmp->backrefs, &good->backrefs);
7279                 remove_cache_extent(extent_cache, &tmp->cache);
7280                 free(tmp);
7281         }
7282         ret = insert_cache_extent(extent_cache, &good->cache);
7283         BUG_ON(ret);
7284         free(rec);
7285         return good->num_duplicates ? 0 : 1;
7286 }
7287
7288 static int delete_duplicate_records(struct btrfs_root *root,
7289                                     struct extent_record *rec)
7290 {
7291         struct btrfs_trans_handle *trans;
7292         LIST_HEAD(delete_list);
7293         struct btrfs_path *path;
7294         struct extent_record *tmp, *good, *n;
7295         int nr_del = 0;
7296         int ret = 0, err;
7297         struct btrfs_key key;
7298
7299         path = btrfs_alloc_path();
7300         if (!path) {
7301                 ret = -ENOMEM;
7302                 goto out;
7303         }
7304
7305         good = rec;
7306         /* Find the record that covers all of the duplicates. */
7307         list_for_each_entry(tmp, &rec->dups, list) {
7308                 if (good->start < tmp->start)
7309                         continue;
7310                 if (good->nr > tmp->nr)
7311                         continue;
7312
7313                 if (tmp->start + tmp->nr < good->start + good->nr) {
7314                         fprintf(stderr, "Ok we have overlapping extents that "
7315                                 "aren't completely covered by each other, this "
7316                                 "is going to require more careful thought.  "
7317                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7318                                 tmp->start, tmp->nr, good->start, good->nr);
7319                         abort();
7320                 }
7321                 good = tmp;
7322         }
7323
7324         if (good != rec)
7325                 list_add_tail(&rec->list, &delete_list);
7326
7327         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7328                 if (tmp == good)
7329                         continue;
7330                 list_move_tail(&tmp->list, &delete_list);
7331         }
7332
7333         root = root->fs_info->extent_root;
7334         trans = btrfs_start_transaction(root, 1);
7335         if (IS_ERR(trans)) {
7336                 ret = PTR_ERR(trans);
7337                 goto out;
7338         }
7339
7340         list_for_each_entry(tmp, &delete_list, list) {
7341                 if (tmp->found_rec == 0)
7342                         continue;
7343                 key.objectid = tmp->start;
7344                 key.type = BTRFS_EXTENT_ITEM_KEY;
7345                 key.offset = tmp->nr;
7346
7347                 /* Shouldn't happen but just in case */
7348                 if (tmp->metadata) {
7349                         fprintf(stderr, "Well this shouldn't happen, extent "
7350                                 "record overlaps but is metadata? "
7351                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7352                         abort();
7353                 }
7354
7355                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7356                 if (ret) {
7357                         if (ret > 0)
7358                                 ret = -EINVAL;
7359                         break;
7360                 }
7361                 ret = btrfs_del_item(trans, root, path);
7362                 if (ret)
7363                         break;
7364                 btrfs_release_path(path);
7365                 nr_del++;
7366         }
7367         err = btrfs_commit_transaction(trans, root);
7368         if (err && !ret)
7369                 ret = err;
7370 out:
7371         while (!list_empty(&delete_list)) {
7372                 tmp = to_extent_record(delete_list.next);
7373                 list_del_init(&tmp->list);
7374                 if (tmp == rec)
7375                         continue;
7376                 free(tmp);
7377         }
7378
7379         while (!list_empty(&rec->dups)) {
7380                 tmp = to_extent_record(rec->dups.next);
7381                 list_del_init(&tmp->list);
7382                 free(tmp);
7383         }
7384
7385         btrfs_free_path(path);
7386
7387         if (!ret && !nr_del)
7388                 rec->num_duplicates = 0;
7389
7390         return ret ? ret : nr_del;
7391 }
7392
7393 static int find_possible_backrefs(struct btrfs_fs_info *info,
7394                                   struct btrfs_path *path,
7395                                   struct cache_tree *extent_cache,
7396                                   struct extent_record *rec)
7397 {
7398         struct btrfs_root *root;
7399         struct extent_backref *back;
7400         struct data_backref *dback;
7401         struct cache_extent *cache;
7402         struct btrfs_file_extent_item *fi;
7403         struct btrfs_key key;
7404         u64 bytenr, bytes;
7405         int ret;
7406
7407         list_for_each_entry(back, &rec->backrefs, list) {
7408                 /* Don't care about full backrefs (poor unloved backrefs) */
7409                 if (back->full_backref || !back->is_data)
7410                         continue;
7411
7412                 dback = to_data_backref(back);
7413
7414                 /* We found this one, we don't need to do a lookup */
7415                 if (dback->found_ref)
7416                         continue;
7417
7418                 key.objectid = dback->root;
7419                 key.type = BTRFS_ROOT_ITEM_KEY;
7420                 key.offset = (u64)-1;
7421
7422                 root = btrfs_read_fs_root(info, &key);
7423
7424                 /* No root, definitely a bad ref, skip */
7425                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7426                         continue;
7427                 /* Other err, exit */
7428                 if (IS_ERR(root))
7429                         return PTR_ERR(root);
7430
7431                 key.objectid = dback->owner;
7432                 key.type = BTRFS_EXTENT_DATA_KEY;
7433                 key.offset = dback->offset;
7434                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7435                 if (ret) {
7436                         btrfs_release_path(path);
7437                         if (ret < 0)
7438                                 return ret;
7439                         /* Didn't find it, we can carry on */
7440                         ret = 0;
7441                         continue;
7442                 }
7443
7444                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7445                                     struct btrfs_file_extent_item);
7446                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7447                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7448                 btrfs_release_path(path);
7449                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7450                 if (cache) {
7451                         struct extent_record *tmp;
7452                         tmp = container_of(cache, struct extent_record, cache);
7453
7454                         /*
7455                          * If we found an extent record for the bytenr for this
7456                          * particular backref then we can't add it to our
7457                          * current extent record.  We only want to add backrefs
7458                          * that don't have a corresponding extent item in the
7459                          * extent tree since they likely belong to this record
7460                          * and we need to fix it if it doesn't match bytenrs.
7461                          */
7462                         if  (tmp->found_rec)
7463                                 continue;
7464                 }
7465
7466                 dback->found_ref += 1;
7467                 dback->disk_bytenr = bytenr;
7468                 dback->bytes = bytes;
7469
7470                 /*
7471                  * Set this so the verify backref code knows not to trust the
7472                  * values in this backref.
7473                  */
7474                 back->broken = 1;
7475         }
7476
7477         return 0;
7478 }
7479
7480 /*
7481  * Record orphan data ref into corresponding root.
7482  *
7483  * Return 0 if the extent item contains data ref and recorded.
7484  * Return 1 if the extent item contains no useful data ref
7485  *   On that case, it may contains only shared_dataref or metadata backref
7486  *   or the file extent exists(this should be handled by the extent bytenr
7487  *   recovery routine)
7488  * Return <0 if something goes wrong.
7489  */
7490 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7491                                       struct extent_record *rec)
7492 {
7493         struct btrfs_key key;
7494         struct btrfs_root *dest_root;
7495         struct extent_backref *back;
7496         struct data_backref *dback;
7497         struct orphan_data_extent *orphan;
7498         struct btrfs_path *path;
7499         int recorded_data_ref = 0;
7500         int ret = 0;
7501
7502         if (rec->metadata)
7503                 return 1;
7504         path = btrfs_alloc_path();
7505         if (!path)
7506                 return -ENOMEM;
7507         list_for_each_entry(back, &rec->backrefs, list) {
7508                 if (back->full_backref || !back->is_data ||
7509                     !back->found_extent_tree)
7510                         continue;
7511                 dback = to_data_backref(back);
7512                 if (dback->found_ref)
7513                         continue;
7514                 key.objectid = dback->root;
7515                 key.type = BTRFS_ROOT_ITEM_KEY;
7516                 key.offset = (u64)-1;
7517
7518                 dest_root = btrfs_read_fs_root(fs_info, &key);
7519
7520                 /* For non-exist root we just skip it */
7521                 if (IS_ERR(dest_root) || !dest_root)
7522                         continue;
7523
7524                 key.objectid = dback->owner;
7525                 key.type = BTRFS_EXTENT_DATA_KEY;
7526                 key.offset = dback->offset;
7527
7528                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7529                 btrfs_release_path(path);
7530                 /*
7531                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7532                  * we need to record it for inode/file extent rebuild.
7533                  * For ret > 0, we record it only for file extent rebuild.
7534                  * For ret == 0, the file extent exists but only bytenr
7535                  * mismatch, let the original bytenr fix routine to handle,
7536                  * don't record it.
7537                  */
7538                 if (ret == 0)
7539                         continue;
7540                 ret = 0;
7541                 orphan = malloc(sizeof(*orphan));
7542                 if (!orphan) {
7543                         ret = -ENOMEM;
7544                         goto out;
7545                 }
7546                 INIT_LIST_HEAD(&orphan->list);
7547                 orphan->root = dback->root;
7548                 orphan->objectid = dback->owner;
7549                 orphan->offset = dback->offset;
7550                 orphan->disk_bytenr = rec->cache.start;
7551                 orphan->disk_len = rec->cache.size;
7552                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7553                 recorded_data_ref = 1;
7554         }
7555 out:
7556         btrfs_free_path(path);
7557         if (!ret)
7558                 return !recorded_data_ref;
7559         else
7560                 return ret;
7561 }
7562
7563 /*
7564  * when an incorrect extent item is found, this will delete
7565  * all of the existing entries for it and recreate them
7566  * based on what the tree scan found.
7567  */
7568 static int fixup_extent_refs(struct btrfs_fs_info *info,
7569                              struct cache_tree *extent_cache,
7570                              struct extent_record *rec)
7571 {
7572         struct btrfs_trans_handle *trans = NULL;
7573         int ret;
7574         struct btrfs_path *path;
7575         struct list_head *cur = rec->backrefs.next;
7576         struct cache_extent *cache;
7577         struct extent_backref *back;
7578         int allocated = 0;
7579         u64 flags = 0;
7580
7581         if (rec->flag_block_full_backref)
7582                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7583
7584         path = btrfs_alloc_path();
7585         if (!path)
7586                 return -ENOMEM;
7587
7588         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7589                 /*
7590                  * Sometimes the backrefs themselves are so broken they don't
7591                  * get attached to any meaningful rec, so first go back and
7592                  * check any of our backrefs that we couldn't find and throw
7593                  * them into the list if we find the backref so that
7594                  * verify_backrefs can figure out what to do.
7595                  */
7596                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7597                 if (ret < 0)
7598                         goto out;
7599         }
7600
7601         /* step one, make sure all of the backrefs agree */
7602         ret = verify_backrefs(info, path, rec);
7603         if (ret < 0)
7604                 goto out;
7605
7606         trans = btrfs_start_transaction(info->extent_root, 1);
7607         if (IS_ERR(trans)) {
7608                 ret = PTR_ERR(trans);
7609                 goto out;
7610         }
7611
7612         /* step two, delete all the existing records */
7613         ret = delete_extent_records(trans, info->extent_root, path,
7614                                     rec->start, rec->max_size);
7615
7616         if (ret < 0)
7617                 goto out;
7618
7619         /* was this block corrupt?  If so, don't add references to it */
7620         cache = lookup_cache_extent(info->corrupt_blocks,
7621                                     rec->start, rec->max_size);
7622         if (cache) {
7623                 ret = 0;
7624                 goto out;
7625         }
7626
7627         /* step three, recreate all the refs we did find */
7628         while(cur != &rec->backrefs) {
7629                 back = to_extent_backref(cur);
7630                 cur = cur->next;
7631
7632                 /*
7633                  * if we didn't find any references, don't create a
7634                  * new extent record
7635                  */
7636                 if (!back->found_ref)
7637                         continue;
7638
7639                 rec->bad_full_backref = 0;
7640                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7641                 allocated = 1;
7642
7643                 if (ret)
7644                         goto out;
7645         }
7646 out:
7647         if (trans) {
7648                 int err = btrfs_commit_transaction(trans, info->extent_root);
7649                 if (!ret)
7650                         ret = err;
7651         }
7652
7653         btrfs_free_path(path);
7654         return ret;
7655 }
7656
7657 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7658                               struct extent_record *rec)
7659 {
7660         struct btrfs_trans_handle *trans;
7661         struct btrfs_root *root = fs_info->extent_root;
7662         struct btrfs_path *path;
7663         struct btrfs_extent_item *ei;
7664         struct btrfs_key key;
7665         u64 flags;
7666         int ret = 0;
7667
7668         key.objectid = rec->start;
7669         if (rec->metadata) {
7670                 key.type = BTRFS_METADATA_ITEM_KEY;
7671                 key.offset = rec->info_level;
7672         } else {
7673                 key.type = BTRFS_EXTENT_ITEM_KEY;
7674                 key.offset = rec->max_size;
7675         }
7676
7677         path = btrfs_alloc_path();
7678         if (!path)
7679                 return -ENOMEM;
7680
7681         trans = btrfs_start_transaction(root, 0);
7682         if (IS_ERR(trans)) {
7683                 btrfs_free_path(path);
7684                 return PTR_ERR(trans);
7685         }
7686
7687         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7688         if (ret < 0) {
7689                 btrfs_free_path(path);
7690                 btrfs_commit_transaction(trans, root);
7691                 return ret;
7692         } else if (ret) {
7693                 fprintf(stderr, "Didn't find extent for %llu\n",
7694                         (unsigned long long)rec->start);
7695                 btrfs_free_path(path);
7696                 btrfs_commit_transaction(trans, root);
7697                 return -ENOENT;
7698         }
7699
7700         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7701                             struct btrfs_extent_item);
7702         flags = btrfs_extent_flags(path->nodes[0], ei);
7703         if (rec->flag_block_full_backref) {
7704                 fprintf(stderr, "setting full backref on %llu\n",
7705                         (unsigned long long)key.objectid);
7706                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7707         } else {
7708                 fprintf(stderr, "clearing full backref on %llu\n",
7709                         (unsigned long long)key.objectid);
7710                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7711         }
7712         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7713         btrfs_mark_buffer_dirty(path->nodes[0]);
7714         btrfs_free_path(path);
7715         return btrfs_commit_transaction(trans, root);
7716 }
7717
7718 /* right now we only prune from the extent allocation tree */
7719 static int prune_one_block(struct btrfs_trans_handle *trans,
7720                            struct btrfs_fs_info *info,
7721                            struct btrfs_corrupt_block *corrupt)
7722 {
7723         int ret;
7724         struct btrfs_path path;
7725         struct extent_buffer *eb;
7726         u64 found;
7727         int slot;
7728         int nritems;
7729         int level = corrupt->level + 1;
7730
7731         btrfs_init_path(&path);
7732 again:
7733         /* we want to stop at the parent to our busted block */
7734         path.lowest_level = level;
7735
7736         ret = btrfs_search_slot(trans, info->extent_root,
7737                                 &corrupt->key, &path, -1, 1);
7738
7739         if (ret < 0)
7740                 goto out;
7741
7742         eb = path.nodes[level];
7743         if (!eb) {
7744                 ret = -ENOENT;
7745                 goto out;
7746         }
7747
7748         /*
7749          * hopefully the search gave us the block we want to prune,
7750          * lets try that first
7751          */
7752         slot = path.slots[level];
7753         found =  btrfs_node_blockptr(eb, slot);
7754         if (found == corrupt->cache.start)
7755                 goto del_ptr;
7756
7757         nritems = btrfs_header_nritems(eb);
7758
7759         /* the search failed, lets scan this node and hope we find it */
7760         for (slot = 0; slot < nritems; slot++) {
7761                 found =  btrfs_node_blockptr(eb, slot);
7762                 if (found == corrupt->cache.start)
7763                         goto del_ptr;
7764         }
7765         /*
7766          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7767          * to this block
7768          */
7769         if (eb == info->extent_root->node) {
7770                 ret = -ENOENT;
7771                 goto out;
7772         } else {
7773                 level++;
7774                 btrfs_release_path(&path);
7775                 goto again;
7776         }
7777
7778 del_ptr:
7779         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7780         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7781
7782 out:
7783         btrfs_release_path(&path);
7784         return ret;
7785 }
7786
7787 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7788 {
7789         struct btrfs_trans_handle *trans = NULL;
7790         struct cache_extent *cache;
7791         struct btrfs_corrupt_block *corrupt;
7792
7793         while (1) {
7794                 cache = search_cache_extent(info->corrupt_blocks, 0);
7795                 if (!cache)
7796                         break;
7797                 if (!trans) {
7798                         trans = btrfs_start_transaction(info->extent_root, 1);
7799                         if (IS_ERR(trans))
7800                                 return PTR_ERR(trans);
7801                 }
7802                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7803                 prune_one_block(trans, info, corrupt);
7804                 remove_cache_extent(info->corrupt_blocks, cache);
7805         }
7806         if (trans)
7807                 return btrfs_commit_transaction(trans, info->extent_root);
7808         return 0;
7809 }
7810
7811 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7812 {
7813         struct btrfs_block_group_cache *cache;
7814         u64 start, end;
7815         int ret;
7816
7817         while (1) {
7818                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7819                                             &start, &end, EXTENT_DIRTY);
7820                 if (ret)
7821                         break;
7822                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7823                                    GFP_NOFS);
7824         }
7825
7826         start = 0;
7827         while (1) {
7828                 cache = btrfs_lookup_first_block_group(fs_info, start);
7829                 if (!cache)
7830                         break;
7831                 if (cache->cached)
7832                         cache->cached = 0;
7833                 start = cache->key.objectid + cache->key.offset;
7834         }
7835 }
7836
7837 static int check_extent_refs(struct btrfs_root *root,
7838                              struct cache_tree *extent_cache)
7839 {
7840         struct extent_record *rec;
7841         struct cache_extent *cache;
7842         int err = 0;
7843         int ret = 0;
7844         int fixed = 0;
7845         int had_dups = 0;
7846         int recorded = 0;
7847
7848         if (repair) {
7849                 /*
7850                  * if we're doing a repair, we have to make sure
7851                  * we don't allocate from the problem extents.
7852                  * In the worst case, this will be all the
7853                  * extents in the FS
7854                  */
7855                 cache = search_cache_extent(extent_cache, 0);
7856                 while(cache) {
7857                         rec = container_of(cache, struct extent_record, cache);
7858                         set_extent_dirty(root->fs_info->excluded_extents,
7859                                          rec->start,
7860                                          rec->start + rec->max_size - 1,
7861                                          GFP_NOFS);
7862                         cache = next_cache_extent(cache);
7863                 }
7864
7865                 /* pin down all the corrupted blocks too */
7866                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7867                 while(cache) {
7868                         set_extent_dirty(root->fs_info->excluded_extents,
7869                                          cache->start,
7870                                          cache->start + cache->size - 1,
7871                                          GFP_NOFS);
7872                         cache = next_cache_extent(cache);
7873                 }
7874                 prune_corrupt_blocks(root->fs_info);
7875                 reset_cached_block_groups(root->fs_info);
7876         }
7877
7878         reset_cached_block_groups(root->fs_info);
7879
7880         /*
7881          * We need to delete any duplicate entries we find first otherwise we
7882          * could mess up the extent tree when we have backrefs that actually
7883          * belong to a different extent item and not the weird duplicate one.
7884          */
7885         while (repair && !list_empty(&duplicate_extents)) {
7886                 rec = to_extent_record(duplicate_extents.next);
7887                 list_del_init(&rec->list);
7888
7889                 /* Sometimes we can find a backref before we find an actual
7890                  * extent, so we need to process it a little bit to see if there
7891                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7892                  * if this is a backref screwup.  If we need to delete stuff
7893                  * process_duplicates() will return 0, otherwise it will return
7894                  * 1 and we
7895                  */
7896                 if (process_duplicates(root, extent_cache, rec))
7897                         continue;
7898                 ret = delete_duplicate_records(root, rec);
7899                 if (ret < 0)
7900                         return ret;
7901                 /*
7902                  * delete_duplicate_records will return the number of entries
7903                  * deleted, so if it's greater than 0 then we know we actually
7904                  * did something and we need to remove.
7905                  */
7906                 if (ret)
7907                         had_dups = 1;
7908         }
7909
7910         if (had_dups)
7911                 return -EAGAIN;
7912
7913         while(1) {
7914                 int cur_err = 0;
7915
7916                 fixed = 0;
7917                 recorded = 0;
7918                 cache = search_cache_extent(extent_cache, 0);
7919                 if (!cache)
7920                         break;
7921                 rec = container_of(cache, struct extent_record, cache);
7922                 if (rec->num_duplicates) {
7923                         fprintf(stderr, "extent item %llu has multiple extent "
7924                                 "items\n", (unsigned long long)rec->start);
7925                         err = 1;
7926                         cur_err = 1;
7927                 }
7928
7929                 if (rec->refs != rec->extent_item_refs) {
7930                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7931                                 (unsigned long long)rec->start,
7932                                 (unsigned long long)rec->nr);
7933                         fprintf(stderr, "extent item %llu, found %llu\n",
7934                                 (unsigned long long)rec->extent_item_refs,
7935                                 (unsigned long long)rec->refs);
7936                         ret = record_orphan_data_extents(root->fs_info, rec);
7937                         if (ret < 0)
7938                                 goto repair_abort;
7939                         if (ret == 0) {
7940                                 recorded = 1;
7941                         } else {
7942                                 /*
7943                                  * we can't use the extent to repair file
7944                                  * extent, let the fallback method handle it.
7945                                  */
7946                                 if (!fixed && repair) {
7947                                         ret = fixup_extent_refs(
7948                                                         root->fs_info,
7949                                                         extent_cache, rec);
7950                                         if (ret)
7951                                                 goto repair_abort;
7952                                         fixed = 1;
7953                                 }
7954                         }
7955                         err = 1;
7956                         cur_err = 1;
7957                 }
7958                 if (all_backpointers_checked(rec, 1)) {
7959                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7960                                 (unsigned long long)rec->start,
7961                                 (unsigned long long)rec->nr);
7962
7963                         if (!fixed && !recorded && repair) {
7964                                 ret = fixup_extent_refs(root->fs_info,
7965                                                         extent_cache, rec);
7966                                 if (ret)
7967                                         goto repair_abort;
7968                                 fixed = 1;
7969                         }
7970                         cur_err = 1;
7971                         err = 1;
7972                 }
7973                 if (!rec->owner_ref_checked) {
7974                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7975                                 (unsigned long long)rec->start,
7976                                 (unsigned long long)rec->nr);
7977                         if (!fixed && !recorded && repair) {
7978                                 ret = fixup_extent_refs(root->fs_info,
7979                                                         extent_cache, rec);
7980                                 if (ret)
7981                                         goto repair_abort;
7982                                 fixed = 1;
7983                         }
7984                         err = 1;
7985                         cur_err = 1;
7986                 }
7987                 if (rec->bad_full_backref) {
7988                         fprintf(stderr, "bad full backref, on [%llu]\n",
7989                                 (unsigned long long)rec->start);
7990                         if (repair) {
7991                                 ret = fixup_extent_flags(root->fs_info, rec);
7992                                 if (ret)
7993                                         goto repair_abort;
7994                                 fixed = 1;
7995                         }
7996                         err = 1;
7997                         cur_err = 1;
7998                 }
7999                 /*
8000                  * Although it's not a extent ref's problem, we reuse this
8001                  * routine for error reporting.
8002                  * No repair function yet.
8003                  */
8004                 if (rec->crossing_stripes) {
8005                         fprintf(stderr,
8006                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8007                                 rec->start, rec->start + rec->max_size);
8008                         err = 1;
8009                         cur_err = 1;
8010                 }
8011
8012                 if (rec->wrong_chunk_type) {
8013                         fprintf(stderr,
8014                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8015                                 rec->start, rec->start + rec->max_size);
8016                         err = 1;
8017                         cur_err = 1;
8018                 }
8019
8020                 remove_cache_extent(extent_cache, cache);
8021                 free_all_extent_backrefs(rec);
8022                 if (!init_extent_tree && repair && (!cur_err || fixed))
8023                         clear_extent_dirty(root->fs_info->excluded_extents,
8024                                            rec->start,
8025                                            rec->start + rec->max_size - 1,
8026                                            GFP_NOFS);
8027                 free(rec);
8028         }
8029 repair_abort:
8030         if (repair) {
8031                 if (ret && ret != -EAGAIN) {
8032                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8033                         exit(1);
8034                 } else if (!ret) {
8035                         struct btrfs_trans_handle *trans;
8036
8037                         root = root->fs_info->extent_root;
8038                         trans = btrfs_start_transaction(root, 1);
8039                         if (IS_ERR(trans)) {
8040                                 ret = PTR_ERR(trans);
8041                                 goto repair_abort;
8042                         }
8043
8044                         btrfs_fix_block_accounting(trans, root);
8045                         ret = btrfs_commit_transaction(trans, root);
8046                         if (ret)
8047                                 goto repair_abort;
8048                 }
8049                 if (err)
8050                         fprintf(stderr, "repaired damaged extent references\n");
8051                 return ret;
8052         }
8053         return err;
8054 }
8055
8056 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8057 {
8058         u64 stripe_size;
8059
8060         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8061                 stripe_size = length;
8062                 stripe_size /= num_stripes;
8063         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8064                 stripe_size = length * 2;
8065                 stripe_size /= num_stripes;
8066         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8067                 stripe_size = length;
8068                 stripe_size /= (num_stripes - 1);
8069         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8070                 stripe_size = length;
8071                 stripe_size /= (num_stripes - 2);
8072         } else {
8073                 stripe_size = length;
8074         }
8075         return stripe_size;
8076 }
8077
8078 /*
8079  * Check the chunk with its block group/dev list ref:
8080  * Return 0 if all refs seems valid.
8081  * Return 1 if part of refs seems valid, need later check for rebuild ref
8082  * like missing block group and needs to search extent tree to rebuild them.
8083  * Return -1 if essential refs are missing and unable to rebuild.
8084  */
8085 static int check_chunk_refs(struct chunk_record *chunk_rec,
8086                             struct block_group_tree *block_group_cache,
8087                             struct device_extent_tree *dev_extent_cache,
8088                             int silent)
8089 {
8090         struct cache_extent *block_group_item;
8091         struct block_group_record *block_group_rec;
8092         struct cache_extent *dev_extent_item;
8093         struct device_extent_record *dev_extent_rec;
8094         u64 devid;
8095         u64 offset;
8096         u64 length;
8097         int metadump_v2 = 0;
8098         int i;
8099         int ret = 0;
8100
8101         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8102                                                chunk_rec->offset,
8103                                                chunk_rec->length);
8104         if (block_group_item) {
8105                 block_group_rec = container_of(block_group_item,
8106                                                struct block_group_record,
8107                                                cache);
8108                 if (chunk_rec->length != block_group_rec->offset ||
8109                     chunk_rec->offset != block_group_rec->objectid ||
8110                     (!metadump_v2 &&
8111                      chunk_rec->type_flags != block_group_rec->flags)) {
8112                         if (!silent)
8113                                 fprintf(stderr,
8114                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8115                                         chunk_rec->objectid,
8116                                         chunk_rec->type,
8117                                         chunk_rec->offset,
8118                                         chunk_rec->length,
8119                                         chunk_rec->offset,
8120                                         chunk_rec->type_flags,
8121                                         block_group_rec->objectid,
8122                                         block_group_rec->type,
8123                                         block_group_rec->offset,
8124                                         block_group_rec->offset,
8125                                         block_group_rec->objectid,
8126                                         block_group_rec->flags);
8127                         ret = -1;
8128                 } else {
8129                         list_del_init(&block_group_rec->list);
8130                         chunk_rec->bg_rec = block_group_rec;
8131                 }
8132         } else {
8133                 if (!silent)
8134                         fprintf(stderr,
8135                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8136                                 chunk_rec->objectid,
8137                                 chunk_rec->type,
8138                                 chunk_rec->offset,
8139                                 chunk_rec->length,
8140                                 chunk_rec->offset,
8141                                 chunk_rec->type_flags);
8142                 ret = 1;
8143         }
8144
8145         if (metadump_v2)
8146                 return ret;
8147
8148         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8149                                     chunk_rec->num_stripes);
8150         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8151                 devid = chunk_rec->stripes[i].devid;
8152                 offset = chunk_rec->stripes[i].offset;
8153                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8154                                                        devid, offset, length);
8155                 if (dev_extent_item) {
8156                         dev_extent_rec = container_of(dev_extent_item,
8157                                                 struct device_extent_record,
8158                                                 cache);
8159                         if (dev_extent_rec->objectid != devid ||
8160                             dev_extent_rec->offset != offset ||
8161                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8162                             dev_extent_rec->length != length) {
8163                                 if (!silent)
8164                                         fprintf(stderr,
8165                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8166                                                 chunk_rec->objectid,
8167                                                 chunk_rec->type,
8168                                                 chunk_rec->offset,
8169                                                 chunk_rec->stripes[i].devid,
8170                                                 chunk_rec->stripes[i].offset,
8171                                                 dev_extent_rec->objectid,
8172                                                 dev_extent_rec->offset,
8173                                                 dev_extent_rec->length);
8174                                 ret = -1;
8175                         } else {
8176                                 list_move(&dev_extent_rec->chunk_list,
8177                                           &chunk_rec->dextents);
8178                         }
8179                 } else {
8180                         if (!silent)
8181                                 fprintf(stderr,
8182                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8183                                         chunk_rec->objectid,
8184                                         chunk_rec->type,
8185                                         chunk_rec->offset,
8186                                         chunk_rec->stripes[i].devid,
8187                                         chunk_rec->stripes[i].offset);
8188                         ret = -1;
8189                 }
8190         }
8191         return ret;
8192 }
8193
8194 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8195 int check_chunks(struct cache_tree *chunk_cache,
8196                  struct block_group_tree *block_group_cache,
8197                  struct device_extent_tree *dev_extent_cache,
8198                  struct list_head *good, struct list_head *bad,
8199                  struct list_head *rebuild, int silent)
8200 {
8201         struct cache_extent *chunk_item;
8202         struct chunk_record *chunk_rec;
8203         struct block_group_record *bg_rec;
8204         struct device_extent_record *dext_rec;
8205         int err;
8206         int ret = 0;
8207
8208         chunk_item = first_cache_extent(chunk_cache);
8209         while (chunk_item) {
8210                 chunk_rec = container_of(chunk_item, struct chunk_record,
8211                                          cache);
8212                 err = check_chunk_refs(chunk_rec, block_group_cache,
8213                                        dev_extent_cache, silent);
8214                 if (err < 0)
8215                         ret = err;
8216                 if (err == 0 && good)
8217                         list_add_tail(&chunk_rec->list, good);
8218                 if (err > 0 && rebuild)
8219                         list_add_tail(&chunk_rec->list, rebuild);
8220                 if (err < 0 && bad)
8221                         list_add_tail(&chunk_rec->list, bad);
8222                 chunk_item = next_cache_extent(chunk_item);
8223         }
8224
8225         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8226                 if (!silent)
8227                         fprintf(stderr,
8228                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8229                                 bg_rec->objectid,
8230                                 bg_rec->offset,
8231                                 bg_rec->flags);
8232                 if (!ret)
8233                         ret = 1;
8234         }
8235
8236         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8237                             chunk_list) {
8238                 if (!silent)
8239                         fprintf(stderr,
8240                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8241                                 dext_rec->objectid,
8242                                 dext_rec->offset,
8243                                 dext_rec->length);
8244                 if (!ret)
8245                         ret = 1;
8246         }
8247         return ret;
8248 }
8249
8250
8251 static int check_device_used(struct device_record *dev_rec,
8252                              struct device_extent_tree *dext_cache)
8253 {
8254         struct cache_extent *cache;
8255         struct device_extent_record *dev_extent_rec;
8256         u64 total_byte = 0;
8257
8258         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8259         while (cache) {
8260                 dev_extent_rec = container_of(cache,
8261                                               struct device_extent_record,
8262                                               cache);
8263                 if (dev_extent_rec->objectid != dev_rec->devid)
8264                         break;
8265
8266                 list_del_init(&dev_extent_rec->device_list);
8267                 total_byte += dev_extent_rec->length;
8268                 cache = next_cache_extent(cache);
8269         }
8270
8271         if (total_byte != dev_rec->byte_used) {
8272                 fprintf(stderr,
8273                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8274                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8275                         dev_rec->type, dev_rec->offset);
8276                 return -1;
8277         } else {
8278                 return 0;
8279         }
8280 }
8281
8282 /* check btrfs_dev_item -> btrfs_dev_extent */
8283 static int check_devices(struct rb_root *dev_cache,
8284                          struct device_extent_tree *dev_extent_cache)
8285 {
8286         struct rb_node *dev_node;
8287         struct device_record *dev_rec;
8288         struct device_extent_record *dext_rec;
8289         int err;
8290         int ret = 0;
8291
8292         dev_node = rb_first(dev_cache);
8293         while (dev_node) {
8294                 dev_rec = container_of(dev_node, struct device_record, node);
8295                 err = check_device_used(dev_rec, dev_extent_cache);
8296                 if (err)
8297                         ret = err;
8298
8299                 dev_node = rb_next(dev_node);
8300         }
8301         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8302                             device_list) {
8303                 fprintf(stderr,
8304                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8305                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8306                 if (!ret)
8307                         ret = 1;
8308         }
8309         return ret;
8310 }
8311
8312 static int add_root_item_to_list(struct list_head *head,
8313                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8314                                   u8 level, u8 drop_level,
8315                                   int level_size, struct btrfs_key *drop_key)
8316 {
8317
8318         struct root_item_record *ri_rec;
8319         ri_rec = malloc(sizeof(*ri_rec));
8320         if (!ri_rec)
8321                 return -ENOMEM;
8322         ri_rec->bytenr = bytenr;
8323         ri_rec->objectid = objectid;
8324         ri_rec->level = level;
8325         ri_rec->level_size = level_size;
8326         ri_rec->drop_level = drop_level;
8327         ri_rec->last_snapshot = last_snapshot;
8328         if (drop_key)
8329                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8330         list_add_tail(&ri_rec->list, head);
8331
8332         return 0;
8333 }
8334
8335 static void free_root_item_list(struct list_head *list)
8336 {
8337         struct root_item_record *ri_rec;
8338
8339         while (!list_empty(list)) {
8340                 ri_rec = list_first_entry(list, struct root_item_record,
8341                                           list);
8342                 list_del_init(&ri_rec->list);
8343                 free(ri_rec);
8344         }
8345 }
8346
8347 static int deal_root_from_list(struct list_head *list,
8348                                struct btrfs_root *root,
8349                                struct block_info *bits,
8350                                int bits_nr,
8351                                struct cache_tree *pending,
8352                                struct cache_tree *seen,
8353                                struct cache_tree *reada,
8354                                struct cache_tree *nodes,
8355                                struct cache_tree *extent_cache,
8356                                struct cache_tree *chunk_cache,
8357                                struct rb_root *dev_cache,
8358                                struct block_group_tree *block_group_cache,
8359                                struct device_extent_tree *dev_extent_cache)
8360 {
8361         int ret = 0;
8362         u64 last;
8363
8364         while (!list_empty(list)) {
8365                 struct root_item_record *rec;
8366                 struct extent_buffer *buf;
8367                 rec = list_entry(list->next,
8368                                  struct root_item_record, list);
8369                 last = 0;
8370                 buf = read_tree_block(root->fs_info->tree_root,
8371                                       rec->bytenr, rec->level_size, 0);
8372                 if (!extent_buffer_uptodate(buf)) {
8373                         free_extent_buffer(buf);
8374                         ret = -EIO;
8375                         break;
8376                 }
8377                 ret = add_root_to_pending(buf, extent_cache, pending,
8378                                     seen, nodes, rec->objectid);
8379                 if (ret < 0)
8380                         break;
8381                 /*
8382                  * To rebuild extent tree, we need deal with snapshot
8383                  * one by one, otherwise we deal with node firstly which
8384                  * can maximize readahead.
8385                  */
8386                 while (1) {
8387                         ret = run_next_block(root, bits, bits_nr, &last,
8388                                              pending, seen, reada, nodes,
8389                                              extent_cache, chunk_cache,
8390                                              dev_cache, block_group_cache,
8391                                              dev_extent_cache, rec);
8392                         if (ret != 0)
8393                                 break;
8394                 }
8395                 free_extent_buffer(buf);
8396                 list_del(&rec->list);
8397                 free(rec);
8398                 if (ret < 0)
8399                         break;
8400         }
8401         while (ret >= 0) {
8402                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8403                                      reada, nodes, extent_cache, chunk_cache,
8404                                      dev_cache, block_group_cache,
8405                                      dev_extent_cache, NULL);
8406                 if (ret != 0) {
8407                         if (ret > 0)
8408                                 ret = 0;
8409                         break;
8410                 }
8411         }
8412         return ret;
8413 }
8414
8415 static int check_chunks_and_extents(struct btrfs_root *root)
8416 {
8417         struct rb_root dev_cache;
8418         struct cache_tree chunk_cache;
8419         struct block_group_tree block_group_cache;
8420         struct device_extent_tree dev_extent_cache;
8421         struct cache_tree extent_cache;
8422         struct cache_tree seen;
8423         struct cache_tree pending;
8424         struct cache_tree reada;
8425         struct cache_tree nodes;
8426         struct extent_io_tree excluded_extents;
8427         struct cache_tree corrupt_blocks;
8428         struct btrfs_path path;
8429         struct btrfs_key key;
8430         struct btrfs_key found_key;
8431         int ret, err = 0;
8432         struct block_info *bits;
8433         int bits_nr;
8434         struct extent_buffer *leaf;
8435         int slot;
8436         struct btrfs_root_item ri;
8437         struct list_head dropping_trees;
8438         struct list_head normal_trees;
8439         struct btrfs_root *root1;
8440         u64 objectid;
8441         u32 level_size;
8442         u8 level;
8443
8444         dev_cache = RB_ROOT;
8445         cache_tree_init(&chunk_cache);
8446         block_group_tree_init(&block_group_cache);
8447         device_extent_tree_init(&dev_extent_cache);
8448
8449         cache_tree_init(&extent_cache);
8450         cache_tree_init(&seen);
8451         cache_tree_init(&pending);
8452         cache_tree_init(&nodes);
8453         cache_tree_init(&reada);
8454         cache_tree_init(&corrupt_blocks);
8455         extent_io_tree_init(&excluded_extents);
8456         INIT_LIST_HEAD(&dropping_trees);
8457         INIT_LIST_HEAD(&normal_trees);
8458
8459         if (repair) {
8460                 root->fs_info->excluded_extents = &excluded_extents;
8461                 root->fs_info->fsck_extent_cache = &extent_cache;
8462                 root->fs_info->free_extent_hook = free_extent_hook;
8463                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8464         }
8465
8466         bits_nr = 1024;
8467         bits = malloc(bits_nr * sizeof(struct block_info));
8468         if (!bits) {
8469                 perror("malloc");
8470                 exit(1);
8471         }
8472
8473         if (ctx.progress_enabled) {
8474                 ctx.tp = TASK_EXTENTS;
8475                 task_start(ctx.info);
8476         }
8477
8478 again:
8479         root1 = root->fs_info->tree_root;
8480         level = btrfs_header_level(root1->node);
8481         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8482                                     root1->node->start, 0, level, 0,
8483                                     root1->nodesize, NULL);
8484         if (ret < 0)
8485                 goto out;
8486         root1 = root->fs_info->chunk_root;
8487         level = btrfs_header_level(root1->node);
8488         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8489                                     root1->node->start, 0, level, 0,
8490                                     root1->nodesize, NULL);
8491         if (ret < 0)
8492                 goto out;
8493         btrfs_init_path(&path);
8494         key.offset = 0;
8495         key.objectid = 0;
8496         key.type = BTRFS_ROOT_ITEM_KEY;
8497         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8498                                         &key, &path, 0, 0);
8499         if (ret < 0)
8500                 goto out;
8501         while(1) {
8502                 leaf = path.nodes[0];
8503                 slot = path.slots[0];
8504                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8505                         ret = btrfs_next_leaf(root, &path);
8506                         if (ret != 0)
8507                                 break;
8508                         leaf = path.nodes[0];
8509                         slot = path.slots[0];
8510                 }
8511                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8512                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8513                         unsigned long offset;
8514                         u64 last_snapshot;
8515
8516                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8517                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8518                         last_snapshot = btrfs_root_last_snapshot(&ri);
8519                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8520                                 level = btrfs_root_level(&ri);
8521                                 level_size = root->nodesize;
8522                                 ret = add_root_item_to_list(&normal_trees,
8523                                                 found_key.objectid,
8524                                                 btrfs_root_bytenr(&ri),
8525                                                 last_snapshot, level,
8526                                                 0, level_size, NULL);
8527                                 if (ret < 0)
8528                                         goto out;
8529                         } else {
8530                                 level = btrfs_root_level(&ri);
8531                                 level_size = root->nodesize;
8532                                 objectid = found_key.objectid;
8533                                 btrfs_disk_key_to_cpu(&found_key,
8534                                                       &ri.drop_progress);
8535                                 ret = add_root_item_to_list(&dropping_trees,
8536                                                 objectid,
8537                                                 btrfs_root_bytenr(&ri),
8538                                                 last_snapshot, level,
8539                                                 ri.drop_level,
8540                                                 level_size, &found_key);
8541                                 if (ret < 0)
8542                                         goto out;
8543                         }
8544                 }
8545                 path.slots[0]++;
8546         }
8547         btrfs_release_path(&path);
8548
8549         /*
8550          * check_block can return -EAGAIN if it fixes something, please keep
8551          * this in mind when dealing with return values from these functions, if
8552          * we get -EAGAIN we want to fall through and restart the loop.
8553          */
8554         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8555                                   &seen, &reada, &nodes, &extent_cache,
8556                                   &chunk_cache, &dev_cache, &block_group_cache,
8557                                   &dev_extent_cache);
8558         if (ret < 0) {
8559                 if (ret == -EAGAIN)
8560                         goto loop;
8561                 goto out;
8562         }
8563         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8564                                   &pending, &seen, &reada, &nodes,
8565                                   &extent_cache, &chunk_cache, &dev_cache,
8566                                   &block_group_cache, &dev_extent_cache);
8567         if (ret < 0) {
8568                 if (ret == -EAGAIN)
8569                         goto loop;
8570                 goto out;
8571         }
8572
8573         ret = check_chunks(&chunk_cache, &block_group_cache,
8574                            &dev_extent_cache, NULL, NULL, NULL, 0);
8575         if (ret) {
8576                 if (ret == -EAGAIN)
8577                         goto loop;
8578                 err = ret;
8579         }
8580
8581         ret = check_extent_refs(root, &extent_cache);
8582         if (ret < 0) {
8583                 if (ret == -EAGAIN)
8584                         goto loop;
8585                 goto out;
8586         }
8587
8588         ret = check_devices(&dev_cache, &dev_extent_cache);
8589         if (ret && err)
8590                 ret = err;
8591
8592 out:
8593         task_stop(ctx.info);
8594         if (repair) {
8595                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8596                 extent_io_tree_cleanup(&excluded_extents);
8597                 root->fs_info->fsck_extent_cache = NULL;
8598                 root->fs_info->free_extent_hook = NULL;
8599                 root->fs_info->corrupt_blocks = NULL;
8600                 root->fs_info->excluded_extents = NULL;
8601         }
8602         free(bits);
8603         free_chunk_cache_tree(&chunk_cache);
8604         free_device_cache_tree(&dev_cache);
8605         free_block_group_tree(&block_group_cache);
8606         free_device_extent_tree(&dev_extent_cache);
8607         free_extent_cache_tree(&seen);
8608         free_extent_cache_tree(&pending);
8609         free_extent_cache_tree(&reada);
8610         free_extent_cache_tree(&nodes);
8611         return ret;
8612 loop:
8613         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8614         free_extent_cache_tree(&seen);
8615         free_extent_cache_tree(&pending);
8616         free_extent_cache_tree(&reada);
8617         free_extent_cache_tree(&nodes);
8618         free_chunk_cache_tree(&chunk_cache);
8619         free_block_group_tree(&block_group_cache);
8620         free_device_cache_tree(&dev_cache);
8621         free_device_extent_tree(&dev_extent_cache);
8622         free_extent_record_cache(root->fs_info, &extent_cache);
8623         free_root_item_list(&normal_trees);
8624         free_root_item_list(&dropping_trees);
8625         extent_io_tree_cleanup(&excluded_extents);
8626         goto again;
8627 }
8628
8629 /*
8630  * Check backrefs of a tree block given by @bytenr or @eb.
8631  *
8632  * @root:       the root containing the @bytenr or @eb
8633  * @eb:         tree block extent buffer, can be NULL
8634  * @bytenr:     bytenr of the tree block to search
8635  * @level:      tree level of the tree block
8636  * @owner:      owner of the tree block
8637  *
8638  * Return >0 for any error found and output error message
8639  * Return 0 for no error found
8640  */
8641 static int check_tree_block_ref(struct btrfs_root *root,
8642                                 struct extent_buffer *eb, u64 bytenr,
8643                                 int level, u64 owner)
8644 {
8645         struct btrfs_key key;
8646         struct btrfs_root *extent_root = root->fs_info->extent_root;
8647         struct btrfs_path path;
8648         struct btrfs_extent_item *ei;
8649         struct btrfs_extent_inline_ref *iref;
8650         struct extent_buffer *leaf;
8651         unsigned long end;
8652         unsigned long ptr;
8653         int slot;
8654         int skinny_level;
8655         int type;
8656         u32 nodesize = root->nodesize;
8657         u32 item_size;
8658         u64 offset;
8659         int found_ref = 0;
8660         int err = 0;
8661         int ret;
8662
8663         btrfs_init_path(&path);
8664         key.objectid = bytenr;
8665         if (btrfs_fs_incompat(root->fs_info,
8666                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8667                 key.type = BTRFS_METADATA_ITEM_KEY;
8668         else
8669                 key.type = BTRFS_EXTENT_ITEM_KEY;
8670         key.offset = (u64)-1;
8671
8672         /* Search for the backref in extent tree */
8673         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8674         if (ret < 0) {
8675                 err |= BACKREF_MISSING;
8676                 goto out;
8677         }
8678         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8679         if (ret) {
8680                 err |= BACKREF_MISSING;
8681                 goto out;
8682         }
8683
8684         leaf = path.nodes[0];
8685         slot = path.slots[0];
8686         btrfs_item_key_to_cpu(leaf, &key, slot);
8687
8688         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8689
8690         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8691                 skinny_level = (int)key.offset;
8692                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8693         } else {
8694                 struct btrfs_tree_block_info *info;
8695
8696                 info = (struct btrfs_tree_block_info *)(ei + 1);
8697                 skinny_level = btrfs_tree_block_level(leaf, info);
8698                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8699         }
8700
8701         if (eb) {
8702                 u64 header_gen;
8703                 u64 extent_gen;
8704
8705                 if (!(btrfs_extent_flags(leaf, ei) &
8706                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8707                         error(
8708                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8709                                 key.objectid, nodesize,
8710                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8711                         err = BACKREF_MISMATCH;
8712                 }
8713                 header_gen = btrfs_header_generation(eb);
8714                 extent_gen = btrfs_extent_generation(leaf, ei);
8715                 if (header_gen != extent_gen) {
8716                         error(
8717         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8718                                 key.objectid, nodesize, header_gen,
8719                                 extent_gen);
8720                         err = BACKREF_MISMATCH;
8721                 }
8722                 if (level != skinny_level) {
8723                         error(
8724                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8725                                 key.objectid, nodesize, level, skinny_level);
8726                         err = BACKREF_MISMATCH;
8727                 }
8728                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8729                         error(
8730                         "extent[%llu %u] is referred by other roots than %llu",
8731                                 key.objectid, nodesize, root->objectid);
8732                         err = BACKREF_MISMATCH;
8733                 }
8734         }
8735
8736         /*
8737          * Iterate the extent/metadata item to find the exact backref
8738          */
8739         item_size = btrfs_item_size_nr(leaf, slot);
8740         ptr = (unsigned long)iref;
8741         end = (unsigned long)ei + item_size;
8742         while (ptr < end) {
8743                 iref = (struct btrfs_extent_inline_ref *)ptr;
8744                 type = btrfs_extent_inline_ref_type(leaf, iref);
8745                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8746
8747                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8748                         (offset == root->objectid || offset == owner)) {
8749                         found_ref = 1;
8750                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8751                         /* Check if the backref points to valid referencer */
8752                         found_ref = !check_tree_block_ref(root, NULL, offset,
8753                                                           level + 1, owner);
8754                 }
8755
8756                 if (found_ref)
8757                         break;
8758                 ptr += btrfs_extent_inline_ref_size(type);
8759         }
8760
8761         /*
8762          * Inlined extent item doesn't have what we need, check
8763          * TREE_BLOCK_REF_KEY
8764          */
8765         if (!found_ref) {
8766                 btrfs_release_path(&path);
8767                 key.objectid = bytenr;
8768                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8769                 key.offset = root->objectid;
8770
8771                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8772                 if (!ret)
8773                         found_ref = 1;
8774         }
8775         if (!found_ref)
8776                 err |= BACKREF_MISSING;
8777 out:
8778         btrfs_release_path(&path);
8779         if (eb && (err & BACKREF_MISSING))
8780                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8781                         bytenr, nodesize, owner, level);
8782         return err;
8783 }
8784
8785 /*
8786  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8787  *
8788  * Return >0 any error found and output error message
8789  * Return 0 for no error found
8790  */
8791 static int check_extent_data_item(struct btrfs_root *root,
8792                                   struct extent_buffer *eb, int slot)
8793 {
8794         struct btrfs_file_extent_item *fi;
8795         struct btrfs_path path;
8796         struct btrfs_root *extent_root = root->fs_info->extent_root;
8797         struct btrfs_key fi_key;
8798         struct btrfs_key dbref_key;
8799         struct extent_buffer *leaf;
8800         struct btrfs_extent_item *ei;
8801         struct btrfs_extent_inline_ref *iref;
8802         struct btrfs_extent_data_ref *dref;
8803         u64 owner;
8804         u64 file_extent_gen;
8805         u64 disk_bytenr;
8806         u64 disk_num_bytes;
8807         u64 extent_num_bytes;
8808         u64 extent_flags;
8809         u64 extent_gen;
8810         u32 item_size;
8811         unsigned long end;
8812         unsigned long ptr;
8813         int type;
8814         u64 ref_root;
8815         int found_dbackref = 0;
8816         int err = 0;
8817         int ret;
8818
8819         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8820         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8821         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8822
8823         /* Nothing to check for hole and inline data extents */
8824         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8825             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8826                 return 0;
8827
8828         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8829         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8830         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8831
8832         /* Check unaligned disk_num_bytes and num_bytes */
8833         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8834                 error(
8835 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8836                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8837                         root->sectorsize);
8838                 err |= BYTES_UNALIGNED;
8839         } else {
8840                 data_bytes_allocated += disk_num_bytes;
8841         }
8842         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8843                 error(
8844 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8845                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8846                         root->sectorsize);
8847                 err |= BYTES_UNALIGNED;
8848         } else {
8849                 data_bytes_referenced += extent_num_bytes;
8850         }
8851         owner = btrfs_header_owner(eb);
8852
8853         /* Check the extent item of the file extent in extent tree */
8854         btrfs_init_path(&path);
8855         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8856         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8857         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8858
8859         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8860         if (ret) {
8861                 err |= BACKREF_MISSING;
8862                 goto error;
8863         }
8864
8865         leaf = path.nodes[0];
8866         slot = path.slots[0];
8867         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8868
8869         extent_flags = btrfs_extent_flags(leaf, ei);
8870         extent_gen = btrfs_extent_generation(leaf, ei);
8871
8872         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8873                 error(
8874                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8875                     disk_bytenr, disk_num_bytes,
8876                     BTRFS_EXTENT_FLAG_DATA);
8877                 err |= BACKREF_MISMATCH;
8878         }
8879
8880         if (file_extent_gen < extent_gen) {
8881                 error(
8882 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8883                         disk_bytenr, disk_num_bytes, file_extent_gen,
8884                         extent_gen);
8885                 err |= BACKREF_MISMATCH;
8886         }
8887
8888         /* Check data backref inside that extent item */
8889         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8890         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8891         ptr = (unsigned long)iref;
8892         end = (unsigned long)ei + item_size;
8893         while (ptr < end) {
8894                 iref = (struct btrfs_extent_inline_ref *)ptr;
8895                 type = btrfs_extent_inline_ref_type(leaf, iref);
8896                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8897
8898                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8899                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8900                         if (ref_root == owner || ref_root == root->objectid)
8901                                 found_dbackref = 1;
8902                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8903                         found_dbackref = !check_tree_block_ref(root, NULL,
8904                                 btrfs_extent_inline_ref_offset(leaf, iref),
8905                                 0, owner);
8906                 }
8907
8908                 if (found_dbackref)
8909                         break;
8910                 ptr += btrfs_extent_inline_ref_size(type);
8911         }
8912
8913         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8914         if (!found_dbackref) {
8915                 btrfs_release_path(&path);
8916
8917                 btrfs_init_path(&path);
8918                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8919                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8920                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8921                                 fi_key.objectid, fi_key.offset);
8922
8923                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8924                                         &dbref_key, &path, 0, 0);
8925                 if (!ret)
8926                         found_dbackref = 1;
8927         }
8928
8929         if (!found_dbackref)
8930                 err |= BACKREF_MISSING;
8931 error:
8932         btrfs_release_path(&path);
8933         if (err & BACKREF_MISSING) {
8934                 error("data extent[%llu %llu] backref lost",
8935                       disk_bytenr, disk_num_bytes);
8936         }
8937         return err;
8938 }
8939
8940 /*
8941  * Get real tree block level for the case like shared block
8942  * Return >= 0 as tree level
8943  * Return <0 for error
8944  */
8945 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8946 {
8947         struct extent_buffer *eb;
8948         struct btrfs_path path;
8949         struct btrfs_key key;
8950         struct btrfs_extent_item *ei;
8951         u64 flags;
8952         u64 transid;
8953         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8954         u8 backref_level;
8955         u8 header_level;
8956         int ret;
8957
8958         /* Search extent tree for extent generation and level */
8959         key.objectid = bytenr;
8960         key.type = BTRFS_METADATA_ITEM_KEY;
8961         key.offset = (u64)-1;
8962
8963         btrfs_init_path(&path);
8964         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8965         if (ret < 0)
8966                 goto release_out;
8967         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8968         if (ret < 0)
8969                 goto release_out;
8970         if (ret > 0) {
8971                 ret = -ENOENT;
8972                 goto release_out;
8973         }
8974
8975         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8976         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8977                             struct btrfs_extent_item);
8978         flags = btrfs_extent_flags(path.nodes[0], ei);
8979         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8980                 ret = -ENOENT;
8981                 goto release_out;
8982         }
8983
8984         /* Get transid for later read_tree_block() check */
8985         transid = btrfs_extent_generation(path.nodes[0], ei);
8986
8987         /* Get backref level as one source */
8988         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8989                 backref_level = key.offset;
8990         } else {
8991                 struct btrfs_tree_block_info *info;
8992
8993                 info = (struct btrfs_tree_block_info *)(ei + 1);
8994                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8995         }
8996         btrfs_release_path(&path);
8997
8998         /* Get level from tree block as an alternative source */
8999         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9000         if (!extent_buffer_uptodate(eb)) {
9001                 free_extent_buffer(eb);
9002                 return -EIO;
9003         }
9004         header_level = btrfs_header_level(eb);
9005         free_extent_buffer(eb);
9006
9007         if (header_level != backref_level)
9008                 return -EIO;
9009         return header_level;
9010
9011 release_out:
9012         btrfs_release_path(&path);
9013         return ret;
9014 }
9015
9016 /*
9017  * Check if a tree block backref is valid (points to a valid tree block)
9018  * if level == -1, level will be resolved
9019  * Return >0 for any error found and print error message
9020  */
9021 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9022                                     u64 bytenr, int level)
9023 {
9024         struct btrfs_root *root;
9025         struct btrfs_key key;
9026         struct btrfs_path path;
9027         struct extent_buffer *eb;
9028         struct extent_buffer *node;
9029         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9030         int err = 0;
9031         int ret;
9032
9033         /* Query level for level == -1 special case */
9034         if (level == -1)
9035                 level = query_tree_block_level(fs_info, bytenr);
9036         if (level < 0) {
9037                 err |= REFERENCER_MISSING;
9038                 goto out;
9039         }
9040
9041         key.objectid = root_id;
9042         key.type = BTRFS_ROOT_ITEM_KEY;
9043         key.offset = (u64)-1;
9044
9045         root = btrfs_read_fs_root(fs_info, &key);
9046         if (IS_ERR(root)) {
9047                 err |= REFERENCER_MISSING;
9048                 goto out;
9049         }
9050
9051         /* Read out the tree block to get item/node key */
9052         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9053         if (!extent_buffer_uptodate(eb)) {
9054                 err |= REFERENCER_MISSING;
9055                 free_extent_buffer(eb);
9056                 goto out;
9057         }
9058
9059         /* Empty tree, no need to check key */
9060         if (!btrfs_header_nritems(eb) && !level) {
9061                 free_extent_buffer(eb);
9062                 goto out;
9063         }
9064
9065         if (level)
9066                 btrfs_node_key_to_cpu(eb, &key, 0);
9067         else
9068                 btrfs_item_key_to_cpu(eb, &key, 0);
9069
9070         free_extent_buffer(eb);
9071
9072         btrfs_init_path(&path);
9073         path.lowest_level = level;
9074         /* Search with the first key, to ensure we can reach it */
9075         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9076         if (ret < 0) {
9077                 err |= REFERENCER_MISSING;
9078                 goto release_out;
9079         }
9080
9081         node = path.nodes[level];
9082         if (btrfs_header_bytenr(node) != bytenr) {
9083                 error(
9084         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9085                         bytenr, nodesize, bytenr,
9086                         btrfs_header_bytenr(node));
9087                 err |= REFERENCER_MISMATCH;
9088         }
9089         if (btrfs_header_level(node) != level) {
9090                 error(
9091         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9092                         bytenr, nodesize, level,
9093                         btrfs_header_level(node));
9094                 err |= REFERENCER_MISMATCH;
9095         }
9096
9097 release_out:
9098         btrfs_release_path(&path);
9099 out:
9100         if (err & REFERENCER_MISSING) {
9101                 if (level < 0)
9102                         error("extent [%llu %d] lost referencer (owner: %llu)",
9103                                 bytenr, nodesize, root_id);
9104                 else
9105                         error(
9106                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9107                                 bytenr, nodesize, root_id, level);
9108         }
9109
9110         return err;
9111 }
9112
9113 /*
9114  * Check referencer for shared block backref
9115  * If level == -1, this function will resolve the level.
9116  */
9117 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9118                                      u64 parent, u64 bytenr, int level)
9119 {
9120         struct extent_buffer *eb;
9121         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9122         u32 nr;
9123         int found_parent = 0;
9124         int i;
9125
9126         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9127         if (!extent_buffer_uptodate(eb))
9128                 goto out;
9129
9130         if (level == -1)
9131                 level = query_tree_block_level(fs_info, bytenr);
9132         if (level < 0)
9133                 goto out;
9134
9135         if (level + 1 != btrfs_header_level(eb))
9136                 goto out;
9137
9138         nr = btrfs_header_nritems(eb);
9139         for (i = 0; i < nr; i++) {
9140                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9141                         found_parent = 1;
9142                         break;
9143                 }
9144         }
9145 out:
9146         free_extent_buffer(eb);
9147         if (!found_parent) {
9148                 error(
9149         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9150                         bytenr, nodesize, parent, level);
9151                 return REFERENCER_MISSING;
9152         }
9153         return 0;
9154 }
9155
9156 /*
9157  * Check referencer for normal (inlined) data ref
9158  * If len == 0, it will be resolved by searching in extent tree
9159  */
9160 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9161                                      u64 root_id, u64 objectid, u64 offset,
9162                                      u64 bytenr, u64 len, u32 count)
9163 {
9164         struct btrfs_root *root;
9165         struct btrfs_root *extent_root = fs_info->extent_root;
9166         struct btrfs_key key;
9167         struct btrfs_path path;
9168         struct extent_buffer *leaf;
9169         struct btrfs_file_extent_item *fi;
9170         u32 found_count = 0;
9171         int slot;
9172         int ret = 0;
9173
9174         if (!len) {
9175                 key.objectid = bytenr;
9176                 key.type = BTRFS_EXTENT_ITEM_KEY;
9177                 key.offset = (u64)-1;
9178
9179                 btrfs_init_path(&path);
9180                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9181                 if (ret < 0)
9182                         goto out;
9183                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9184                 if (ret)
9185                         goto out;
9186                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9187                 if (key.objectid != bytenr ||
9188                     key.type != BTRFS_EXTENT_ITEM_KEY)
9189                         goto out;
9190                 len = key.offset;
9191                 btrfs_release_path(&path);
9192         }
9193         key.objectid = root_id;
9194         key.type = BTRFS_ROOT_ITEM_KEY;
9195         key.offset = (u64)-1;
9196         btrfs_init_path(&path);
9197
9198         root = btrfs_read_fs_root(fs_info, &key);
9199         if (IS_ERR(root))
9200                 goto out;
9201
9202         key.objectid = objectid;
9203         key.type = BTRFS_EXTENT_DATA_KEY;
9204         /*
9205          * It can be nasty as data backref offset is
9206          * file offset - file extent offset, which is smaller or
9207          * equal to original backref offset.  The only special case is
9208          * overflow.  So we need to special check and do further search.
9209          */
9210         key.offset = offset & (1ULL << 63) ? 0 : offset;
9211
9212         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9213         if (ret < 0)
9214                 goto out;
9215
9216         /*
9217          * Search afterwards to get correct one
9218          * NOTE: As we must do a comprehensive check on the data backref to
9219          * make sure the dref count also matches, we must iterate all file
9220          * extents for that inode.
9221          */
9222         while (1) {
9223                 leaf = path.nodes[0];
9224                 slot = path.slots[0];
9225
9226                 btrfs_item_key_to_cpu(leaf, &key, slot);
9227                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9228                         break;
9229                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9230                 /*
9231                  * Except normal disk bytenr and disk num bytes, we still
9232                  * need to do extra check on dbackref offset as
9233                  * dbackref offset = file_offset - file_extent_offset
9234                  */
9235                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9236                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9237                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9238                     offset)
9239                         found_count++;
9240
9241                 ret = btrfs_next_item(root, &path);
9242                 if (ret)
9243                         break;
9244         }
9245 out:
9246         btrfs_release_path(&path);
9247         if (found_count != count) {
9248                 error(
9249 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9250                         bytenr, len, root_id, objectid, offset, count, found_count);
9251                 return REFERENCER_MISSING;
9252         }
9253         return 0;
9254 }
9255
9256 /*
9257  * Check if the referencer of a shared data backref exists
9258  */
9259 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9260                                      u64 parent, u64 bytenr)
9261 {
9262         struct extent_buffer *eb;
9263         struct btrfs_key key;
9264         struct btrfs_file_extent_item *fi;
9265         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9266         u32 nr;
9267         int found_parent = 0;
9268         int i;
9269
9270         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9271         if (!extent_buffer_uptodate(eb))
9272                 goto out;
9273
9274         nr = btrfs_header_nritems(eb);
9275         for (i = 0; i < nr; i++) {
9276                 btrfs_item_key_to_cpu(eb, &key, i);
9277                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9278                         continue;
9279
9280                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9281                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9282                         continue;
9283
9284                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9285                         found_parent = 1;
9286                         break;
9287                 }
9288         }
9289
9290 out:
9291         free_extent_buffer(eb);
9292         if (!found_parent) {
9293                 error("shared extent %llu referencer lost (parent: %llu)",
9294                         bytenr, parent);
9295                 return REFERENCER_MISSING;
9296         }
9297         return 0;
9298 }
9299
9300 /*
9301  * This function will check a given extent item, including its backref and
9302  * itself (like crossing stripe boundary and type)
9303  *
9304  * Since we don't use extent_record anymore, introduce new error bit
9305  */
9306 static int check_extent_item(struct btrfs_fs_info *fs_info,
9307                              struct extent_buffer *eb, int slot)
9308 {
9309         struct btrfs_extent_item *ei;
9310         struct btrfs_extent_inline_ref *iref;
9311         struct btrfs_extent_data_ref *dref;
9312         unsigned long end;
9313         unsigned long ptr;
9314         int type;
9315         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9316         u32 item_size = btrfs_item_size_nr(eb, slot);
9317         u64 flags;
9318         u64 offset;
9319         int metadata = 0;
9320         int level;
9321         struct btrfs_key key;
9322         int ret;
9323         int err = 0;
9324
9325         btrfs_item_key_to_cpu(eb, &key, slot);
9326         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9327                 bytes_used += key.offset;
9328         else
9329                 bytes_used += nodesize;
9330
9331         if (item_size < sizeof(*ei)) {
9332                 /*
9333                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9334                  * old thing when on disk format is still un-determined.
9335                  * No need to care about it anymore
9336                  */
9337                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9338                 return -ENOTTY;
9339         }
9340
9341         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9342         flags = btrfs_extent_flags(eb, ei);
9343
9344         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9345                 metadata = 1;
9346         if (metadata && check_crossing_stripes(global_info, key.objectid,
9347                                                eb->len)) {
9348                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9349                       key.objectid, key.objectid + nodesize);
9350                 err |= CROSSING_STRIPE_BOUNDARY;
9351         }
9352
9353         ptr = (unsigned long)(ei + 1);
9354
9355         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9356                 /* Old EXTENT_ITEM metadata */
9357                 struct btrfs_tree_block_info *info;
9358
9359                 info = (struct btrfs_tree_block_info *)ptr;
9360                 level = btrfs_tree_block_level(eb, info);
9361                 ptr += sizeof(struct btrfs_tree_block_info);
9362         } else {
9363                 /* New METADATA_ITEM */
9364                 level = key.offset;
9365         }
9366         end = (unsigned long)ei + item_size;
9367
9368         if (ptr >= end) {
9369                 err |= ITEM_SIZE_MISMATCH;
9370                 goto out;
9371         }
9372
9373         /* Now check every backref in this extent item */
9374 next:
9375         iref = (struct btrfs_extent_inline_ref *)ptr;
9376         type = btrfs_extent_inline_ref_type(eb, iref);
9377         offset = btrfs_extent_inline_ref_offset(eb, iref);
9378         switch (type) {
9379         case BTRFS_TREE_BLOCK_REF_KEY:
9380                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9381                                                level);
9382                 err |= ret;
9383                 break;
9384         case BTRFS_SHARED_BLOCK_REF_KEY:
9385                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9386                                                  level);
9387                 err |= ret;
9388                 break;
9389         case BTRFS_EXTENT_DATA_REF_KEY:
9390                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9391                 ret = check_extent_data_backref(fs_info,
9392                                 btrfs_extent_data_ref_root(eb, dref),
9393                                 btrfs_extent_data_ref_objectid(eb, dref),
9394                                 btrfs_extent_data_ref_offset(eb, dref),
9395                                 key.objectid, key.offset,
9396                                 btrfs_extent_data_ref_count(eb, dref));
9397                 err |= ret;
9398                 break;
9399         case BTRFS_SHARED_DATA_REF_KEY:
9400                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9401                 err |= ret;
9402                 break;
9403         default:
9404                 error("extent[%llu %d %llu] has unknown ref type: %d",
9405                         key.objectid, key.type, key.offset, type);
9406                 err |= UNKNOWN_TYPE;
9407                 goto out;
9408         }
9409
9410         ptr += btrfs_extent_inline_ref_size(type);
9411         if (ptr < end)
9412                 goto next;
9413
9414 out:
9415         return err;
9416 }
9417
9418 /*
9419  * Check if a dev extent item is referred correctly by its chunk
9420  */
9421 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9422                                  struct extent_buffer *eb, int slot)
9423 {
9424         struct btrfs_root *chunk_root = fs_info->chunk_root;
9425         struct btrfs_dev_extent *ptr;
9426         struct btrfs_path path;
9427         struct btrfs_key chunk_key;
9428         struct btrfs_key devext_key;
9429         struct btrfs_chunk *chunk;
9430         struct extent_buffer *l;
9431         int num_stripes;
9432         u64 length;
9433         int i;
9434         int found_chunk = 0;
9435         int ret;
9436
9437         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9438         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9439         length = btrfs_dev_extent_length(eb, ptr);
9440
9441         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9442         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9443         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9444
9445         btrfs_init_path(&path);
9446         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9447         if (ret)
9448                 goto out;
9449
9450         l = path.nodes[0];
9451         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9452         if (btrfs_chunk_length(l, chunk) != length)
9453                 goto out;
9454
9455         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9456         for (i = 0; i < num_stripes; i++) {
9457                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9458                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9459
9460                 if (devid == devext_key.objectid &&
9461                     offset == devext_key.offset) {
9462                         found_chunk = 1;
9463                         break;
9464                 }
9465         }
9466 out:
9467         btrfs_release_path(&path);
9468         if (!found_chunk) {
9469                 error(
9470                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9471                         devext_key.objectid, devext_key.offset, length);
9472                 return REFERENCER_MISSING;
9473         }
9474         return 0;
9475 }
9476
9477 /*
9478  * Check if the used space is correct with the dev item
9479  */
9480 static int check_dev_item(struct btrfs_fs_info *fs_info,
9481                           struct extent_buffer *eb, int slot)
9482 {
9483         struct btrfs_root *dev_root = fs_info->dev_root;
9484         struct btrfs_dev_item *dev_item;
9485         struct btrfs_path path;
9486         struct btrfs_key key;
9487         struct btrfs_dev_extent *ptr;
9488         u64 dev_id;
9489         u64 used;
9490         u64 total = 0;
9491         int ret;
9492
9493         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9494         dev_id = btrfs_device_id(eb, dev_item);
9495         used = btrfs_device_bytes_used(eb, dev_item);
9496
9497         key.objectid = dev_id;
9498         key.type = BTRFS_DEV_EXTENT_KEY;
9499         key.offset = 0;
9500
9501         btrfs_init_path(&path);
9502         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9503         if (ret < 0) {
9504                 btrfs_item_key_to_cpu(eb, &key, slot);
9505                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9506                         key.objectid, key.type, key.offset);
9507                 btrfs_release_path(&path);
9508                 return REFERENCER_MISSING;
9509         }
9510
9511         /* Iterate dev_extents to calculate the used space of a device */
9512         while (1) {
9513                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9514
9515                 if (key.objectid > dev_id)
9516                         break;
9517                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9518                         goto next;
9519
9520                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9521                                      struct btrfs_dev_extent);
9522                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9523 next:
9524                 ret = btrfs_next_item(dev_root, &path);
9525                 if (ret)
9526                         break;
9527         }
9528         btrfs_release_path(&path);
9529
9530         if (used != total) {
9531                 btrfs_item_key_to_cpu(eb, &key, slot);
9532                 error(
9533 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9534                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9535                         BTRFS_DEV_EXTENT_KEY, dev_id);
9536                 return ACCOUNTING_MISMATCH;
9537         }
9538         return 0;
9539 }
9540
9541 /*
9542  * Check a block group item with its referener (chunk) and its used space
9543  * with extent/metadata item
9544  */
9545 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9546                                   struct extent_buffer *eb, int slot)
9547 {
9548         struct btrfs_root *extent_root = fs_info->extent_root;
9549         struct btrfs_root *chunk_root = fs_info->chunk_root;
9550         struct btrfs_block_group_item *bi;
9551         struct btrfs_block_group_item bg_item;
9552         struct btrfs_path path;
9553         struct btrfs_key bg_key;
9554         struct btrfs_key chunk_key;
9555         struct btrfs_key extent_key;
9556         struct btrfs_chunk *chunk;
9557         struct extent_buffer *leaf;
9558         struct btrfs_extent_item *ei;
9559         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9560         u64 flags;
9561         u64 bg_flags;
9562         u64 used;
9563         u64 total = 0;
9564         int ret;
9565         int err = 0;
9566
9567         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9568         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9569         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9570         used = btrfs_block_group_used(&bg_item);
9571         bg_flags = btrfs_block_group_flags(&bg_item);
9572
9573         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9574         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9575         chunk_key.offset = bg_key.objectid;
9576
9577         btrfs_init_path(&path);
9578         /* Search for the referencer chunk */
9579         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9580         if (ret) {
9581                 error(
9582                 "block group[%llu %llu] did not find the related chunk item",
9583                         bg_key.objectid, bg_key.offset);
9584                 err |= REFERENCER_MISSING;
9585         } else {
9586                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9587                                         struct btrfs_chunk);
9588                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9589                                                 bg_key.offset) {
9590                         error(
9591         "block group[%llu %llu] related chunk item length does not match",
9592                                 bg_key.objectid, bg_key.offset);
9593                         err |= REFERENCER_MISMATCH;
9594                 }
9595         }
9596         btrfs_release_path(&path);
9597
9598         /* Search from the block group bytenr */
9599         extent_key.objectid = bg_key.objectid;
9600         extent_key.type = 0;
9601         extent_key.offset = 0;
9602
9603         btrfs_init_path(&path);
9604         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9605         if (ret < 0)
9606                 goto out;
9607
9608         /* Iterate extent tree to account used space */
9609         while (1) {
9610                 leaf = path.nodes[0];
9611                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9612                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9613                         break;
9614
9615                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9616                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9617                         goto next;
9618                 if (extent_key.objectid < bg_key.objectid)
9619                         goto next;
9620
9621                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9622                         total += nodesize;
9623                 else
9624                         total += extent_key.offset;
9625
9626                 ei = btrfs_item_ptr(leaf, path.slots[0],
9627                                     struct btrfs_extent_item);
9628                 flags = btrfs_extent_flags(leaf, ei);
9629                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9630                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9631                                 error(
9632                         "bad extent[%llu, %llu) type mismatch with chunk",
9633                                         extent_key.objectid,
9634                                         extent_key.objectid + extent_key.offset);
9635                                 err |= CHUNK_TYPE_MISMATCH;
9636                         }
9637                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9638                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9639                                     BTRFS_BLOCK_GROUP_METADATA))) {
9640                                 error(
9641                         "bad extent[%llu, %llu) type mismatch with chunk",
9642                                         extent_key.objectid,
9643                                         extent_key.objectid + nodesize);
9644                                 err |= CHUNK_TYPE_MISMATCH;
9645                         }
9646                 }
9647 next:
9648                 ret = btrfs_next_item(extent_root, &path);
9649                 if (ret)
9650                         break;
9651         }
9652
9653 out:
9654         btrfs_release_path(&path);
9655
9656         if (total != used) {
9657                 error(
9658                 "block group[%llu %llu] used %llu but extent items used %llu",
9659                         bg_key.objectid, bg_key.offset, used, total);
9660                 err |= ACCOUNTING_MISMATCH;
9661         }
9662         return err;
9663 }
9664
9665 /*
9666  * Check a chunk item.
9667  * Including checking all referred dev_extents and block group
9668  */
9669 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9670                             struct extent_buffer *eb, int slot)
9671 {
9672         struct btrfs_root *extent_root = fs_info->extent_root;
9673         struct btrfs_root *dev_root = fs_info->dev_root;
9674         struct btrfs_path path;
9675         struct btrfs_key chunk_key;
9676         struct btrfs_key bg_key;
9677         struct btrfs_key devext_key;
9678         struct btrfs_chunk *chunk;
9679         struct extent_buffer *leaf;
9680         struct btrfs_block_group_item *bi;
9681         struct btrfs_block_group_item bg_item;
9682         struct btrfs_dev_extent *ptr;
9683         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9684         u64 length;
9685         u64 chunk_end;
9686         u64 type;
9687         u64 profile;
9688         int num_stripes;
9689         u64 offset;
9690         u64 objectid;
9691         int i;
9692         int ret;
9693         int err = 0;
9694
9695         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9696         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9697         length = btrfs_chunk_length(eb, chunk);
9698         chunk_end = chunk_key.offset + length;
9699         if (!IS_ALIGNED(length, sectorsize)) {
9700                 error("chunk[%llu %llu) not aligned to %u",
9701                         chunk_key.offset, chunk_end, sectorsize);
9702                 err |= BYTES_UNALIGNED;
9703                 goto out;
9704         }
9705
9706         type = btrfs_chunk_type(eb, chunk);
9707         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9708         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9709                 error("chunk[%llu %llu) has no chunk type",
9710                         chunk_key.offset, chunk_end);
9711                 err |= UNKNOWN_TYPE;
9712         }
9713         if (profile && (profile & (profile - 1))) {
9714                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9715                         chunk_key.offset, chunk_end, profile);
9716                 err |= UNKNOWN_TYPE;
9717         }
9718
9719         bg_key.objectid = chunk_key.offset;
9720         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9721         bg_key.offset = length;
9722
9723         btrfs_init_path(&path);
9724         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9725         if (ret) {
9726                 error(
9727                 "chunk[%llu %llu) did not find the related block group item",
9728                         chunk_key.offset, chunk_end);
9729                 err |= REFERENCER_MISSING;
9730         } else{
9731                 leaf = path.nodes[0];
9732                 bi = btrfs_item_ptr(leaf, path.slots[0],
9733                                     struct btrfs_block_group_item);
9734                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9735                                    sizeof(bg_item));
9736                 if (btrfs_block_group_flags(&bg_item) != type) {
9737                         error(
9738 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9739                                 chunk_key.offset, chunk_end, type,
9740                                 btrfs_block_group_flags(&bg_item));
9741                         err |= REFERENCER_MISSING;
9742                 }
9743         }
9744
9745         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9746         for (i = 0; i < num_stripes; i++) {
9747                 btrfs_release_path(&path);
9748                 btrfs_init_path(&path);
9749                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9750                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9751                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9752
9753                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9754                                         0, 0);
9755                 if (ret)
9756                         goto not_match_dev;
9757
9758                 leaf = path.nodes[0];
9759                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9760                                      struct btrfs_dev_extent);
9761                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9762                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9763                 if (objectid != chunk_key.objectid ||
9764                     offset != chunk_key.offset ||
9765                     btrfs_dev_extent_length(leaf, ptr) != length)
9766                         goto not_match_dev;
9767                 continue;
9768 not_match_dev:
9769                 err |= BACKREF_MISSING;
9770                 error(
9771                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9772                         chunk_key.objectid, chunk_end, i);
9773                 continue;
9774         }
9775         btrfs_release_path(&path);
9776 out:
9777         return err;
9778 }
9779
9780 /*
9781  * Main entry function to check known items and update related accounting info
9782  */
9783 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9784 {
9785         struct btrfs_fs_info *fs_info = root->fs_info;
9786         struct btrfs_key key;
9787         int slot = 0;
9788         int type;
9789         struct btrfs_extent_data_ref *dref;
9790         int ret;
9791         int err = 0;
9792
9793 next:
9794         btrfs_item_key_to_cpu(eb, &key, slot);
9795         type = key.type;
9796
9797         switch (type) {
9798         case BTRFS_EXTENT_DATA_KEY:
9799                 ret = check_extent_data_item(root, eb, slot);
9800                 err |= ret;
9801                 break;
9802         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9803                 ret = check_block_group_item(fs_info, eb, slot);
9804                 err |= ret;
9805                 break;
9806         case BTRFS_DEV_ITEM_KEY:
9807                 ret = check_dev_item(fs_info, eb, slot);
9808                 err |= ret;
9809                 break;
9810         case BTRFS_CHUNK_ITEM_KEY:
9811                 ret = check_chunk_item(fs_info, eb, slot);
9812                 err |= ret;
9813                 break;
9814         case BTRFS_DEV_EXTENT_KEY:
9815                 ret = check_dev_extent_item(fs_info, eb, slot);
9816                 err |= ret;
9817                 break;
9818         case BTRFS_EXTENT_ITEM_KEY:
9819         case BTRFS_METADATA_ITEM_KEY:
9820                 ret = check_extent_item(fs_info, eb, slot);
9821                 err |= ret;
9822                 break;
9823         case BTRFS_EXTENT_CSUM_KEY:
9824                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9825                 break;
9826         case BTRFS_TREE_BLOCK_REF_KEY:
9827                 ret = check_tree_block_backref(fs_info, key.offset,
9828                                                key.objectid, -1);
9829                 err |= ret;
9830                 break;
9831         case BTRFS_EXTENT_DATA_REF_KEY:
9832                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9833                 ret = check_extent_data_backref(fs_info,
9834                                 btrfs_extent_data_ref_root(eb, dref),
9835                                 btrfs_extent_data_ref_objectid(eb, dref),
9836                                 btrfs_extent_data_ref_offset(eb, dref),
9837                                 key.objectid, 0,
9838                                 btrfs_extent_data_ref_count(eb, dref));
9839                 err |= ret;
9840                 break;
9841         case BTRFS_SHARED_BLOCK_REF_KEY:
9842                 ret = check_shared_block_backref(fs_info, key.offset,
9843                                                  key.objectid, -1);
9844                 err |= ret;
9845                 break;
9846         case BTRFS_SHARED_DATA_REF_KEY:
9847                 ret = check_shared_data_backref(fs_info, key.offset,
9848                                                 key.objectid);
9849                 err |= ret;
9850                 break;
9851         default:
9852                 break;
9853         }
9854
9855         if (++slot < btrfs_header_nritems(eb))
9856                 goto next;
9857
9858         return err;
9859 }
9860
9861 /*
9862  * Helper function for later fs/subvol tree check.  To determine if a tree
9863  * block should be checked.
9864  * This function will ensure only the direct referencer with lowest rootid to
9865  * check a fs/subvolume tree block.
9866  *
9867  * Backref check at extent tree would detect errors like missing subvolume
9868  * tree, so we can do aggressive check to reduce duplicated checks.
9869  */
9870 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9871 {
9872         struct btrfs_root *extent_root = root->fs_info->extent_root;
9873         struct btrfs_key key;
9874         struct btrfs_path path;
9875         struct extent_buffer *leaf;
9876         int slot;
9877         struct btrfs_extent_item *ei;
9878         unsigned long ptr;
9879         unsigned long end;
9880         int type;
9881         u32 item_size;
9882         u64 offset;
9883         struct btrfs_extent_inline_ref *iref;
9884         int ret;
9885
9886         btrfs_init_path(&path);
9887         key.objectid = btrfs_header_bytenr(eb);
9888         key.type = BTRFS_METADATA_ITEM_KEY;
9889         key.offset = (u64)-1;
9890
9891         /*
9892          * Any failure in backref resolving means we can't determine
9893          * whom the tree block belongs to.
9894          * So in that case, we need to check that tree block
9895          */
9896         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9897         if (ret < 0)
9898                 goto need_check;
9899
9900         ret = btrfs_previous_extent_item(extent_root, &path,
9901                                          btrfs_header_bytenr(eb));
9902         if (ret)
9903                 goto need_check;
9904
9905         leaf = path.nodes[0];
9906         slot = path.slots[0];
9907         btrfs_item_key_to_cpu(leaf, &key, slot);
9908         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9909
9910         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9911                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9912         } else {
9913                 struct btrfs_tree_block_info *info;
9914
9915                 info = (struct btrfs_tree_block_info *)(ei + 1);
9916                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9917         }
9918
9919         item_size = btrfs_item_size_nr(leaf, slot);
9920         ptr = (unsigned long)iref;
9921         end = (unsigned long)ei + item_size;
9922         while (ptr < end) {
9923                 iref = (struct btrfs_extent_inline_ref *)ptr;
9924                 type = btrfs_extent_inline_ref_type(leaf, iref);
9925                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9926
9927                 /*
9928                  * We only check the tree block if current root is
9929                  * the lowest referencer of it.
9930                  */
9931                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9932                     offset < root->objectid) {
9933                         btrfs_release_path(&path);
9934                         return 0;
9935                 }
9936
9937                 ptr += btrfs_extent_inline_ref_size(type);
9938         }
9939         /*
9940          * Normally we should also check keyed tree block ref, but that may be
9941          * very time consuming.  Inlined ref should already make us skip a lot
9942          * of refs now.  So skip search keyed tree block ref.
9943          */
9944
9945 need_check:
9946         btrfs_release_path(&path);
9947         return 1;
9948 }
9949
9950 /*
9951  * Traversal function for tree block. We will do:
9952  * 1) Skip shared fs/subvolume tree blocks
9953  * 2) Update related bytes accounting
9954  * 3) Pre-order traversal
9955  */
9956 static int traverse_tree_block(struct btrfs_root *root,
9957                                 struct extent_buffer *node)
9958 {
9959         struct extent_buffer *eb;
9960         struct btrfs_key key;
9961         struct btrfs_key drop_key;
9962         int level;
9963         u64 nr;
9964         int i;
9965         int err = 0;
9966         int ret;
9967
9968         /*
9969          * Skip shared fs/subvolume tree block, in that case they will
9970          * be checked by referencer with lowest rootid
9971          */
9972         if (is_fstree(root->objectid) && !should_check(root, node))
9973                 return 0;
9974
9975         /* Update bytes accounting */
9976         total_btree_bytes += node->len;
9977         if (fs_root_objectid(btrfs_header_owner(node)))
9978                 total_fs_tree_bytes += node->len;
9979         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9980                 total_extent_tree_bytes += node->len;
9981         if (!found_old_backref &&
9982             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9983             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9984             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9985                 found_old_backref = 1;
9986
9987         /* pre-order tranversal, check itself first */
9988         level = btrfs_header_level(node);
9989         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9990                                    btrfs_header_level(node),
9991                                    btrfs_header_owner(node));
9992         err |= ret;
9993         if (err)
9994                 error(
9995         "check %s failed root %llu bytenr %llu level %d, force continue check",
9996                         level ? "node":"leaf", root->objectid,
9997                         btrfs_header_bytenr(node), btrfs_header_level(node));
9998
9999         if (!level) {
10000                 btree_space_waste += btrfs_leaf_free_space(root, node);
10001                 ret = check_leaf_items(root, node);
10002                 err |= ret;
10003                 return err;
10004         }
10005
10006         nr = btrfs_header_nritems(node);
10007         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10008         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10009                 sizeof(struct btrfs_key_ptr);
10010
10011         /* Then check all its children */
10012         for (i = 0; i < nr; i++) {
10013                 u64 blocknr = btrfs_node_blockptr(node, i);
10014
10015                 btrfs_node_key_to_cpu(node, &key, i);
10016                 if (level == root->root_item.drop_level &&
10017                     is_dropped_key(&key, &drop_key))
10018                         continue;
10019
10020                 /*
10021                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10022                  * to call the function itself.
10023                  */
10024                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10025                 if (extent_buffer_uptodate(eb)) {
10026                         ret = traverse_tree_block(root, eb);
10027                         err |= ret;
10028                 }
10029                 free_extent_buffer(eb);
10030         }
10031
10032         return err;
10033 }
10034
10035 /*
10036  * Low memory usage version check_chunks_and_extents.
10037  */
10038 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10039 {
10040         struct btrfs_path path;
10041         struct btrfs_key key;
10042         struct btrfs_root *root1;
10043         struct btrfs_root *cur_root;
10044         int err = 0;
10045         int ret;
10046
10047         root1 = root->fs_info->chunk_root;
10048         ret = traverse_tree_block(root1, root1->node);
10049         err |= ret;
10050
10051         root1 = root->fs_info->tree_root;
10052         ret = traverse_tree_block(root1, root1->node);
10053         err |= ret;
10054
10055         btrfs_init_path(&path);
10056         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10057         key.offset = 0;
10058         key.type = BTRFS_ROOT_ITEM_KEY;
10059
10060         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10061         if (ret) {
10062                 error("cannot find extent treet in tree_root");
10063                 goto out;
10064         }
10065
10066         while (1) {
10067                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10068                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10069                         goto next;
10070                 key.offset = (u64)-1;
10071
10072                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10073                 if (IS_ERR(cur_root) || !cur_root) {
10074                         error("failed to read tree: %lld", key.objectid);
10075                         goto next;
10076                 }
10077
10078                 ret = traverse_tree_block(cur_root, cur_root->node);
10079                 err |= ret;
10080
10081 next:
10082                 ret = btrfs_next_item(root1, &path);
10083                 if (ret)
10084                         goto out;
10085         }
10086
10087 out:
10088         btrfs_release_path(&path);
10089         return err;
10090 }
10091
10092 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10093                            struct btrfs_root *root, int overwrite)
10094 {
10095         struct extent_buffer *c;
10096         struct extent_buffer *old = root->node;
10097         int level;
10098         int ret;
10099         struct btrfs_disk_key disk_key = {0,0,0};
10100
10101         level = 0;
10102
10103         if (overwrite) {
10104                 c = old;
10105                 extent_buffer_get(c);
10106                 goto init;
10107         }
10108         c = btrfs_alloc_free_block(trans, root,
10109                                    root->nodesize,
10110                                    root->root_key.objectid,
10111                                    &disk_key, level, 0, 0);
10112         if (IS_ERR(c)) {
10113                 c = old;
10114                 extent_buffer_get(c);
10115                 overwrite = 1;
10116         }
10117 init:
10118         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10119         btrfs_set_header_level(c, level);
10120         btrfs_set_header_bytenr(c, c->start);
10121         btrfs_set_header_generation(c, trans->transid);
10122         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10123         btrfs_set_header_owner(c, root->root_key.objectid);
10124
10125         write_extent_buffer(c, root->fs_info->fsid,
10126                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10127
10128         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10129                             btrfs_header_chunk_tree_uuid(c),
10130                             BTRFS_UUID_SIZE);
10131
10132         btrfs_mark_buffer_dirty(c);
10133         /*
10134          * this case can happen in the following case:
10135          *
10136          * 1.overwrite previous root.
10137          *
10138          * 2.reinit reloc data root, this is because we skip pin
10139          * down reloc data tree before which means we can allocate
10140          * same block bytenr here.
10141          */
10142         if (old->start == c->start) {
10143                 btrfs_set_root_generation(&root->root_item,
10144                                           trans->transid);
10145                 root->root_item.level = btrfs_header_level(root->node);
10146                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10147                                         &root->root_key, &root->root_item);
10148                 if (ret) {
10149                         free_extent_buffer(c);
10150                         return ret;
10151                 }
10152         }
10153         free_extent_buffer(old);
10154         root->node = c;
10155         add_root_to_dirty_list(root);
10156         return 0;
10157 }
10158
10159 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10160                                 struct extent_buffer *eb, int tree_root)
10161 {
10162         struct extent_buffer *tmp;
10163         struct btrfs_root_item *ri;
10164         struct btrfs_key key;
10165         u64 bytenr;
10166         u32 nodesize;
10167         int level = btrfs_header_level(eb);
10168         int nritems;
10169         int ret;
10170         int i;
10171
10172         /*
10173          * If we have pinned this block before, don't pin it again.
10174          * This can not only avoid forever loop with broken filesystem
10175          * but also give us some speedups.
10176          */
10177         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10178                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10179                 return 0;
10180
10181         btrfs_pin_extent(fs_info, eb->start, eb->len);
10182
10183         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10184         nritems = btrfs_header_nritems(eb);
10185         for (i = 0; i < nritems; i++) {
10186                 if (level == 0) {
10187                         btrfs_item_key_to_cpu(eb, &key, i);
10188                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10189                                 continue;
10190                         /* Skip the extent root and reloc roots */
10191                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10192                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10193                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10194                                 continue;
10195                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10196                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10197
10198                         /*
10199                          * If at any point we start needing the real root we
10200                          * will have to build a stump root for the root we are
10201                          * in, but for now this doesn't actually use the root so
10202                          * just pass in extent_root.
10203                          */
10204                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10205                                               nodesize, 0);
10206                         if (!extent_buffer_uptodate(tmp)) {
10207                                 fprintf(stderr, "Error reading root block\n");
10208                                 return -EIO;
10209                         }
10210                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10211                         free_extent_buffer(tmp);
10212                         if (ret)
10213                                 return ret;
10214                 } else {
10215                         bytenr = btrfs_node_blockptr(eb, i);
10216
10217                         /* If we aren't the tree root don't read the block */
10218                         if (level == 1 && !tree_root) {
10219                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10220                                 continue;
10221                         }
10222
10223                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10224                                               nodesize, 0);
10225                         if (!extent_buffer_uptodate(tmp)) {
10226                                 fprintf(stderr, "Error reading tree block\n");
10227                                 return -EIO;
10228                         }
10229                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10230                         free_extent_buffer(tmp);
10231                         if (ret)
10232                                 return ret;
10233                 }
10234         }
10235
10236         return 0;
10237 }
10238
10239 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10240 {
10241         int ret;
10242
10243         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10244         if (ret)
10245                 return ret;
10246
10247         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10248 }
10249
10250 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10251 {
10252         struct btrfs_block_group_cache *cache;
10253         struct btrfs_path *path;
10254         struct extent_buffer *leaf;
10255         struct btrfs_chunk *chunk;
10256         struct btrfs_key key;
10257         int ret;
10258         u64 start;
10259
10260         path = btrfs_alloc_path();
10261         if (!path)
10262                 return -ENOMEM;
10263
10264         key.objectid = 0;
10265         key.type = BTRFS_CHUNK_ITEM_KEY;
10266         key.offset = 0;
10267
10268         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10269         if (ret < 0) {
10270                 btrfs_free_path(path);
10271                 return ret;
10272         }
10273
10274         /*
10275          * We do this in case the block groups were screwed up and had alloc
10276          * bits that aren't actually set on the chunks.  This happens with
10277          * restored images every time and could happen in real life I guess.
10278          */
10279         fs_info->avail_data_alloc_bits = 0;
10280         fs_info->avail_metadata_alloc_bits = 0;
10281         fs_info->avail_system_alloc_bits = 0;
10282
10283         /* First we need to create the in-memory block groups */
10284         while (1) {
10285                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10286                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10287                         if (ret < 0) {
10288                                 btrfs_free_path(path);
10289                                 return ret;
10290                         }
10291                         if (ret) {
10292                                 ret = 0;
10293                                 break;
10294                         }
10295                 }
10296                 leaf = path->nodes[0];
10297                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10298                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10299                         path->slots[0]++;
10300                         continue;
10301                 }
10302
10303                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10304                                        struct btrfs_chunk);
10305                 btrfs_add_block_group(fs_info, 0,
10306                                       btrfs_chunk_type(leaf, chunk),
10307                                       key.objectid, key.offset,
10308                                       btrfs_chunk_length(leaf, chunk));
10309                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10310                                  key.offset + btrfs_chunk_length(leaf, chunk),
10311                                  GFP_NOFS);
10312                 path->slots[0]++;
10313         }
10314         start = 0;
10315         while (1) {
10316                 cache = btrfs_lookup_first_block_group(fs_info, start);
10317                 if (!cache)
10318                         break;
10319                 cache->cached = 1;
10320                 start = cache->key.objectid + cache->key.offset;
10321         }
10322
10323         btrfs_free_path(path);
10324         return 0;
10325 }
10326
10327 static int reset_balance(struct btrfs_trans_handle *trans,
10328                          struct btrfs_fs_info *fs_info)
10329 {
10330         struct btrfs_root *root = fs_info->tree_root;
10331         struct btrfs_path *path;
10332         struct extent_buffer *leaf;
10333         struct btrfs_key key;
10334         int del_slot, del_nr = 0;
10335         int ret;
10336         int found = 0;
10337
10338         path = btrfs_alloc_path();
10339         if (!path)
10340                 return -ENOMEM;
10341
10342         key.objectid = BTRFS_BALANCE_OBJECTID;
10343         key.type = BTRFS_BALANCE_ITEM_KEY;
10344         key.offset = 0;
10345
10346         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10347         if (ret) {
10348                 if (ret > 0)
10349                         ret = 0;
10350                 if (!ret)
10351                         goto reinit_data_reloc;
10352                 else
10353                         goto out;
10354         }
10355
10356         ret = btrfs_del_item(trans, root, path);
10357         if (ret)
10358                 goto out;
10359         btrfs_release_path(path);
10360
10361         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10362         key.type = BTRFS_ROOT_ITEM_KEY;
10363         key.offset = 0;
10364
10365         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10366         if (ret < 0)
10367                 goto out;
10368         while (1) {
10369                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10370                         if (!found)
10371                                 break;
10372
10373                         if (del_nr) {
10374                                 ret = btrfs_del_items(trans, root, path,
10375                                                       del_slot, del_nr);
10376                                 del_nr = 0;
10377                                 if (ret)
10378                                         goto out;
10379                         }
10380                         key.offset++;
10381                         btrfs_release_path(path);
10382
10383                         found = 0;
10384                         ret = btrfs_search_slot(trans, root, &key, path,
10385                                                 -1, 1);
10386                         if (ret < 0)
10387                                 goto out;
10388                         continue;
10389                 }
10390                 found = 1;
10391                 leaf = path->nodes[0];
10392                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10393                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10394                         break;
10395                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10396                         path->slots[0]++;
10397                         continue;
10398                 }
10399                 if (!del_nr) {
10400                         del_slot = path->slots[0];
10401                         del_nr = 1;
10402                 } else {
10403                         del_nr++;
10404                 }
10405                 path->slots[0]++;
10406         }
10407
10408         if (del_nr) {
10409                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10410                 if (ret)
10411                         goto out;
10412         }
10413         btrfs_release_path(path);
10414
10415 reinit_data_reloc:
10416         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10417         key.type = BTRFS_ROOT_ITEM_KEY;
10418         key.offset = (u64)-1;
10419         root = btrfs_read_fs_root(fs_info, &key);
10420         if (IS_ERR(root)) {
10421                 fprintf(stderr, "Error reading data reloc tree\n");
10422                 ret = PTR_ERR(root);
10423                 goto out;
10424         }
10425         record_root_in_trans(trans, root);
10426         ret = btrfs_fsck_reinit_root(trans, root, 0);
10427         if (ret)
10428                 goto out;
10429         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10430 out:
10431         btrfs_free_path(path);
10432         return ret;
10433 }
10434
10435 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10436                               struct btrfs_fs_info *fs_info)
10437 {
10438         u64 start = 0;
10439         int ret;
10440
10441         /*
10442          * The only reason we don't do this is because right now we're just
10443          * walking the trees we find and pinning down their bytes, we don't look
10444          * at any of the leaves.  In order to do mixed groups we'd have to check
10445          * the leaves of any fs roots and pin down the bytes for any file
10446          * extents we find.  Not hard but why do it if we don't have to?
10447          */
10448         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10449                 fprintf(stderr, "We don't support re-initing the extent tree "
10450                         "for mixed block groups yet, please notify a btrfs "
10451                         "developer you want to do this so they can add this "
10452                         "functionality.\n");
10453                 return -EINVAL;
10454         }
10455
10456         /*
10457          * first we need to walk all of the trees except the extent tree and pin
10458          * down the bytes that are in use so we don't overwrite any existing
10459          * metadata.
10460          */
10461         ret = pin_metadata_blocks(fs_info);
10462         if (ret) {
10463                 fprintf(stderr, "error pinning down used bytes\n");
10464                 return ret;
10465         }
10466
10467         /*
10468          * Need to drop all the block groups since we're going to recreate all
10469          * of them again.
10470          */
10471         btrfs_free_block_groups(fs_info);
10472         ret = reset_block_groups(fs_info);
10473         if (ret) {
10474                 fprintf(stderr, "error resetting the block groups\n");
10475                 return ret;
10476         }
10477
10478         /* Ok we can allocate now, reinit the extent root */
10479         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10480         if (ret) {
10481                 fprintf(stderr, "extent root initialization failed\n");
10482                 /*
10483                  * When the transaction code is updated we should end the
10484                  * transaction, but for now progs only knows about commit so
10485                  * just return an error.
10486                  */
10487                 return ret;
10488         }
10489
10490         /*
10491          * Now we have all the in-memory block groups setup so we can make
10492          * allocations properly, and the metadata we care about is safe since we
10493          * pinned all of it above.
10494          */
10495         while (1) {
10496                 struct btrfs_block_group_cache *cache;
10497
10498                 cache = btrfs_lookup_first_block_group(fs_info, start);
10499                 if (!cache)
10500                         break;
10501                 start = cache->key.objectid + cache->key.offset;
10502                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10503                                         &cache->key, &cache->item,
10504                                         sizeof(cache->item));
10505                 if (ret) {
10506                         fprintf(stderr, "Error adding block group\n");
10507                         return ret;
10508                 }
10509                 btrfs_extent_post_op(trans, fs_info->extent_root);
10510         }
10511
10512         ret = reset_balance(trans, fs_info);
10513         if (ret)
10514                 fprintf(stderr, "error resetting the pending balance\n");
10515
10516         return ret;
10517 }
10518
10519 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10520 {
10521         struct btrfs_path *path;
10522         struct btrfs_trans_handle *trans;
10523         struct btrfs_key key;
10524         int ret;
10525
10526         printf("Recowing metadata block %llu\n", eb->start);
10527         key.objectid = btrfs_header_owner(eb);
10528         key.type = BTRFS_ROOT_ITEM_KEY;
10529         key.offset = (u64)-1;
10530
10531         root = btrfs_read_fs_root(root->fs_info, &key);
10532         if (IS_ERR(root)) {
10533                 fprintf(stderr, "Couldn't find owner root %llu\n",
10534                         key.objectid);
10535                 return PTR_ERR(root);
10536         }
10537
10538         path = btrfs_alloc_path();
10539         if (!path)
10540                 return -ENOMEM;
10541
10542         trans = btrfs_start_transaction(root, 1);
10543         if (IS_ERR(trans)) {
10544                 btrfs_free_path(path);
10545                 return PTR_ERR(trans);
10546         }
10547
10548         path->lowest_level = btrfs_header_level(eb);
10549         if (path->lowest_level)
10550                 btrfs_node_key_to_cpu(eb, &key, 0);
10551         else
10552                 btrfs_item_key_to_cpu(eb, &key, 0);
10553
10554         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10555         btrfs_commit_transaction(trans, root);
10556         btrfs_free_path(path);
10557         return ret;
10558 }
10559
10560 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10561 {
10562         struct btrfs_path *path;
10563         struct btrfs_trans_handle *trans;
10564         struct btrfs_key key;
10565         int ret;
10566
10567         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10568                bad->key.type, bad->key.offset);
10569         key.objectid = bad->root_id;
10570         key.type = BTRFS_ROOT_ITEM_KEY;
10571         key.offset = (u64)-1;
10572
10573         root = btrfs_read_fs_root(root->fs_info, &key);
10574         if (IS_ERR(root)) {
10575                 fprintf(stderr, "Couldn't find owner root %llu\n",
10576                         key.objectid);
10577                 return PTR_ERR(root);
10578         }
10579
10580         path = btrfs_alloc_path();
10581         if (!path)
10582                 return -ENOMEM;
10583
10584         trans = btrfs_start_transaction(root, 1);
10585         if (IS_ERR(trans)) {
10586                 btrfs_free_path(path);
10587                 return PTR_ERR(trans);
10588         }
10589
10590         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10591         if (ret) {
10592                 if (ret > 0)
10593                         ret = 0;
10594                 goto out;
10595         }
10596         ret = btrfs_del_item(trans, root, path);
10597 out:
10598         btrfs_commit_transaction(trans, root);
10599         btrfs_free_path(path);
10600         return ret;
10601 }
10602
10603 static int zero_log_tree(struct btrfs_root *root)
10604 {
10605         struct btrfs_trans_handle *trans;
10606         int ret;
10607
10608         trans = btrfs_start_transaction(root, 1);
10609         if (IS_ERR(trans)) {
10610                 ret = PTR_ERR(trans);
10611                 return ret;
10612         }
10613         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10614         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10615         ret = btrfs_commit_transaction(trans, root);
10616         return ret;
10617 }
10618
10619 static int populate_csum(struct btrfs_trans_handle *trans,
10620                          struct btrfs_root *csum_root, char *buf, u64 start,
10621                          u64 len)
10622 {
10623         u64 offset = 0;
10624         u64 sectorsize;
10625         int ret = 0;
10626
10627         while (offset < len) {
10628                 sectorsize = csum_root->sectorsize;
10629                 ret = read_extent_data(csum_root, buf, start + offset,
10630                                        &sectorsize, 0);
10631                 if (ret)
10632                         break;
10633                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10634                                             start + offset, buf, sectorsize);
10635                 if (ret)
10636                         break;
10637                 offset += sectorsize;
10638         }
10639         return ret;
10640 }
10641
10642 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10643                                       struct btrfs_root *csum_root,
10644                                       struct btrfs_root *cur_root)
10645 {
10646         struct btrfs_path *path;
10647         struct btrfs_key key;
10648         struct extent_buffer *node;
10649         struct btrfs_file_extent_item *fi;
10650         char *buf = NULL;
10651         u64 start = 0;
10652         u64 len = 0;
10653         int slot = 0;
10654         int ret = 0;
10655
10656         path = btrfs_alloc_path();
10657         if (!path)
10658                 return -ENOMEM;
10659         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10660         if (!buf) {
10661                 ret = -ENOMEM;
10662                 goto out;
10663         }
10664
10665         key.objectid = 0;
10666         key.offset = 0;
10667         key.type = 0;
10668
10669         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10670         if (ret < 0)
10671                 goto out;
10672         /* Iterate all regular file extents and fill its csum */
10673         while (1) {
10674                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10675
10676                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10677                         goto next;
10678                 node = path->nodes[0];
10679                 slot = path->slots[0];
10680                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10681                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10682                         goto next;
10683                 start = btrfs_file_extent_disk_bytenr(node, fi);
10684                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10685
10686                 ret = populate_csum(trans, csum_root, buf, start, len);
10687                 if (ret == -EEXIST)
10688                         ret = 0;
10689                 if (ret < 0)
10690                         goto out;
10691 next:
10692                 /*
10693                  * TODO: if next leaf is corrupted, jump to nearest next valid
10694                  * leaf.
10695                  */
10696                 ret = btrfs_next_item(cur_root, path);
10697                 if (ret < 0)
10698                         goto out;
10699                 if (ret > 0) {
10700                         ret = 0;
10701                         goto out;
10702                 }
10703         }
10704
10705 out:
10706         btrfs_free_path(path);
10707         free(buf);
10708         return ret;
10709 }
10710
10711 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10712                                   struct btrfs_root *csum_root)
10713 {
10714         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10715         struct btrfs_path *path;
10716         struct btrfs_root *tree_root = fs_info->tree_root;
10717         struct btrfs_root *cur_root;
10718         struct extent_buffer *node;
10719         struct btrfs_key key;
10720         int slot = 0;
10721         int ret = 0;
10722
10723         path = btrfs_alloc_path();
10724         if (!path)
10725                 return -ENOMEM;
10726
10727         key.objectid = BTRFS_FS_TREE_OBJECTID;
10728         key.offset = 0;
10729         key.type = BTRFS_ROOT_ITEM_KEY;
10730
10731         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10732         if (ret < 0)
10733                 goto out;
10734         if (ret > 0) {
10735                 ret = -ENOENT;
10736                 goto out;
10737         }
10738
10739         while (1) {
10740                 node = path->nodes[0];
10741                 slot = path->slots[0];
10742                 btrfs_item_key_to_cpu(node, &key, slot);
10743                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10744                         goto out;
10745                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10746                         goto next;
10747                 if (!is_fstree(key.objectid))
10748                         goto next;
10749                 key.offset = (u64)-1;
10750
10751                 cur_root = btrfs_read_fs_root(fs_info, &key);
10752                 if (IS_ERR(cur_root) || !cur_root) {
10753                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10754                                 key.objectid);
10755                         goto out;
10756                 }
10757                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10758                                 cur_root);
10759                 if (ret < 0)
10760                         goto out;
10761 next:
10762                 ret = btrfs_next_item(tree_root, path);
10763                 if (ret > 0) {
10764                         ret = 0;
10765                         goto out;
10766                 }
10767                 if (ret < 0)
10768                         goto out;
10769         }
10770
10771 out:
10772         btrfs_free_path(path);
10773         return ret;
10774 }
10775
10776 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10777                                       struct btrfs_root *csum_root)
10778 {
10779         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10780         struct btrfs_path *path;
10781         struct btrfs_extent_item *ei;
10782         struct extent_buffer *leaf;
10783         char *buf;
10784         struct btrfs_key key;
10785         int ret;
10786
10787         path = btrfs_alloc_path();
10788         if (!path)
10789                 return -ENOMEM;
10790
10791         key.objectid = 0;
10792         key.type = BTRFS_EXTENT_ITEM_KEY;
10793         key.offset = 0;
10794
10795         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10796         if (ret < 0) {
10797                 btrfs_free_path(path);
10798                 return ret;
10799         }
10800
10801         buf = malloc(csum_root->sectorsize);
10802         if (!buf) {
10803                 btrfs_free_path(path);
10804                 return -ENOMEM;
10805         }
10806
10807         while (1) {
10808                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10809                         ret = btrfs_next_leaf(extent_root, path);
10810                         if (ret < 0)
10811                                 break;
10812                         if (ret) {
10813                                 ret = 0;
10814                                 break;
10815                         }
10816                 }
10817                 leaf = path->nodes[0];
10818
10819                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10820                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10821                         path->slots[0]++;
10822                         continue;
10823                 }
10824
10825                 ei = btrfs_item_ptr(leaf, path->slots[0],
10826                                     struct btrfs_extent_item);
10827                 if (!(btrfs_extent_flags(leaf, ei) &
10828                       BTRFS_EXTENT_FLAG_DATA)) {
10829                         path->slots[0]++;
10830                         continue;
10831                 }
10832
10833                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10834                                     key.offset);
10835                 if (ret)
10836                         break;
10837                 path->slots[0]++;
10838         }
10839
10840         btrfs_free_path(path);
10841         free(buf);
10842         return ret;
10843 }
10844
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * The extent tree is the default source of data extents.  After an extent
 * tree init its contents have been wiped, so it cannot be relied on; callers
 * then set @search_fs_tree to collect the extents from the fs/subvolume trees
 * instead.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
10861
10862 static void free_roots_info_cache(void)
10863 {
10864         if (!roots_info_cache)
10865                 return;
10866
10867         while (!cache_tree_empty(roots_info_cache)) {
10868                 struct cache_extent *entry;
10869                 struct root_item_info *rii;
10870
10871                 entry = first_cache_extent(roots_info_cache);
10872                 if (!entry)
10873                         break;
10874                 remove_cache_extent(roots_info_cache, entry);
10875                 rii = container_of(entry, struct root_item_info, cache_extent);
10876                 free(rii);
10877         }
10878
10879         free(roots_info_cache);
10880         roots_info_cache = NULL;
10881 }
10882
10883 static int build_roots_info_cache(struct btrfs_fs_info *info)
10884 {
10885         int ret = 0;
10886         struct btrfs_key key;
10887         struct extent_buffer *leaf;
10888         struct btrfs_path *path;
10889
10890         if (!roots_info_cache) {
10891                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10892                 if (!roots_info_cache)
10893                         return -ENOMEM;
10894                 cache_tree_init(roots_info_cache);
10895         }
10896
10897         path = btrfs_alloc_path();
10898         if (!path)
10899                 return -ENOMEM;
10900
10901         key.objectid = 0;
10902         key.type = BTRFS_EXTENT_ITEM_KEY;
10903         key.offset = 0;
10904
10905         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10906         if (ret < 0)
10907                 goto out;
10908         leaf = path->nodes[0];
10909
10910         while (1) {
10911                 struct btrfs_key found_key;
10912                 struct btrfs_extent_item *ei;
10913                 struct btrfs_extent_inline_ref *iref;
10914                 int slot = path->slots[0];
10915                 int type;
10916                 u64 flags;
10917                 u64 root_id;
10918                 u8 level;
10919                 struct cache_extent *entry;
10920                 struct root_item_info *rii;
10921
10922                 if (slot >= btrfs_header_nritems(leaf)) {
10923                         ret = btrfs_next_leaf(info->extent_root, path);
10924                         if (ret < 0) {
10925                                 break;
10926                         } else if (ret) {
10927                                 ret = 0;
10928                                 break;
10929                         }
10930                         leaf = path->nodes[0];
10931                         slot = path->slots[0];
10932                 }
10933
10934                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10935
10936                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10937                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10938                         goto next;
10939
10940                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10941                 flags = btrfs_extent_flags(leaf, ei);
10942
10943                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10944                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10945                         goto next;
10946
10947                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10948                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10949                         level = found_key.offset;
10950                 } else {
10951                         struct btrfs_tree_block_info *binfo;
10952
10953                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10954                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10955                         level = btrfs_tree_block_level(leaf, binfo);
10956                 }
10957
10958                 /*
10959                  * For a root extent, it must be of the following type and the
10960                  * first (and only one) iref in the item.
10961                  */
10962                 type = btrfs_extent_inline_ref_type(leaf, iref);
10963                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10964                         goto next;
10965
10966                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10967                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10968                 if (!entry) {
10969                         rii = malloc(sizeof(struct root_item_info));
10970                         if (!rii) {
10971                                 ret = -ENOMEM;
10972                                 goto out;
10973                         }
10974                         rii->cache_extent.start = root_id;
10975                         rii->cache_extent.size = 1;
10976                         rii->level = (u8)-1;
10977                         entry = &rii->cache_extent;
10978                         ret = insert_cache_extent(roots_info_cache, entry);
10979                         ASSERT(ret == 0);
10980                 } else {
10981                         rii = container_of(entry, struct root_item_info,
10982                                            cache_extent);
10983                 }
10984
10985                 ASSERT(rii->cache_extent.start == root_id);
10986                 ASSERT(rii->cache_extent.size == 1);
10987
10988                 if (level > rii->level || rii->level == (u8)-1) {
10989                         rii->level = level;
10990                         rii->bytenr = found_key.objectid;
10991                         rii->gen = btrfs_extent_generation(leaf, ei);
10992                         rii->node_count = 1;
10993                 } else if (level == rii->level) {
10994                         rii->node_count++;
10995                 }
10996 next:
10997                 path->slots[0]++;
10998         }
10999
11000 out:
11001         btrfs_free_path(path);
11002
11003         return ret;
11004 }
11005
11006 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11007                                   struct btrfs_path *path,
11008                                   const struct btrfs_key *root_key,
11009                                   const int read_only_mode)
11010 {
11011         const u64 root_id = root_key->objectid;
11012         struct cache_extent *entry;
11013         struct root_item_info *rii;
11014         struct btrfs_root_item ri;
11015         unsigned long offset;
11016
11017         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11018         if (!entry) {
11019                 fprintf(stderr,
11020                         "Error: could not find extent items for root %llu\n",
11021                         root_key->objectid);
11022                 return -ENOENT;
11023         }
11024
11025         rii = container_of(entry, struct root_item_info, cache_extent);
11026         ASSERT(rii->cache_extent.start == root_id);
11027         ASSERT(rii->cache_extent.size == 1);
11028
11029         if (rii->node_count != 1) {
11030                 fprintf(stderr,
11031                         "Error: could not find btree root extent for root %llu\n",
11032                         root_id);
11033                 return -ENOENT;
11034         }
11035
11036         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11037         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11038
11039         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11040             btrfs_root_level(&ri) != rii->level ||
11041             btrfs_root_generation(&ri) != rii->gen) {
11042
11043                 /*
11044                  * If we're in repair mode but our caller told us to not update
11045                  * the root item, i.e. just check if it needs to be updated, don't
11046                  * print this message, since the caller will call us again shortly
11047                  * for the same root item without read only mode (the caller will
11048                  * open a transaction first).
11049                  */
11050                 if (!(read_only_mode && repair))
11051                         fprintf(stderr,
11052                                 "%sroot item for root %llu,"
11053                                 " current bytenr %llu, current gen %llu, current level %u,"
11054                                 " new bytenr %llu, new gen %llu, new level %u\n",
11055                                 (read_only_mode ? "" : "fixing "),
11056                                 root_id,
11057                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11058                                 btrfs_root_level(&ri),
11059                                 rii->bytenr, rii->gen, rii->level);
11060
11061                 if (btrfs_root_generation(&ri) > rii->gen) {
11062                         fprintf(stderr,
11063                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11064                                 root_id, btrfs_root_generation(&ri), rii->gen);
11065                         return -EINVAL;
11066                 }
11067
11068                 if (!read_only_mode) {
11069                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11070                         btrfs_set_root_level(&ri, rii->level);
11071                         btrfs_set_root_generation(&ri, rii->gen);
11072                         write_extent_buffer(path->nodes[0], &ri,
11073                                             offset, sizeof(ri));
11074                 }
11075
11076                 return 1;
11077         }
11078
11079         return 0;
11080 }
11081
11082 /*
11083  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11084  * caused read-only snapshots to be corrupted if they were created at a moment
11085  * when the source subvolume/snapshot had orphan items. The issue was that the
11086  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11087  * node instead of the post orphan cleanup root node.
11088  * So this function, and its callees, just detects and fixes those cases. Even
11089  * though the regression was for read-only snapshots, this function applies to
11090  * any snapshot/subvolume root.
11091  * This must be run before any other repair code - not doing it so, makes other
11092  * repair code delete or modify backrefs in the extent tree for example, which
11093  * will result in an inconsistent fs after repairing the root items.
11094  */
11095 static int repair_root_items(struct btrfs_fs_info *info)
11096 {
11097         struct btrfs_path *path = NULL;
11098         struct btrfs_key key;
11099         struct extent_buffer *leaf;
11100         struct btrfs_trans_handle *trans = NULL;
11101         int ret = 0;
11102         int bad_roots = 0;
11103         int need_trans = 0;
11104
11105         ret = build_roots_info_cache(info);
11106         if (ret)
11107                 goto out;
11108
11109         path = btrfs_alloc_path();
11110         if (!path) {
11111                 ret = -ENOMEM;
11112                 goto out;
11113         }
11114
11115         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11116         key.type = BTRFS_ROOT_ITEM_KEY;
11117         key.offset = 0;
11118
11119 again:
11120         /*
11121          * Avoid opening and committing transactions if a leaf doesn't have
11122          * any root items that need to be fixed, so that we avoid rotating
11123          * backup roots unnecessarily.
11124          */
11125         if (need_trans) {
11126                 trans = btrfs_start_transaction(info->tree_root, 1);
11127                 if (IS_ERR(trans)) {
11128                         ret = PTR_ERR(trans);
11129                         goto out;
11130                 }
11131         }
11132
11133         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11134                                 0, trans ? 1 : 0);
11135         if (ret < 0)
11136                 goto out;
11137         leaf = path->nodes[0];
11138
11139         while (1) {
11140                 struct btrfs_key found_key;
11141
11142                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11143                         int no_more_keys = find_next_key(path, &key);
11144
11145                         btrfs_release_path(path);
11146                         if (trans) {
11147                                 ret = btrfs_commit_transaction(trans,
11148                                                                info->tree_root);
11149                                 trans = NULL;
11150                                 if (ret < 0)
11151                                         goto out;
11152                         }
11153                         need_trans = 0;
11154                         if (no_more_keys)
11155                                 break;
11156                         goto again;
11157                 }
11158
11159                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11160
11161                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11162                         goto next;
11163                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11164                         goto next;
11165
11166                 ret = maybe_repair_root_item(info, path, &found_key,
11167                                              trans ? 0 : 1);
11168                 if (ret < 0)
11169                         goto out;
11170                 if (ret) {
11171                         if (!trans && repair) {
11172                                 need_trans = 1;
11173                                 key = found_key;
11174                                 btrfs_release_path(path);
11175                                 goto again;
11176                         }
11177                         bad_roots++;
11178                 }
11179 next:
11180                 path->slots[0]++;
11181         }
11182         ret = 0;
11183 out:
11184         free_roots_info_cache();
11185         btrfs_free_path(path);
11186         if (trans)
11187                 btrfs_commit_transaction(trans, info->tree_root);
11188         if (ret < 0)
11189                 return ret;
11190
11191         return bad_roots;
11192 }
11193
11194 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11195 {
11196         struct btrfs_trans_handle *trans;
11197         struct btrfs_block_group_cache *bg_cache;
11198         u64 current = 0;
11199         int ret = 0;
11200
11201         /* Clear all free space cache inodes and its extent data */
11202         while (1) {
11203                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11204                 if (!bg_cache)
11205                         break;
11206                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11207                 if (ret < 0)
11208                         return ret;
11209                 current = bg_cache->key.objectid + bg_cache->key.offset;
11210         }
11211
11212         /* Don't forget to set cache_generation to -1 */
11213         trans = btrfs_start_transaction(fs_info->tree_root, 0);
11214         if (IS_ERR(trans)) {
11215                 error("failed to update super block cache generation");
11216                 return PTR_ERR(trans);
11217         }
11218         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11219         btrfs_commit_transaction(trans, fs_info->tree_root);
11220
11221         return ret;
11222 }
11223
/*
 * Help text for "btrfs check": a NULL-terminated list of lines.  The entries
 * are, in order, the synopsis, a one-line summary, the long description, an
 * empty separator line and the option descriptions.
 */
const char * const cmd_check_usage[] = {
	/* synopsis and one-line summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* selecting what to open */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",

	/* operation modes */
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",

	/* additional checks and reports */
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",

	/* root overrides, progress and cache handling */
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	"                            NOTE: v1 support implemented",
	NULL
};
11255
11256 int cmd_check(int argc, char **argv)
11257 {
11258         struct cache_tree root_cache;
11259         struct btrfs_root *root;
11260         struct btrfs_fs_info *info;
11261         u64 bytenr = 0;
11262         u64 subvolid = 0;
11263         u64 tree_root_bytenr = 0;
11264         u64 chunk_root_bytenr = 0;
11265         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11266         int ret;
11267         u64 num;
11268         int init_csum_tree = 0;
11269         int readonly = 0;
11270         int clear_space_cache = 0;
11271         int qgroup_report = 0;
11272         int qgroups_repaired = 0;
11273         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11274
11275         while(1) {
11276                 int c;
11277                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11278                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11279                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11280                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11281                 static const struct option long_options[] = {
11282                         { "super", required_argument, NULL, 's' },
11283                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11284                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11285                         { "init-csum-tree", no_argument, NULL,
11286                                 GETOPT_VAL_INIT_CSUM },
11287                         { "init-extent-tree", no_argument, NULL,
11288                                 GETOPT_VAL_INIT_EXTENT },
11289                         { "check-data-csum", no_argument, NULL,
11290                                 GETOPT_VAL_CHECK_CSUM },
11291                         { "backup", no_argument, NULL, 'b' },
11292                         { "subvol-extents", required_argument, NULL, 'E' },
11293                         { "qgroup-report", no_argument, NULL, 'Q' },
11294                         { "tree-root", required_argument, NULL, 'r' },
11295                         { "chunk-root", required_argument, NULL,
11296                                 GETOPT_VAL_CHUNK_TREE },
11297                         { "progress", no_argument, NULL, 'p' },
11298                         { "mode", required_argument, NULL,
11299                                 GETOPT_VAL_MODE },
11300                         { "clear-space-cache", required_argument, NULL,
11301                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
11302                         { NULL, 0, NULL, 0}
11303                 };
11304
11305                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11306                 if (c < 0)
11307                         break;
11308                 switch(c) {
11309                         case 'a': /* ignored */ break;
11310                         case 'b':
11311                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11312                                 break;
11313                         case 's':
11314                                 num = arg_strtou64(optarg);
11315                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11316                                         error(
11317                                         "super mirror should be less than %d",
11318                                                 BTRFS_SUPER_MIRROR_MAX);
11319                                         exit(1);
11320                                 }
11321                                 bytenr = btrfs_sb_offset(((int)num));
11322                                 printf("using SB copy %llu, bytenr %llu\n", num,
11323                                        (unsigned long long)bytenr);
11324                                 break;
11325                         case 'Q':
11326                                 qgroup_report = 1;
11327                                 break;
11328                         case 'E':
11329                                 subvolid = arg_strtou64(optarg);
11330                                 break;
11331                         case 'r':
11332                                 tree_root_bytenr = arg_strtou64(optarg);
11333                                 break;
11334                         case GETOPT_VAL_CHUNK_TREE:
11335                                 chunk_root_bytenr = arg_strtou64(optarg);
11336                                 break;
11337                         case 'p':
11338                                 ctx.progress_enabled = true;
11339                                 break;
11340                         case '?':
11341                         case 'h':
11342                                 usage(cmd_check_usage);
11343                         case GETOPT_VAL_REPAIR:
11344                                 printf("enabling repair mode\n");
11345                                 repair = 1;
11346                                 ctree_flags |= OPEN_CTREE_WRITES;
11347                                 break;
11348                         case GETOPT_VAL_READONLY:
11349                                 readonly = 1;
11350                                 break;
11351                         case GETOPT_VAL_INIT_CSUM:
11352                                 printf("Creating a new CRC tree\n");
11353                                 init_csum_tree = 1;
11354                                 repair = 1;
11355                                 ctree_flags |= OPEN_CTREE_WRITES;
11356                                 break;
11357                         case GETOPT_VAL_INIT_EXTENT:
11358                                 init_extent_tree = 1;
11359                                 ctree_flags |= (OPEN_CTREE_WRITES |
11360                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11361                                 repair = 1;
11362                                 break;
11363                         case GETOPT_VAL_CHECK_CSUM:
11364                                 check_data_csum = 1;
11365                                 break;
11366                         case GETOPT_VAL_MODE:
11367                                 check_mode = parse_check_mode(optarg);
11368                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11369                                         error("unknown mode: %s", optarg);
11370                                         exit(1);
11371                                 }
11372                                 break;
11373                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
11374                                 if (strcmp(optarg, "v1") != 0) {
11375                                         error(
11376                         "only v1 support implmented, unrecognized value %s",
11377                         optarg);
11378                                         exit(1);
11379                                 }
11380                                 clear_space_cache = 1;
11381                                 ctree_flags |= OPEN_CTREE_WRITES;
11382                                 break;
11383                 }
11384         }
11385
11386         if (check_argc_exact(argc - optind, 1))
11387                 usage(cmd_check_usage);
11388
11389         if (ctx.progress_enabled) {
11390                 ctx.tp = TASK_NOTHING;
11391                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11392         }
11393
11394         /* This check is the only reason for --readonly to exist */
11395         if (readonly && repair) {
11396                 error("repair options are not compatible with --readonly");
11397                 exit(1);
11398         }
11399
11400         /*
11401          * Not supported yet
11402          */
11403         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11404                 error("low memory mode doesn't support repair yet");
11405                 exit(1);
11406         }
11407
11408         radix_tree_init();
11409         cache_tree_init(&root_cache);
11410
11411         if((ret = check_mounted(argv[optind])) < 0) {
11412                 error("could not check mount status: %s", strerror(-ret));
11413                 goto err_out;
11414         } else if(ret) {
11415                 error("%s is currently mounted, aborting", argv[optind]);
11416                 ret = -EBUSY;
11417                 goto err_out;
11418         }
11419
11420         /* only allow partial opening under repair mode */
11421         if (repair)
11422                 ctree_flags |= OPEN_CTREE_PARTIAL;
11423
11424         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11425                                   chunk_root_bytenr, ctree_flags);
11426         if (!info) {
11427                 error("cannot open file system");
11428                 ret = -EIO;
11429                 goto err_out;
11430         }
11431
11432         global_info = info;
11433         root = info->fs_root;
11434         if (clear_space_cache) {
11435                 if (btrfs_fs_compat_ro(info,
11436                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11437                         error(
11438                         "free space cache v2 detected, clearing not implemented");
11439                         ret = 1;
11440                         goto close_out;
11441                 }
11442                 printf("Clearing free space cache\n");
11443                 ret = clear_free_space_cache(info);
11444                 if (ret) {
11445                         error("failed to clear free space cache");
11446                         ret = 1;
11447                 } else {
11448                         printf("Free space cache cleared\n");
11449                 }
11450                 goto close_out;
11451         }
11452
11453         /*
11454          * repair mode will force us to commit transaction which
11455          * will make us fail to load log tree when mounting.
11456          */
11457         if (repair && btrfs_super_log_root(info->super_copy)) {
11458                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11459                 if (!ret) {
11460                         ret = 1;
11461                         goto close_out;
11462                 }
11463                 ret = zero_log_tree(root);
11464                 if (ret) {
11465                         error("failed to zero log tree: %d", ret);
11466                         goto close_out;
11467                 }
11468         }
11469
11470         uuid_unparse(info->super_copy->fsid, uuidbuf);
11471         if (qgroup_report) {
11472                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11473                        uuidbuf);
11474                 ret = qgroup_verify_all(info);
11475                 if (ret == 0)
11476                         report_qgroups(1);
11477                 goto close_out;
11478         }
11479         if (subvolid) {
11480                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11481                        subvolid, argv[optind], uuidbuf);
11482                 ret = print_extent_state(info, subvolid);
11483                 goto close_out;
11484         }
11485         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11486
11487         if (!extent_buffer_uptodate(info->tree_root->node) ||
11488             !extent_buffer_uptodate(info->dev_root->node) ||
11489             !extent_buffer_uptodate(info->chunk_root->node)) {
11490                 error("critical roots corrupted, unable to check the filesystem");
11491                 ret = -EIO;
11492                 goto close_out;
11493         }
11494
11495         if (init_extent_tree || init_csum_tree) {
11496                 struct btrfs_trans_handle *trans;
11497
11498                 trans = btrfs_start_transaction(info->extent_root, 0);
11499                 if (IS_ERR(trans)) {
11500                         error("error starting transaction");
11501                         ret = PTR_ERR(trans);
11502                         goto close_out;
11503                 }
11504
11505                 if (init_extent_tree) {
11506                         printf("Creating a new extent tree\n");
11507                         ret = reinit_extent_tree(trans, info);
11508                         if (ret)
11509                                 goto close_out;
11510                 }
11511
11512                 if (init_csum_tree) {
11513                         printf("Reinitialize checksum tree\n");
11514                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11515                         if (ret) {
11516                                 error("checksum tree initialization failed: %d",
11517                                                 ret);
11518                                 ret = -EIO;
11519                                 goto close_out;
11520                         }
11521
11522                         ret = fill_csum_tree(trans, info->csum_root,
11523                                              init_extent_tree);
11524                         if (ret) {
11525                                 error("checksum tree refilling failed: %d", ret);
11526                                 return -EIO;
11527                         }
11528                 }
11529                 /*
11530                  * Ok now we commit and run the normal fsck, which will add
11531                  * extent entries for all of the items it finds.
11532                  */
11533                 ret = btrfs_commit_transaction(trans, info->extent_root);
11534                 if (ret)
11535                         goto close_out;
11536         }
11537         if (!extent_buffer_uptodate(info->extent_root->node)) {
11538                 error("critical: extent_root, unable to check the filesystem");
11539                 ret = -EIO;
11540                 goto close_out;
11541         }
11542         if (!extent_buffer_uptodate(info->csum_root->node)) {
11543                 error("critical: csum_root, unable to check the filesystem");
11544                 ret = -EIO;
11545                 goto close_out;
11546         }
11547
11548         if (!ctx.progress_enabled)
11549                 printf("checking extents");
11550         if (check_mode == CHECK_MODE_LOWMEM)
11551                 ret = check_chunks_and_extents_v2(root);
11552         else
11553                 ret = check_chunks_and_extents(root);
11554         if (ret)
11555                 printf("Errors found in extent allocation tree or chunk allocation");
11556
11557         ret = repair_root_items(info);
11558         if (ret < 0)
11559                 goto close_out;
11560         if (repair) {
11561                 fprintf(stderr, "Fixed %d roots.\n", ret);
11562                 ret = 0;
11563         } else if (ret > 0) {
11564                 fprintf(stderr,
11565                        "Found %d roots with an outdated root item.\n",
11566                        ret);
11567                 fprintf(stderr,
11568                         "Please run a filesystem check with the option --repair to fix them.\n");
11569                 ret = 1;
11570                 goto close_out;
11571         }
11572
11573         if (!ctx.progress_enabled) {
11574                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11575                         fprintf(stderr, "checking free space tree\n");
11576                 else
11577                         fprintf(stderr, "checking free space cache\n");
11578         }
11579         ret = check_space_cache(root);
11580         if (ret)
11581                 goto out;
11582
11583         /*
11584          * We used to have to have these hole extents in between our real
11585          * extents so if we don't have this flag set we need to make sure there
11586          * are no gaps in the file extents for inodes, otherwise we can just
11587          * ignore it when this happens.
11588          */
11589         no_holes = btrfs_fs_incompat(root->fs_info,
11590                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11591         if (!ctx.progress_enabled)
11592                 fprintf(stderr, "checking fs roots\n");
11593         ret = check_fs_roots(root, &root_cache);
11594         if (ret)
11595                 goto out;
11596
11597         fprintf(stderr, "checking csums\n");
11598         ret = check_csums(root);
11599         if (ret)
11600                 goto out;
11601
11602         fprintf(stderr, "checking root refs\n");
11603         ret = check_root_refs(root, &root_cache);
11604         if (ret)
11605                 goto out;
11606
11607         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11608                 struct extent_buffer *eb;
11609
11610                 eb = list_first_entry(&root->fs_info->recow_ebs,
11611                                       struct extent_buffer, recow);
11612                 list_del_init(&eb->recow);
11613                 ret = recow_extent_buffer(root, eb);
11614                 if (ret)
11615                         break;
11616         }
11617
11618         while (!list_empty(&delete_items)) {
11619                 struct bad_item *bad;
11620
11621                 bad = list_first_entry(&delete_items, struct bad_item, list);
11622                 list_del_init(&bad->list);
11623                 if (repair)
11624                         ret = delete_bad_item(root, bad);
11625                 free(bad);
11626         }
11627
11628         if (info->quota_enabled) {
11629                 int err;
11630                 fprintf(stderr, "checking quota groups\n");
11631                 err = qgroup_verify_all(info);
11632                 if (err)
11633                         goto out;
11634                 report_qgroups(0);
11635                 err = repair_qgroups(info, &qgroups_repaired);
11636                 if (err)
11637                         goto out;
11638         }
11639
11640         if (!list_empty(&root->fs_info->recow_ebs)) {
11641                 error("transid errors in file system");
11642                 ret = 1;
11643         }
11644 out:
11645         /* Don't override original ret */
11646         if (!ret && qgroups_repaired)
11647                 ret = qgroups_repaired;
11648
11649         if (found_old_backref) { /*
11650                  * there was a disk format change when mixed
11651                  * backref was in testing tree. The old format
11652                  * existed about one week.
11653                  */
11654                 printf("\n * Found old mixed backref format. "
11655                        "The old format is not supported! *"
11656                        "\n * Please mount the FS in readonly mode, "
11657                        "backup data and re-format the FS. *\n\n");
11658                 ret = 1;
11659         }
11660         printf("found %llu bytes used err is %d\n",
11661                (unsigned long long)bytes_used, ret);
11662         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11663         printf("total tree bytes: %llu\n",
11664                (unsigned long long)total_btree_bytes);
11665         printf("total fs tree bytes: %llu\n",
11666                (unsigned long long)total_fs_tree_bytes);
11667         printf("total extent tree bytes: %llu\n",
11668                (unsigned long long)total_extent_tree_bytes);
11669         printf("btree space waste bytes: %llu\n",
11670                (unsigned long long)btree_space_waste);
11671         printf("file data blocks allocated: %llu\n referenced %llu\n",
11672                 (unsigned long long)data_bytes_allocated,
11673                 (unsigned long long)data_bytes_referenced);
11674
11675         free_qgroup_counts();
11676         free_root_recs_tree(&root_cache);
11677 close_out:
11678         close_ctree(root);
11679 err_out:
11680         if (ctx.progress_enabled)
11681                 task_deinit(ctx.info);
11682
11683         return ret;
11684 }