btrfs-progs: check: improved error handling in calc_extent_flag
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Stage shown by the progress indicator.  print_status_check() uses the
 * value as an index into its table of labels.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* must stay the last element */
};
51
/* State handed to the progress reporting callbacks. */
struct task_ctx {
        int progress_enabled;
        /* Stage whose label print_status_check() prints. */
        enum task_position tp;

        /* Handle passed to task_period_start() and task_period_wait(). */
        struct task_info *info;
};
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Check implementation to run.  parse_check_mode() maps a user supplied
 * string to one of these values and returns CHECK_MODE_UNKNOWN for
 * anything it does not recognise.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
/*
 * Common part of a back reference.  It is embedded as the member 'node'
 * in struct data_backref and struct tree_backref and is chained through
 * 'list' (see to_extent_backref()).
 */
struct extent_backref {
        struct list_head list;
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
/*
 * Back reference to a data extent.  'node' is the generic part;
 * to_data_backref() recovers this structure from a pointer to it.
 */
struct data_backref {
        struct extent_backref node;
        /* Two names for the same storage; only one of them is meaningful. */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* Note: distinct from the one-bit node.found_ref flag. */
        u32 found_ref;
};
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
/*
 * Much like data_backref, but with the undetermined members removed and
 * chained through a plain list_head.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to inode_rec->orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;           /* printed as "inode" in error reports */
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
134
/*
 * Back reference to a tree block.  'node' is the generic part;
 * to_tree_backref() recovers this structure from a pointer to it.
 */
struct tree_backref {
        struct extent_backref node;
        /* Two names for the same storage; only one of them is meaningful. */
        union {
                u64 parent;
                u64 root;
        };
};
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
/*
 * Explicit "not determined yet" value for
 * extent_record::flag_block_full_backref.  That bit-field is two bits wide
 * so that it can hold this value in addition to 0 and 1.
 */
enum { FLAG_UNSET = 2 };

/*
 * Per-extent bookkeeping record.  It is chained through 'list' (see
 * to_extent_record()) and carries a cache_extent in 'cache'.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* 0, 1 or FLAG_UNSET */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
/*
 * One name referring to an inode.  Entries are chained on
 * inode_record::backrefs through 'list'.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        /* Compared against imode_to_type() of the inode's mode. */
        unsigned int filetype:8;
        /* Bitmask of REF_ERR_* values. */
        int errors;
        unsigned int ref_type;
        u64 dir;
        u64 index;
        u16 namelen;
        /*
         * Trailing storage for the name.  clone_inode_rec() copies
         * sizeof(struct) + namelen + 1 bytes, i.e. the name plus one extra
         * byte (presumably a terminating NUL - confirm at the allocation
         * site).
         */
        char name[0];
};
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
/*
 * Snapshot of the root item fields the checker works with.
 * NOTE(review): the code filling and consuming this record is outside this
 * excerpt; field meanings are presumably those of the on-disk root item.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
212
/*
 * Error bits stored in the 'errors' member of a back reference and turned
 * into text by print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
226
/*
 * A range [start, start + len) without a file extent, kept in an rbtree
 * (inode_record::holes) linked through 'node'.
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
232
/*
 * Everything collected about one inode while scanning a tree.  A record
 * may be shared; get_inode_rec() clones it before modification when
 * refs > 1 and free_inode_rec() releases it once refs drops to zero.
 */
struct inode_record {
        /* List of struct inode_backref. */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* Bitmask of I_ERR_* values. */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        /* Initialised to (u64)-1 for a freshly created record. */
        u64 extent_start;
        u64 extent_end;
        /* rbtree of struct file_extent_hole. */
        struct rb_root holes;
        /* List of struct orphan_data_extent. */
        struct list_head orphan_extents;

        /* Number of users sharing this record. */
        u32 refs;
};
260
/*
 * Error bits stored in inode_record::errors and turned into text by
 * print_inode_error().
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
276
/*
 * One reference to a subvolume root.  Entries are chained through 'list'
 * (see to_root_backref()); root_record::backrefs is presumably the list
 * head - confirm against the code that inserts them.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        /* NOTE(review): presumably a bitmask of REF_ERR_* values. */
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        /* Trailing storage for the name, sized at allocation time. */
        char name[0];
};
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
/*
 * Per-root bookkeeping record, stored in a cache tree through 'cache'.
 * NOTE(review): 'backrefs' presumably holds struct root_backref entries;
 * the code populating it is outside this excerpt.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
304
/*
 * Cache tree entry that carries an opaque pointer.  get_inode_rec() keys
 * it by inode number (cache.start = ino, cache.size = 1) and stores the
 * inode_record in 'data'.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
309
/*
 * Per-node state referenced from walk_control::nodes and stored in a cache
 * tree through 'cache'.
 * NOTE(review): the users of this structure are outside this excerpt.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
317
/* Start and size of a block.  Users are outside this excerpt. */
struct block_info {
        u64 start;
        u32 size;
};
322
/*
 * State of a tree walk: one shared_node slot per possible tree level
 * (BTRFS_MAX_LEVEL entries) plus a cache tree in 'shared'.
 * NOTE(review): the walker itself is outside this excerpt.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
329
/*
 * A key together with the id of the root it belongs to, chained through
 * 'list'.  Users are outside this excerpt.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
335
/*
 * A (bytenr, bytes) pair with counters, chained through 'list'.
 * NOTE(review): users are outside this excerpt.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
343
/* Information gathered about a tree root, stored through 'cache_extent'. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
353
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet.  They are only used internally
 * for error classification.
 *
 * Every flag owns a distinct bit so that any combination can be OR-ed
 * together and still be told apart.  CROSSING_STRIPE_BOUNDARY used to share
 * bit 4 with REFERENCER_MISMATCH; it now uses the next free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8) /* Chunk type does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Finish the progress line by emitting a newline and flushing stdout.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         size_t size;
663         int ret;
664
665         rec = malloc(sizeof(*rec));
666         if (!rec)
667                 return ERR_PTR(-ENOMEM);
668         memcpy(rec, orig_rec, sizeof(*rec));
669         rec->refs = 1;
670         INIT_LIST_HEAD(&rec->backrefs);
671         INIT_LIST_HEAD(&rec->orphan_extents);
672         rec->holes = RB_ROOT;
673
674         list_for_each_entry(orig, &orig_rec->backrefs, list) {
675                 size = sizeof(*orig) + orig->namelen + 1;
676                 backref = malloc(size);
677                 if (!backref) {
678                         ret = -ENOMEM;
679                         goto cleanup;
680                 }
681                 memcpy(backref, orig, size);
682                 list_add_tail(&backref->list, &rec->backrefs);
683         }
684         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
685                 dst_orphan = malloc(sizeof(*dst_orphan));
686                 if (!dst_orphan) {
687                         ret = -ENOMEM;
688                         goto cleanup;
689                 }
690                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
691                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
692         }
693         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
694         BUG_ON(ret < 0);
695
696         return rec;
697
698 cleanup:
699         if (!list_empty(&rec->backrefs))
700                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
701                         list_del(&orig->list);
702                         free(orig);
703                 }
704
705         if (!list_empty(&rec->orphan_extents))
706                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
707                         list_del(&orig->list);
708                         free(orig);
709                 }
710
711         free(rec);
712
713         return ERR_PTR(ret);
714 }
715
716 static void print_orphan_data_extents(struct list_head *orphan_extents,
717                                       u64 objectid)
718 {
719         struct orphan_data_extent *orphan;
720
721         if (list_empty(orphan_extents))
722                 return;
723         printf("The following data extent is lost in tree %llu:\n",
724                objectid);
725         list_for_each_entry(orphan, orphan_extents, list) {
726                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
727                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
728                        orphan->disk_len);
729         }
730 }
731
732 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
733 {
734         u64 root_objectid = root->root_key.objectid;
735         int errors = rec->errors;
736
737         if (!errors)
738                 return;
739         /* reloc root errors, we print its corresponding fs root objectid*/
740         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
741                 root_objectid = root->root_key.offset;
742                 fprintf(stderr, "reloc");
743         }
744         fprintf(stderr, "root %llu inode %llu errors %x",
745                 (unsigned long long) root_objectid,
746                 (unsigned long long) rec->ino, rec->errors);
747
748         if (errors & I_ERR_NO_INODE_ITEM)
749                 fprintf(stderr, ", no inode item");
750         if (errors & I_ERR_NO_ORPHAN_ITEM)
751                 fprintf(stderr, ", no orphan item");
752         if (errors & I_ERR_DUP_INODE_ITEM)
753                 fprintf(stderr, ", dup inode item");
754         if (errors & I_ERR_DUP_DIR_INDEX)
755                 fprintf(stderr, ", dup dir index");
756         if (errors & I_ERR_ODD_DIR_ITEM)
757                 fprintf(stderr, ", odd dir item");
758         if (errors & I_ERR_ODD_FILE_EXTENT)
759                 fprintf(stderr, ", odd file extent");
760         if (errors & I_ERR_BAD_FILE_EXTENT)
761                 fprintf(stderr, ", bad file extent");
762         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
763                 fprintf(stderr, ", file extent overlap");
764         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
765                 fprintf(stderr, ", file extent discount");
766         if (errors & I_ERR_DIR_ISIZE_WRONG)
767                 fprintf(stderr, ", dir isize wrong");
768         if (errors & I_ERR_FILE_NBYTES_WRONG)
769                 fprintf(stderr, ", nbytes wrong");
770         if (errors & I_ERR_ODD_CSUM_ITEM)
771                 fprintf(stderr, ", odd csum item");
772         if (errors & I_ERR_SOME_CSUM_MISSING)
773                 fprintf(stderr, ", some csum missing");
774         if (errors & I_ERR_LINK_COUNT_WRONG)
775                 fprintf(stderr, ", link count wrong");
776         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
777                 fprintf(stderr, ", orphan file extent");
778         fprintf(stderr, "\n");
779         /* Print the orphan extents if needed */
780         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
781                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
782
783         /* Print the holes if needed */
784         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
785                 struct file_extent_hole *hole;
786                 struct rb_node *node;
787                 int found = 0;
788
789                 node = rb_first(&rec->holes);
790                 fprintf(stderr, "Found file extent holes:\n");
791                 while (node) {
792                         found = 1;
793                         hole = rb_entry(node, struct file_extent_hole, node);
794                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
795                                 hole->start, hole->len);
796                         node = rb_next(node);
797                 }
798                 if (!found)
799                         fprintf(stderr, "\tstart: 0, len: %llu\n",
800                                 round_up(rec->isize, root->sectorsize));
801         }
802 }
803
804 static void print_ref_error(int errors)
805 {
806         if (errors & REF_ERR_NO_DIR_ITEM)
807                 fprintf(stderr, ", no dir item");
808         if (errors & REF_ERR_NO_DIR_INDEX)
809                 fprintf(stderr, ", no dir index");
810         if (errors & REF_ERR_NO_INODE_REF)
811                 fprintf(stderr, ", no inode ref");
812         if (errors & REF_ERR_DUP_DIR_ITEM)
813                 fprintf(stderr, ", dup dir item");
814         if (errors & REF_ERR_DUP_DIR_INDEX)
815                 fprintf(stderr, ", dup dir index");
816         if (errors & REF_ERR_DUP_INODE_REF)
817                 fprintf(stderr, ", dup inode ref");
818         if (errors & REF_ERR_INDEX_UNMATCH)
819                 fprintf(stderr, ", index mismatch");
820         if (errors & REF_ERR_FILETYPE_UNMATCH)
821                 fprintf(stderr, ", filetype mismatch");
822         if (errors & REF_ERR_NAME_TOO_LONG)
823                 fprintf(stderr, ", name too long");
824         if (errors & REF_ERR_NO_ROOT_REF)
825                 fprintf(stderr, ", no root ref");
826         if (errors & REF_ERR_NO_ROOT_BACKREF)
827                 fprintf(stderr, ", no root backref");
828         if (errors & REF_ERR_DUP_ROOT_REF)
829                 fprintf(stderr, ", dup root ref");
830         if (errors & REF_ERR_DUP_ROOT_BACKREF)
831                 fprintf(stderr, ", dup root backref");
832         fprintf(stderr, "\n");
833 }
834
835 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
836                                           u64 ino, int mod)
837 {
838         struct ptr_node *node;
839         struct cache_extent *cache;
840         struct inode_record *rec = NULL;
841         int ret;
842
843         cache = lookup_cache_extent(inode_cache, ino, 1);
844         if (cache) {
845                 node = container_of(cache, struct ptr_node, cache);
846                 rec = node->data;
847                 if (mod && rec->refs > 1) {
848                         node->data = clone_inode_rec(rec);
849                         if (IS_ERR(node->data))
850                                 return node->data;
851                         rec->refs--;
852                         rec = node->data;
853                 }
854         } else if (mod) {
855                 rec = calloc(1, sizeof(*rec));
856                 if (!rec)
857                         return ERR_PTR(-ENOMEM);
858                 rec->ino = ino;
859                 rec->extent_start = (u64)-1;
860                 rec->refs = 1;
861                 INIT_LIST_HEAD(&rec->backrefs);
862                 INIT_LIST_HEAD(&rec->orphan_extents);
863                 rec->holes = RB_ROOT;
864
865                 node = malloc(sizeof(*node));
866                 if (!node) {
867                         free(rec);
868                         return ERR_PTR(-ENOMEM);
869                 }
870                 node->cache.start = ino;
871                 node->cache.size = 1;
872                 node->data = rec;
873
874                 if (ino == BTRFS_FREE_INO_OBJECTID)
875                         rec->found_link = 1;
876
877                 ret = insert_cache_extent(inode_cache, &node->cache);
878                 if (ret)
879                         return ERR_PTR(-EEXIST);
880         }
881         return rec;
882 }
883
884 static void free_orphan_data_extents(struct list_head *orphan_extents)
885 {
886         struct orphan_data_extent *orphan;
887
888         while (!list_empty(orphan_extents)) {
889                 orphan = list_entry(orphan_extents->next,
890                                     struct orphan_data_extent, list);
891                 list_del(&orphan->list);
892                 free(orphan);
893         }
894 }
895
896 static void free_inode_rec(struct inode_record *rec)
897 {
898         struct inode_backref *backref;
899
900         if (--rec->refs > 0)
901                 return;
902
903         while (!list_empty(&rec->backrefs)) {
904                 backref = to_inode_backref(rec->backrefs.next);
905                 list_del(&backref->list);
906                 free(backref);
907         }
908         free_orphan_data_extents(&rec->orphan_extents);
909         free_file_extent_holes(&rec->holes);
910         free(rec);
911 }
912
913 static int can_free_inode_rec(struct inode_record *rec)
914 {
915         if (!rec->errors && rec->checked && rec->found_inode_item &&
916             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
917                 return 1;
918         return 0;
919 }
920
921 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
922                                  struct inode_record *rec)
923 {
924         struct cache_extent *cache;
925         struct inode_backref *tmp, *backref;
926         struct ptr_node *node;
927         unsigned char filetype;
928
929         if (!rec->found_inode_item)
930                 return;
931
932         filetype = imode_to_type(rec->imode);
933         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
934                 if (backref->found_dir_item && backref->found_dir_index) {
935                         if (backref->filetype != filetype)
936                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
937                         if (!backref->errors && backref->found_inode_ref &&
938                             rec->nlink == rec->found_link) {
939                                 list_del(&backref->list);
940                                 free(backref);
941                         }
942                 }
943         }
944
945         if (!rec->checked || rec->merging)
946                 return;
947
948         if (S_ISDIR(rec->imode)) {
949                 if (rec->found_size != rec->isize)
950                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
951                 if (rec->found_file_extent)
952                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
953         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
954                 if (rec->found_dir_item)
955                         rec->errors |= I_ERR_ODD_DIR_ITEM;
956                 if (rec->found_size != rec->nbytes)
957                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
958                 if (rec->nlink > 0 && !no_holes &&
959                     (rec->extent_end < rec->isize ||
960                      first_extent_gap(&rec->holes) < rec->isize))
961                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
962         }
963
964         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
965                 if (rec->found_csum_item && rec->nodatasum)
966                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
967                 if (rec->some_csum_missing && !rec->nodatasum)
968                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
969         }
970
971         BUG_ON(rec->refs != 1);
972         if (can_free_inode_rec(rec)) {
973                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
974                 node = container_of(cache, struct ptr_node, cache);
975                 BUG_ON(node->data != rec);
976                 remove_cache_extent(inode_cache, &node->cache);
977                 free(node);
978                 free_inode_rec(rec);
979         }
980 }
981
982 static int check_orphan_item(struct btrfs_root *root, u64 ino)
983 {
984         struct btrfs_path path;
985         struct btrfs_key key;
986         int ret;
987
988         key.objectid = BTRFS_ORPHAN_OBJECTID;
989         key.type = BTRFS_ORPHAN_ITEM_KEY;
990         key.offset = ino;
991
992         btrfs_init_path(&path);
993         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
994         btrfs_release_path(&path);
995         if (ret > 0)
996                 ret = -ENOENT;
997         return ret;
998 }
999
1000 static int process_inode_item(struct extent_buffer *eb,
1001                               int slot, struct btrfs_key *key,
1002                               struct shared_node *active_node)
1003 {
1004         struct inode_record *rec;
1005         struct btrfs_inode_item *item;
1006
1007         rec = active_node->current;
1008         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1009         if (rec->found_inode_item) {
1010                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1011                 return 1;
1012         }
1013         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1014         rec->nlink = btrfs_inode_nlink(eb, item);
1015         rec->isize = btrfs_inode_size(eb, item);
1016         rec->nbytes = btrfs_inode_nbytes(eb, item);
1017         rec->imode = btrfs_inode_mode(eb, item);
1018         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1019                 rec->nodatasum = 1;
1020         rec->found_inode_item = 1;
1021         if (rec->nlink == 0)
1022                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1023         maybe_free_inode_rec(&active_node->inode_cache, rec);
1024         return 0;
1025 }
1026
1027 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1028                                                 const char *name,
1029                                                 int namelen, u64 dir)
1030 {
1031         struct inode_backref *backref;
1032
1033         list_for_each_entry(backref, &rec->backrefs, list) {
1034                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1035                         break;
1036                 if (backref->dir != dir || backref->namelen != namelen)
1037                         continue;
1038                 if (memcmp(name, backref->name, namelen))
1039                         continue;
1040                 return backref;
1041         }
1042
1043         backref = malloc(sizeof(*backref) + namelen + 1);
1044         if (!backref)
1045                 return NULL;
1046         memset(backref, 0, sizeof(*backref));
1047         backref->dir = dir;
1048         backref->namelen = namelen;
1049         memcpy(backref->name, name, namelen);
1050         backref->name[namelen] = '\0';
1051         list_add_tail(&backref->list, &rec->backrefs);
1052         return backref;
1053 }
1054
1055 static int add_inode_backref(struct cache_tree *inode_cache,
1056                              u64 ino, u64 dir, u64 index,
1057                              const char *name, int namelen,
1058                              int filetype, int itemtype, int errors)
1059 {
1060         struct inode_record *rec;
1061         struct inode_backref *backref;
1062
1063         rec = get_inode_rec(inode_cache, ino, 1);
1064         BUG_ON(IS_ERR(rec));
1065         backref = get_inode_backref(rec, name, namelen, dir);
1066         BUG_ON(!backref);
1067         if (errors)
1068                 backref->errors |= errors;
1069         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1070                 if (backref->found_dir_index)
1071                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1072                 if (backref->found_inode_ref && backref->index != index)
1073                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1074                 if (backref->found_dir_item && backref->filetype != filetype)
1075                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1076
1077                 backref->index = index;
1078                 backref->filetype = filetype;
1079                 backref->found_dir_index = 1;
1080         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1081                 rec->found_link++;
1082                 if (backref->found_dir_item)
1083                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1084                 if (backref->found_dir_index && backref->filetype != filetype)
1085                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1086
1087                 backref->filetype = filetype;
1088                 backref->found_dir_item = 1;
1089         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1090                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1091                 if (backref->found_inode_ref)
1092                         backref->errors |= REF_ERR_DUP_INODE_REF;
1093                 if (backref->found_dir_index && backref->index != index)
1094                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1095                 else
1096                         backref->index = index;
1097
1098                 backref->ref_type = itemtype;
1099                 backref->found_inode_ref = 1;
1100         } else {
1101                 BUG_ON(1);
1102         }
1103
1104         maybe_free_inode_rec(inode_cache, rec);
1105         return 0;
1106 }
1107
1108 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1109                             struct cache_tree *dst_cache)
1110 {
1111         struct inode_backref *backref;
1112         u32 dir_count = 0;
1113         int ret = 0;
1114
1115         dst->merging = 1;
1116         list_for_each_entry(backref, &src->backrefs, list) {
1117                 if (backref->found_dir_index) {
1118                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1119                                         backref->index, backref->name,
1120                                         backref->namelen, backref->filetype,
1121                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1122                 }
1123                 if (backref->found_dir_item) {
1124                         dir_count++;
1125                         add_inode_backref(dst_cache, dst->ino,
1126                                         backref->dir, 0, backref->name,
1127                                         backref->namelen, backref->filetype,
1128                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1129                 }
1130                 if (backref->found_inode_ref) {
1131                         add_inode_backref(dst_cache, dst->ino,
1132                                         backref->dir, backref->index,
1133                                         backref->name, backref->namelen, 0,
1134                                         backref->ref_type, backref->errors);
1135                 }
1136         }
1137
1138         if (src->found_dir_item)
1139                 dst->found_dir_item = 1;
1140         if (src->found_file_extent)
1141                 dst->found_file_extent = 1;
1142         if (src->found_csum_item)
1143                 dst->found_csum_item = 1;
1144         if (src->some_csum_missing)
1145                 dst->some_csum_missing = 1;
1146         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1147                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1148                 if (ret < 0)
1149                         return ret;
1150         }
1151
1152         BUG_ON(src->found_link < dir_count);
1153         dst->found_link += src->found_link - dir_count;
1154         dst->found_size += src->found_size;
1155         if (src->extent_start != (u64)-1) {
1156                 if (dst->extent_start == (u64)-1) {
1157                         dst->extent_start = src->extent_start;
1158                         dst->extent_end = src->extent_end;
1159                 } else {
1160                         if (dst->extent_end > src->extent_start)
1161                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1162                         else if (dst->extent_end < src->extent_start) {
1163                                 ret = add_file_extent_hole(&dst->holes,
1164                                         dst->extent_end,
1165                                         src->extent_start - dst->extent_end);
1166                         }
1167                         if (dst->extent_end < src->extent_end)
1168                                 dst->extent_end = src->extent_end;
1169                 }
1170         }
1171
1172         dst->errors |= src->errors;
1173         if (src->found_inode_item) {
1174                 if (!dst->found_inode_item) {
1175                         dst->nlink = src->nlink;
1176                         dst->isize = src->isize;
1177                         dst->nbytes = src->nbytes;
1178                         dst->imode = src->imode;
1179                         dst->nodatasum = src->nodatasum;
1180                         dst->found_inode_item = 1;
1181                 } else {
1182                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1183                 }
1184         }
1185         dst->merging = 0;
1186
1187         return 0;
1188 }
1189
1190 static int splice_shared_node(struct shared_node *src_node,
1191                               struct shared_node *dst_node)
1192 {
1193         struct cache_extent *cache;
1194         struct ptr_node *node, *ins;
1195         struct cache_tree *src, *dst;
1196         struct inode_record *rec, *conflict;
1197         u64 current_ino = 0;
1198         int splice = 0;
1199         int ret;
1200
1201         if (--src_node->refs == 0)
1202                 splice = 1;
1203         if (src_node->current)
1204                 current_ino = src_node->current->ino;
1205
1206         src = &src_node->root_cache;
1207         dst = &dst_node->root_cache;
1208 again:
1209         cache = search_cache_extent(src, 0);
1210         while (cache) {
1211                 node = container_of(cache, struct ptr_node, cache);
1212                 rec = node->data;
1213                 cache = next_cache_extent(cache);
1214
1215                 if (splice) {
1216                         remove_cache_extent(src, &node->cache);
1217                         ins = node;
1218                 } else {
1219                         ins = malloc(sizeof(*ins));
1220                         BUG_ON(!ins);
1221                         ins->cache.start = node->cache.start;
1222                         ins->cache.size = node->cache.size;
1223                         ins->data = rec;
1224                         rec->refs++;
1225                 }
1226                 ret = insert_cache_extent(dst, &ins->cache);
1227                 if (ret == -EEXIST) {
1228                         conflict = get_inode_rec(dst, rec->ino, 1);
1229                         BUG_ON(IS_ERR(conflict));
1230                         merge_inode_recs(rec, conflict, dst);
1231                         if (rec->checked) {
1232                                 conflict->checked = 1;
1233                                 if (dst_node->current == conflict)
1234                                         dst_node->current = NULL;
1235                         }
1236                         maybe_free_inode_rec(dst, conflict);
1237                         free_inode_rec(rec);
1238                         free(ins);
1239                 } else {
1240                         BUG_ON(ret);
1241                 }
1242         }
1243
1244         if (src == &src_node->root_cache) {
1245                 src = &src_node->inode_cache;
1246                 dst = &dst_node->inode_cache;
1247                 goto again;
1248         }
1249
1250         if (current_ino > 0 && (!dst_node->current ||
1251             current_ino > dst_node->current->ino)) {
1252                 if (dst_node->current) {
1253                         dst_node->current->checked = 1;
1254                         maybe_free_inode_rec(dst, dst_node->current);
1255                 }
1256                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1257                 BUG_ON(IS_ERR(dst_node->current));
1258         }
1259         return 0;
1260 }
1261
1262 static void free_inode_ptr(struct cache_extent *cache)
1263 {
1264         struct ptr_node *node;
1265         struct inode_record *rec;
1266
1267         node = container_of(cache, struct ptr_node, cache);
1268         rec = node->data;
1269         free_inode_rec(rec);
1270         free(node);
1271 }
1272
1273 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1274
1275 static struct shared_node *find_shared_node(struct cache_tree *shared,
1276                                             u64 bytenr)
1277 {
1278         struct cache_extent *cache;
1279         struct shared_node *node;
1280
1281         cache = lookup_cache_extent(shared, bytenr, 1);
1282         if (cache) {
1283                 node = container_of(cache, struct shared_node, cache);
1284                 return node;
1285         }
1286         return NULL;
1287 }
1288
1289 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1290 {
1291         int ret;
1292         struct shared_node *node;
1293
1294         node = calloc(1, sizeof(*node));
1295         if (!node)
1296                 return -ENOMEM;
1297         node->cache.start = bytenr;
1298         node->cache.size = 1;
1299         cache_tree_init(&node->root_cache);
1300         cache_tree_init(&node->inode_cache);
1301         node->refs = refs;
1302
1303         ret = insert_cache_extent(shared, &node->cache);
1304
1305         return ret;
1306 }
1307
1308 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1309                              struct walk_control *wc, int level)
1310 {
1311         struct shared_node *node;
1312         struct shared_node *dest;
1313         int ret;
1314
1315         if (level == wc->active_node)
1316                 return 0;
1317
1318         BUG_ON(wc->active_node <= level);
1319         node = find_shared_node(&wc->shared, bytenr);
1320         if (!node) {
1321                 ret = add_shared_node(&wc->shared, bytenr, refs);
1322                 BUG_ON(ret);
1323                 node = find_shared_node(&wc->shared, bytenr);
1324                 wc->nodes[level] = node;
1325                 wc->active_node = level;
1326                 return 0;
1327         }
1328
1329         if (wc->root_level == wc->active_node &&
1330             btrfs_root_refs(&root->root_item) == 0) {
1331                 if (--node->refs == 0) {
1332                         free_inode_recs_tree(&node->root_cache);
1333                         free_inode_recs_tree(&node->inode_cache);
1334                         remove_cache_extent(&wc->shared, &node->cache);
1335                         free(node);
1336                 }
1337                 return 1;
1338         }
1339
1340         dest = wc->nodes[wc->active_node];
1341         splice_shared_node(node, dest);
1342         if (node->refs == 0) {
1343                 remove_cache_extent(&wc->shared, &node->cache);
1344                 free(node);
1345         }
1346         return 1;
1347 }
1348
1349 static int leave_shared_node(struct btrfs_root *root,
1350                              struct walk_control *wc, int level)
1351 {
1352         struct shared_node *node;
1353         struct shared_node *dest;
1354         int i;
1355
1356         if (level == wc->root_level)
1357                 return 0;
1358
1359         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1360                 if (wc->nodes[i])
1361                         break;
1362         }
1363         BUG_ON(i >= BTRFS_MAX_LEVEL);
1364
1365         node = wc->nodes[wc->active_node];
1366         wc->nodes[wc->active_node] = NULL;
1367         wc->active_node = i;
1368
1369         dest = wc->nodes[wc->active_node];
1370         if (wc->active_node < wc->root_level ||
1371             btrfs_root_refs(&root->root_item) > 0) {
1372                 BUG_ON(node->refs <= 1);
1373                 splice_shared_node(node, dest);
1374         } else {
1375                 BUG_ON(node->refs < 2);
1376                 node->refs--;
1377         }
1378         return 0;
1379 }
1380
1381 /*
1382  * Returns:
1383  * < 0 - on error
1384  * 1   - if the root with id child_root_id is a child of root parent_root_id
1385  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1386  *       has other root(s) as parent(s)
1387  * 2   - if the root child_root_id doesn't have any parent roots
1388  */
1389 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1390                          u64 child_root_id)
1391 {
1392         struct btrfs_path path;
1393         struct btrfs_key key;
1394         struct extent_buffer *leaf;
1395         int has_parent = 0;
1396         int ret;
1397
1398         btrfs_init_path(&path);
1399
1400         key.objectid = parent_root_id;
1401         key.type = BTRFS_ROOT_REF_KEY;
1402         key.offset = child_root_id;
1403         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1404                                 0, 0);
1405         if (ret < 0)
1406                 return ret;
1407         btrfs_release_path(&path);
1408         if (!ret)
1409                 return 1;
1410
1411         key.objectid = child_root_id;
1412         key.type = BTRFS_ROOT_BACKREF_KEY;
1413         key.offset = 0;
1414         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1415                                 0, 0);
1416         if (ret < 0)
1417                 goto out;
1418
1419         while (1) {
1420                 leaf = path.nodes[0];
1421                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1422                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1423                         if (ret)
1424                                 break;
1425                         leaf = path.nodes[0];
1426                 }
1427
1428                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1429                 if (key.objectid != child_root_id ||
1430                     key.type != BTRFS_ROOT_BACKREF_KEY)
1431                         break;
1432
1433                 has_parent = 1;
1434
1435                 if (key.offset == parent_root_id) {
1436                         btrfs_release_path(&path);
1437                         return 1;
1438                 }
1439
1440                 path.slots[0]++;
1441         }
1442 out:
1443         btrfs_release_path(&path);
1444         if (ret < 0)
1445                 return ret;
1446         return has_parent ? 0 : 2;
1447 }
1448
1449 static int process_dir_item(struct btrfs_root *root,
1450                             struct extent_buffer *eb,
1451                             int slot, struct btrfs_key *key,
1452                             struct shared_node *active_node)
1453 {
1454         u32 total;
1455         u32 cur = 0;
1456         u32 len;
1457         u32 name_len;
1458         u32 data_len;
1459         int error;
1460         int nritems = 0;
1461         int filetype;
1462         struct btrfs_dir_item *di;
1463         struct inode_record *rec;
1464         struct cache_tree *root_cache;
1465         struct cache_tree *inode_cache;
1466         struct btrfs_key location;
1467         char namebuf[BTRFS_NAME_LEN];
1468
1469         root_cache = &active_node->root_cache;
1470         inode_cache = &active_node->inode_cache;
1471         rec = active_node->current;
1472         rec->found_dir_item = 1;
1473
1474         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1475         total = btrfs_item_size_nr(eb, slot);
1476         while (cur < total) {
1477                 nritems++;
1478                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1479                 name_len = btrfs_dir_name_len(eb, di);
1480                 data_len = btrfs_dir_data_len(eb, di);
1481                 filetype = btrfs_dir_type(eb, di);
1482
1483                 rec->found_size += name_len;
1484                 if (name_len <= BTRFS_NAME_LEN) {
1485                         len = name_len;
1486                         error = 0;
1487                 } else {
1488                         len = BTRFS_NAME_LEN;
1489                         error = REF_ERR_NAME_TOO_LONG;
1490                 }
1491                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1492
1493                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1494                         add_inode_backref(inode_cache, location.objectid,
1495                                           key->objectid, key->offset, namebuf,
1496                                           len, filetype, key->type, error);
1497                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1498                         add_inode_backref(root_cache, location.objectid,
1499                                           key->objectid, key->offset,
1500                                           namebuf, len, filetype,
1501                                           key->type, error);
1502                 } else {
1503                         fprintf(stderr, "invalid location in dir item %u\n",
1504                                 location.type);
1505                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1506                                           key->objectid, key->offset, namebuf,
1507                                           len, filetype, key->type, error);
1508                 }
1509
1510                 len = sizeof(*di) + name_len + data_len;
1511                 di = (struct btrfs_dir_item *)((char *)di + len);
1512                 cur += len;
1513         }
1514         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1515                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1516
1517         return 0;
1518 }
1519
1520 static int process_inode_ref(struct extent_buffer *eb,
1521                              int slot, struct btrfs_key *key,
1522                              struct shared_node *active_node)
1523 {
1524         u32 total;
1525         u32 cur = 0;
1526         u32 len;
1527         u32 name_len;
1528         u64 index;
1529         int error;
1530         struct cache_tree *inode_cache;
1531         struct btrfs_inode_ref *ref;
1532         char namebuf[BTRFS_NAME_LEN];
1533
1534         inode_cache = &active_node->inode_cache;
1535
1536         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1537         total = btrfs_item_size_nr(eb, slot);
1538         while (cur < total) {
1539                 name_len = btrfs_inode_ref_name_len(eb, ref);
1540                 index = btrfs_inode_ref_index(eb, ref);
1541                 if (name_len <= BTRFS_NAME_LEN) {
1542                         len = name_len;
1543                         error = 0;
1544                 } else {
1545                         len = BTRFS_NAME_LEN;
1546                         error = REF_ERR_NAME_TOO_LONG;
1547                 }
1548                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1549                 add_inode_backref(inode_cache, key->objectid, key->offset,
1550                                   index, namebuf, len, 0, key->type, error);
1551
1552                 len = sizeof(*ref) + name_len;
1553                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1554                 cur += len;
1555         }
1556         return 0;
1557 }
1558
1559 static int process_inode_extref(struct extent_buffer *eb,
1560                                 int slot, struct btrfs_key *key,
1561                                 struct shared_node *active_node)
1562 {
1563         u32 total;
1564         u32 cur = 0;
1565         u32 len;
1566         u32 name_len;
1567         u64 index;
1568         u64 parent;
1569         int error;
1570         struct cache_tree *inode_cache;
1571         struct btrfs_inode_extref *extref;
1572         char namebuf[BTRFS_NAME_LEN];
1573
1574         inode_cache = &active_node->inode_cache;
1575
1576         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1577         total = btrfs_item_size_nr(eb, slot);
1578         while (cur < total) {
1579                 name_len = btrfs_inode_extref_name_len(eb, extref);
1580                 index = btrfs_inode_extref_index(eb, extref);
1581                 parent = btrfs_inode_extref_parent(eb, extref);
1582                 if (name_len <= BTRFS_NAME_LEN) {
1583                         len = name_len;
1584                         error = 0;
1585                 } else {
1586                         len = BTRFS_NAME_LEN;
1587                         error = REF_ERR_NAME_TOO_LONG;
1588                 }
1589                 read_extent_buffer(eb, namebuf,
1590                                    (unsigned long)(extref + 1), len);
1591                 add_inode_backref(inode_cache, key->objectid, parent,
1592                                   index, namebuf, len, 0, key->type, error);
1593
1594                 len = sizeof(*extref) + name_len;
1595                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1596                 cur += len;
1597         }
1598         return 0;
1599
1600 }
1601
1602 static int count_csum_range(struct btrfs_root *root, u64 start,
1603                             u64 len, u64 *found)
1604 {
1605         struct btrfs_key key;
1606         struct btrfs_path path;
1607         struct extent_buffer *leaf;
1608         int ret;
1609         size_t size;
1610         *found = 0;
1611         u64 csum_end;
1612         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1613
1614         btrfs_init_path(&path);
1615
1616         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1617         key.offset = start;
1618         key.type = BTRFS_EXTENT_CSUM_KEY;
1619
1620         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1621                                 &key, &path, 0, 0);
1622         if (ret < 0)
1623                 goto out;
1624         if (ret > 0 && path.slots[0] > 0) {
1625                 leaf = path.nodes[0];
1626                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1627                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1628                     key.type == BTRFS_EXTENT_CSUM_KEY)
1629                         path.slots[0]--;
1630         }
1631
1632         while (len > 0) {
1633                 leaf = path.nodes[0];
1634                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1635                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1636                         if (ret > 0)
1637                                 break;
1638                         else if (ret < 0)
1639                                 goto out;
1640                         leaf = path.nodes[0];
1641                 }
1642
1643                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1644                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1645                     key.type != BTRFS_EXTENT_CSUM_KEY)
1646                         break;
1647
1648                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1649                 if (key.offset >= start + len)
1650                         break;
1651
1652                 if (key.offset > start)
1653                         start = key.offset;
1654
1655                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1656                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1657                 if (csum_end > start) {
1658                         size = min(csum_end - start, len);
1659                         len -= size;
1660                         start += size;
1661                         *found += size;
1662                 }
1663
1664                 path.slots[0]++;
1665         }
1666 out:
1667         btrfs_release_path(&path);
1668         if (ret < 0)
1669                 return ret;
1670         return 0;
1671 }
1672
1673 static int process_file_extent(struct btrfs_root *root,
1674                                 struct extent_buffer *eb,
1675                                 int slot, struct btrfs_key *key,
1676                                 struct shared_node *active_node)
1677 {
1678         struct inode_record *rec;
1679         struct btrfs_file_extent_item *fi;
1680         u64 num_bytes = 0;
1681         u64 disk_bytenr = 0;
1682         u64 extent_offset = 0;
1683         u64 mask = root->sectorsize - 1;
1684         int extent_type;
1685         int ret;
1686
1687         rec = active_node->current;
1688         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1689         rec->found_file_extent = 1;
1690
1691         if (rec->extent_start == (u64)-1) {
1692                 rec->extent_start = key->offset;
1693                 rec->extent_end = key->offset;
1694         }
1695
1696         if (rec->extent_end > key->offset)
1697                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1698         else if (rec->extent_end < key->offset) {
1699                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1700                                            key->offset - rec->extent_end);
1701                 if (ret < 0)
1702                         return ret;
1703         }
1704
1705         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1706         extent_type = btrfs_file_extent_type(eb, fi);
1707
1708         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1709                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1710                 if (num_bytes == 0)
1711                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1712                 rec->found_size += num_bytes;
1713                 num_bytes = (num_bytes + mask) & ~mask;
1714         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1715                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1716                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1717                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1718                 extent_offset = btrfs_file_extent_offset(eb, fi);
1719                 if (num_bytes == 0 || (num_bytes & mask))
1720                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1721                 if (num_bytes + extent_offset >
1722                     btrfs_file_extent_ram_bytes(eb, fi))
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1725                     (btrfs_file_extent_compression(eb, fi) ||
1726                      btrfs_file_extent_encryption(eb, fi) ||
1727                      btrfs_file_extent_other_encoding(eb, fi)))
1728                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1729                 if (disk_bytenr > 0)
1730                         rec->found_size += num_bytes;
1731         } else {
1732                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733         }
1734         rec->extent_end = key->offset + num_bytes;
1735
1736         /*
1737          * The data reloc tree will copy full extents into its inode and then
1738          * copy the corresponding csums.  Because the extent it copied could be
1739          * a preallocated extent that hasn't been written to yet there may be no
1740          * csums to copy, ergo we won't have csums for our file extent.  This is
1741          * ok so just don't bother checking csums if the inode belongs to the
1742          * data reloc tree.
1743          */
1744         if (disk_bytenr > 0 &&
1745             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1746                 u64 found;
1747                 if (btrfs_file_extent_compression(eb, fi))
1748                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1749                 else
1750                         disk_bytenr += extent_offset;
1751
1752                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1753                 if (ret < 0)
1754                         return ret;
1755                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1756                         if (found > 0)
1757                                 rec->found_csum_item = 1;
1758                         if (found < num_bytes)
1759                                 rec->some_csum_missing = 1;
1760                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1761                         if (found > 0)
1762                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1763                 }
1764         }
1765         return 0;
1766 }
1767
1768 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1769                             struct walk_control *wc)
1770 {
1771         struct btrfs_key key;
1772         u32 nritems;
1773         int i;
1774         int ret = 0;
1775         struct cache_tree *inode_cache;
1776         struct shared_node *active_node;
1777
1778         if (wc->root_level == wc->active_node &&
1779             btrfs_root_refs(&root->root_item) == 0)
1780                 return 0;
1781
1782         active_node = wc->nodes[wc->active_node];
1783         inode_cache = &active_node->inode_cache;
1784         nritems = btrfs_header_nritems(eb);
1785         for (i = 0; i < nritems; i++) {
1786                 btrfs_item_key_to_cpu(eb, &key, i);
1787
1788                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1789                         continue;
1790                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1791                         continue;
1792
1793                 if (active_node->current == NULL ||
1794                     active_node->current->ino < key.objectid) {
1795                         if (active_node->current) {
1796                                 active_node->current->checked = 1;
1797                                 maybe_free_inode_rec(inode_cache,
1798                                                      active_node->current);
1799                         }
1800                         active_node->current = get_inode_rec(inode_cache,
1801                                                              key.objectid, 1);
1802                         BUG_ON(IS_ERR(active_node->current));
1803                 }
1804                 switch (key.type) {
1805                 case BTRFS_DIR_ITEM_KEY:
1806                 case BTRFS_DIR_INDEX_KEY:
1807                         ret = process_dir_item(root, eb, i, &key, active_node);
1808                         break;
1809                 case BTRFS_INODE_REF_KEY:
1810                         ret = process_inode_ref(eb, i, &key, active_node);
1811                         break;
1812                 case BTRFS_INODE_EXTREF_KEY:
1813                         ret = process_inode_extref(eb, i, &key, active_node);
1814                         break;
1815                 case BTRFS_INODE_ITEM_KEY:
1816                         ret = process_inode_item(eb, i, &key, active_node);
1817                         break;
1818                 case BTRFS_EXTENT_DATA_KEY:
1819                         ret = process_file_extent(root, eb, i, &key,
1820                                                   active_node);
1821                         break;
1822                 default:
1823                         break;
1824                 };
1825         }
1826         return ret;
1827 }
1828
1829 static void reada_walk_down(struct btrfs_root *root,
1830                             struct extent_buffer *node, int slot)
1831 {
1832         u64 bytenr;
1833         u64 ptr_gen;
1834         u32 nritems;
1835         u32 blocksize;
1836         int i;
1837         int level;
1838
1839         level = btrfs_header_level(node);
1840         if (level != 1)
1841                 return;
1842
1843         nritems = btrfs_header_nritems(node);
1844         blocksize = root->nodesize;
1845         for (i = slot; i < nritems; i++) {
1846                 bytenr = btrfs_node_blockptr(node, i);
1847                 ptr_gen = btrfs_node_ptr_generation(node, i);
1848                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1849         }
1850 }
1851
1852 /*
1853  * Check the child node/leaf by the following condition:
1854  * 1. the first item key of the node/leaf should be the same with the one
1855  *    in parent.
1856  * 2. block in parent node should match the child node/leaf.
1857  * 3. generation of parent node and child's header should be consistent.
1858  *
1859  * Or the child node/leaf pointed by the key in parent is not valid.
1860  *
1861  * We hope to check leaf owner too, but since subvol may share leaves,
1862  * which makes leaf owner check not so strong, key check should be
1863  * sufficient enough for that case.
1864  */
1865 static int check_child_node(struct btrfs_root *root,
1866                             struct extent_buffer *parent, int slot,
1867                             struct extent_buffer *child)
1868 {
1869         struct btrfs_key parent_key;
1870         struct btrfs_key child_key;
1871         int ret = 0;
1872
1873         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1874         if (btrfs_header_level(child) == 0)
1875                 btrfs_item_key_to_cpu(child, &child_key, 0);
1876         else
1877                 btrfs_node_key_to_cpu(child, &child_key, 0);
1878
1879         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1880                 ret = -EINVAL;
1881                 fprintf(stderr,
1882                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1883                         parent_key.objectid, parent_key.type, parent_key.offset,
1884                         child_key.objectid, child_key.type, child_key.offset);
1885         }
1886         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1887                 ret = -EINVAL;
1888                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1889                         btrfs_node_blockptr(parent, slot),
1890                         btrfs_header_bytenr(child));
1891         }
1892         if (btrfs_node_ptr_generation(parent, slot) !=
1893             btrfs_header_generation(child)) {
1894                 ret = -EINVAL;
1895                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1896                         btrfs_header_generation(child),
1897                         btrfs_node_ptr_generation(parent, slot));
1898         }
1899         return ret;
1900 }
1901
1902 struct node_refs {
1903         u64 bytenr[BTRFS_MAX_LEVEL];
1904         u64 refs[BTRFS_MAX_LEVEL];
1905 };
1906
1907 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1908                           struct walk_control *wc, int *level,
1909                           struct node_refs *nrefs)
1910 {
1911         enum btrfs_tree_block_status status;
1912         u64 bytenr;
1913         u64 ptr_gen;
1914         struct extent_buffer *next;
1915         struct extent_buffer *cur;
1916         u32 blocksize;
1917         int ret, err = 0;
1918         u64 refs;
1919
1920         WARN_ON(*level < 0);
1921         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1922
1923         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1924                 refs = nrefs->refs[*level];
1925                 ret = 0;
1926         } else {
1927                 ret = btrfs_lookup_extent_info(NULL, root,
1928                                        path->nodes[*level]->start,
1929                                        *level, 1, &refs, NULL);
1930                 if (ret < 0) {
1931                         err = ret;
1932                         goto out;
1933                 }
1934                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1935                 nrefs->refs[*level] = refs;
1936         }
1937
1938         if (refs > 1) {
1939                 ret = enter_shared_node(root, path->nodes[*level]->start,
1940                                         refs, wc, *level);
1941                 if (ret > 0) {
1942                         err = ret;
1943                         goto out;
1944                 }
1945         }
1946
1947         while (*level >= 0) {
1948                 WARN_ON(*level < 0);
1949                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1950                 cur = path->nodes[*level];
1951
1952                 if (btrfs_header_level(cur) != *level)
1953                         WARN_ON(1);
1954
1955                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1956                         break;
1957                 if (*level == 0) {
1958                         ret = process_one_leaf(root, cur, wc);
1959                         if (ret < 0)
1960                                 err = ret;
1961                         break;
1962                 }
1963                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1964                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1965                 blocksize = root->nodesize;
1966
1967                 if (bytenr == nrefs->bytenr[*level - 1]) {
1968                         refs = nrefs->refs[*level - 1];
1969                 } else {
1970                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1971                                         *level - 1, 1, &refs, NULL);
1972                         if (ret < 0) {
1973                                 refs = 0;
1974                         } else {
1975                                 nrefs->bytenr[*level - 1] = bytenr;
1976                                 nrefs->refs[*level - 1] = refs;
1977                         }
1978                 }
1979
1980                 if (refs > 1) {
1981                         ret = enter_shared_node(root, bytenr, refs,
1982                                                 wc, *level - 1);
1983                         if (ret > 0) {
1984                                 path->slots[*level]++;
1985                                 continue;
1986                         }
1987                 }
1988
1989                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1990                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1991                         free_extent_buffer(next);
1992                         reada_walk_down(root, cur, path->slots[*level]);
1993                         next = read_tree_block(root, bytenr, blocksize,
1994                                                ptr_gen);
1995                         if (!extent_buffer_uptodate(next)) {
1996                                 struct btrfs_key node_key;
1997
1998                                 btrfs_node_key_to_cpu(path->nodes[*level],
1999                                                       &node_key,
2000                                                       path->slots[*level]);
2001                                 btrfs_add_corrupt_extent_record(root->fs_info,
2002                                                 &node_key,
2003                                                 path->nodes[*level]->start,
2004                                                 root->nodesize, *level);
2005                                 err = -EIO;
2006                                 goto out;
2007                         }
2008                 }
2009
2010                 ret = check_child_node(root, cur, path->slots[*level], next);
2011                 if (ret) {
2012                         err = ret;
2013                         goto out;
2014                 }
2015
2016                 if (btrfs_is_leaf(next))
2017                         status = btrfs_check_leaf(root, NULL, next);
2018                 else
2019                         status = btrfs_check_node(root, NULL, next);
2020                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2021                         free_extent_buffer(next);
2022                         err = -EIO;
2023                         goto out;
2024                 }
2025
2026                 *level = *level - 1;
2027                 free_extent_buffer(path->nodes[*level]);
2028                 path->nodes[*level] = next;
2029                 path->slots[*level] = 0;
2030         }
2031 out:
2032         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2033         return err;
2034 }
2035
2036 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2037                         struct walk_control *wc, int *level)
2038 {
2039         int i;
2040         struct extent_buffer *leaf;
2041
2042         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2043                 leaf = path->nodes[i];
2044                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2045                         path->slots[i]++;
2046                         *level = i;
2047                         return 0;
2048                 } else {
2049                         free_extent_buffer(path->nodes[*level]);
2050                         path->nodes[*level] = NULL;
2051                         BUG_ON(*level > wc->active_node);
2052                         if (*level == wc->active_node)
2053                                 leave_shared_node(root, wc, *level);
2054                         *level = i + 1;
2055                 }
2056         }
2057         return 1;
2058 }
2059
2060 static int check_root_dir(struct inode_record *rec)
2061 {
2062         struct inode_backref *backref;
2063         int ret = -1;
2064
2065         if (!rec->found_inode_item || rec->errors)
2066                 goto out;
2067         if (rec->nlink != 1 || rec->found_link != 0)
2068                 goto out;
2069         if (list_empty(&rec->backrefs))
2070                 goto out;
2071         backref = to_inode_backref(rec->backrefs.next);
2072         if (!backref->found_inode_ref)
2073                 goto out;
2074         if (backref->index != 0 || backref->namelen != 2 ||
2075             memcmp(backref->name, "..", 2))
2076                 goto out;
2077         if (backref->found_dir_index || backref->found_dir_item)
2078                 goto out;
2079         ret = 0;
2080 out:
2081         return ret;
2082 }
2083
2084 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2085                               struct btrfs_root *root, struct btrfs_path *path,
2086                               struct inode_record *rec)
2087 {
2088         struct btrfs_inode_item *ei;
2089         struct btrfs_key key;
2090         int ret;
2091
2092         key.objectid = rec->ino;
2093         key.type = BTRFS_INODE_ITEM_KEY;
2094         key.offset = (u64)-1;
2095
2096         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2097         if (ret < 0)
2098                 goto out;
2099         if (ret) {
2100                 if (!path->slots[0]) {
2101                         ret = -ENOENT;
2102                         goto out;
2103                 }
2104                 path->slots[0]--;
2105                 ret = 0;
2106         }
2107         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2108         if (key.objectid != rec->ino) {
2109                 ret = -ENOENT;
2110                 goto out;
2111         }
2112
2113         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2114                             struct btrfs_inode_item);
2115         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2116         btrfs_mark_buffer_dirty(path->nodes[0]);
2117         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2118         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2119                root->root_key.objectid);
2120 out:
2121         btrfs_release_path(path);
2122         return ret;
2123 }
2124
2125 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2126                                     struct btrfs_root *root,
2127                                     struct btrfs_path *path,
2128                                     struct inode_record *rec)
2129 {
2130         int ret;
2131
2132         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2133         btrfs_release_path(path);
2134         if (!ret)
2135                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2136         return ret;
2137 }
2138
2139 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2140                                struct btrfs_root *root,
2141                                struct btrfs_path *path,
2142                                struct inode_record *rec)
2143 {
2144         struct btrfs_inode_item *ei;
2145         struct btrfs_key key;
2146         int ret = 0;
2147
2148         key.objectid = rec->ino;
2149         key.type = BTRFS_INODE_ITEM_KEY;
2150         key.offset = 0;
2151
2152         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2153         if (ret) {
2154                 if (ret > 0)
2155                         ret = -ENOENT;
2156                 goto out;
2157         }
2158
2159         /* Since ret == 0, no need to check anything */
2160         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2161                             struct btrfs_inode_item);
2162         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2163         btrfs_mark_buffer_dirty(path->nodes[0]);
2164         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2165         printf("reset nbytes for ino %llu root %llu\n",
2166                rec->ino, root->root_key.objectid);
2167 out:
2168         btrfs_release_path(path);
2169         return ret;
2170 }
2171
2172 static int add_missing_dir_index(struct btrfs_root *root,
2173                                  struct cache_tree *inode_cache,
2174                                  struct inode_record *rec,
2175                                  struct inode_backref *backref)
2176 {
2177         struct btrfs_path *path;
2178         struct btrfs_trans_handle *trans;
2179         struct btrfs_dir_item *dir_item;
2180         struct extent_buffer *leaf;
2181         struct btrfs_key key;
2182         struct btrfs_disk_key disk_key;
2183         struct inode_record *dir_rec;
2184         unsigned long name_ptr;
2185         u32 data_size = sizeof(*dir_item) + backref->namelen;
2186         int ret;
2187
2188         path = btrfs_alloc_path();
2189         if (!path)
2190                 return -ENOMEM;
2191
2192         trans = btrfs_start_transaction(root, 1);
2193         if (IS_ERR(trans)) {
2194                 btrfs_free_path(path);
2195                 return PTR_ERR(trans);
2196         }
2197
2198         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2199                 (unsigned long long)rec->ino);
2200         key.objectid = backref->dir;
2201         key.type = BTRFS_DIR_INDEX_KEY;
2202         key.offset = backref->index;
2203
2204         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2205         BUG_ON(ret);
2206
2207         leaf = path->nodes[0];
2208         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2209
2210         disk_key.objectid = cpu_to_le64(rec->ino);
2211         disk_key.type = BTRFS_INODE_ITEM_KEY;
2212         disk_key.offset = 0;
2213
2214         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2215         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2216         btrfs_set_dir_data_len(leaf, dir_item, 0);
2217         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2218         name_ptr = (unsigned long)(dir_item + 1);
2219         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2220         btrfs_mark_buffer_dirty(leaf);
2221         btrfs_free_path(path);
2222         btrfs_commit_transaction(trans, root);
2223
2224         backref->found_dir_index = 1;
2225         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2226         BUG_ON(IS_ERR(dir_rec));
2227         if (!dir_rec)
2228                 return 0;
2229         dir_rec->found_size += backref->namelen;
2230         if (dir_rec->found_size == dir_rec->isize &&
2231             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2232                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2233         if (dir_rec->found_size != dir_rec->isize)
2234                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2235
2236         return 0;
2237 }
2238
2239 static int delete_dir_index(struct btrfs_root *root,
2240                             struct cache_tree *inode_cache,
2241                             struct inode_record *rec,
2242                             struct inode_backref *backref)
2243 {
2244         struct btrfs_trans_handle *trans;
2245         struct btrfs_dir_item *di;
2246         struct btrfs_path *path;
2247         int ret = 0;
2248
2249         path = btrfs_alloc_path();
2250         if (!path)
2251                 return -ENOMEM;
2252
2253         trans = btrfs_start_transaction(root, 1);
2254         if (IS_ERR(trans)) {
2255                 btrfs_free_path(path);
2256                 return PTR_ERR(trans);
2257         }
2258
2259
2260         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261                 (unsigned long long)backref->dir,
2262                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263                 (unsigned long long)root->objectid);
2264
2265         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2266                                     backref->name, backref->namelen,
2267                                     backref->index, -1);
2268         if (IS_ERR(di)) {
2269                 ret = PTR_ERR(di);
2270                 btrfs_free_path(path);
2271                 btrfs_commit_transaction(trans, root);
2272                 if (ret == -ENOENT)
2273                         return 0;
2274                 return ret;
2275         }
2276
2277         if (!di)
2278                 ret = btrfs_del_item(trans, root, path);
2279         else
2280                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2281         BUG_ON(ret);
2282         btrfs_free_path(path);
2283         btrfs_commit_transaction(trans, root);
2284         return ret;
2285 }
2286
2287 static int create_inode_item(struct btrfs_root *root,
2288                              struct inode_record *rec,
2289                              struct inode_backref *backref, int root_dir)
2290 {
2291         struct btrfs_trans_handle *trans;
2292         struct btrfs_inode_item inode_item;
2293         time_t now = time(NULL);
2294         int ret;
2295
2296         trans = btrfs_start_transaction(root, 1);
2297         if (IS_ERR(trans)) {
2298                 ret = PTR_ERR(trans);
2299                 return ret;
2300         }
2301
2302         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2303                 "be incomplete, please check permissions and content after "
2304                 "the fsck completes.\n", (unsigned long long)root->objectid,
2305                 (unsigned long long)rec->ino);
2306
2307         memset(&inode_item, 0, sizeof(inode_item));
2308         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2309         if (root_dir)
2310                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2311         else
2312                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2313         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2314         if (rec->found_dir_item) {
2315                 if (rec->found_file_extent)
2316                         fprintf(stderr, "root %llu inode %llu has both a dir "
2317                                 "item and extents, unsure if it is a dir or a "
2318                                 "regular file so setting it as a directory\n",
2319                                 (unsigned long long)root->objectid,
2320                                 (unsigned long long)rec->ino);
2321                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2322                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2323         } else if (!rec->found_dir_item) {
2324                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2325                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2326         }
2327         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2328         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2329         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2334         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2335
2336         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2337         BUG_ON(ret);
2338         btrfs_commit_transaction(trans, root);
2339         return 0;
2340 }
2341
2342 static int repair_inode_backrefs(struct btrfs_root *root,
2343                                  struct inode_record *rec,
2344                                  struct cache_tree *inode_cache,
2345                                  int delete)
2346 {
2347         struct inode_backref *tmp, *backref;
2348         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2349         int ret = 0;
2350         int repaired = 0;
2351
2352         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2353                 if (!delete && rec->ino == root_dirid) {
2354                         if (!rec->found_inode_item) {
2355                                 ret = create_inode_item(root, rec, backref, 1);
2356                                 if (ret)
2357                                         break;
2358                                 repaired++;
2359                         }
2360                 }
2361
2362                 /* Index 0 for root dir's are special, don't mess with it */
2363                 if (rec->ino == root_dirid && backref->index == 0)
2364                         continue;
2365
2366                 if (delete &&
2367                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2368                      (backref->found_dir_index && backref->found_inode_ref &&
2369                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2370                         ret = delete_dir_index(root, inode_cache, rec, backref);
2371                         if (ret)
2372                                 break;
2373                         repaired++;
2374                         list_del(&backref->list);
2375                         free(backref);
2376                 }
2377
2378                 if (!delete && !backref->found_dir_index &&
2379                     backref->found_dir_item && backref->found_inode_ref) {
2380                         ret = add_missing_dir_index(root, inode_cache, rec,
2381                                                     backref);
2382                         if (ret)
2383                                 break;
2384                         repaired++;
2385                         if (backref->found_dir_item &&
2386                             backref->found_dir_index &&
2387                             backref->found_dir_index) {
2388                                 if (!backref->errors &&
2389                                     backref->found_inode_ref) {
2390                                         list_del(&backref->list);
2391                                         free(backref);
2392                                 }
2393                         }
2394                 }
2395
2396                 if (!delete && (!backref->found_dir_index &&
2397                                 !backref->found_dir_item &&
2398                                 backref->found_inode_ref)) {
2399                         struct btrfs_trans_handle *trans;
2400                         struct btrfs_key location;
2401
2402                         ret = check_dir_conflict(root, backref->name,
2403                                                  backref->namelen,
2404                                                  backref->dir,
2405                                                  backref->index);
2406                         if (ret) {
2407                                 /*
2408                                  * let nlink fixing routine to handle it,
2409                                  * which can do it better.
2410                                  */
2411                                 ret = 0;
2412                                 break;
2413                         }
2414                         location.objectid = rec->ino;
2415                         location.type = BTRFS_INODE_ITEM_KEY;
2416                         location.offset = 0;
2417
2418                         trans = btrfs_start_transaction(root, 1);
2419                         if (IS_ERR(trans)) {
2420                                 ret = PTR_ERR(trans);
2421                                 break;
2422                         }
2423                         fprintf(stderr, "adding missing dir index/item pair "
2424                                 "for inode %llu\n",
2425                                 (unsigned long long)rec->ino);
2426                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2427                                                     backref->namelen,
2428                                                     backref->dir, &location,
2429                                                     imode_to_type(rec->imode),
2430                                                     backref->index);
2431                         BUG_ON(ret);
2432                         btrfs_commit_transaction(trans, root);
2433                         repaired++;
2434                 }
2435
2436                 if (!delete && (backref->found_inode_ref &&
2437                                 backref->found_dir_index &&
2438                                 backref->found_dir_item &&
2439                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2440                                 !rec->found_inode_item)) {
2441                         ret = create_inode_item(root, rec, backref, 0);
2442                         if (ret)
2443                                 break;
2444                         repaired++;
2445                 }
2446
2447         }
2448         return ret ? ret : repaired;
2449 }
2450
2451 /*
2452  * To determine the file type for nlink/inode_item repair
2453  *
2454  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2455  * Return -ENOENT if file type is not found.
2456  */
2457 static int find_file_type(struct inode_record *rec, u8 *type)
2458 {
2459         struct inode_backref *backref;
2460
2461         /* For inode item recovered case */
2462         if (rec->found_inode_item) {
2463                 *type = imode_to_type(rec->imode);
2464                 return 0;
2465         }
2466
2467         list_for_each_entry(backref, &rec->backrefs, list) {
2468                 if (backref->found_dir_index || backref->found_dir_item) {
2469                         *type = backref->filetype;
2470                         return 0;
2471                 }
2472         }
2473         return -ENOENT;
2474 }
2475
2476 /*
2477  * To determine the file name for nlink repair
2478  *
2479  * Return 0 if file name is found, set name and namelen.
2480  * Return -ENOENT if file name is not found.
2481  */
2482 static int find_file_name(struct inode_record *rec,
2483                           char *name, int *namelen)
2484 {
2485         struct inode_backref *backref;
2486
2487         list_for_each_entry(backref, &rec->backrefs, list) {
2488                 if (backref->found_dir_index || backref->found_dir_item ||
2489                     backref->found_inode_ref) {
2490                         memcpy(name, backref->name, backref->namelen);
2491                         *namelen = backref->namelen;
2492                         return 0;
2493                 }
2494         }
2495         return -ENOENT;
2496 }
2497
2498 /* Reset the nlink of the inode to the correct one */
2499 static int reset_nlink(struct btrfs_trans_handle *trans,
2500                        struct btrfs_root *root,
2501                        struct btrfs_path *path,
2502                        struct inode_record *rec)
2503 {
2504         struct inode_backref *backref;
2505         struct inode_backref *tmp;
2506         struct btrfs_key key;
2507         struct btrfs_inode_item *inode_item;
2508         int ret = 0;
2509
2510         /* We don't believe this either, reset it and iterate backref */
2511         rec->found_link = 0;
2512
2513         /* Remove all backref including the valid ones */
2514         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2515                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2516                                    backref->index, backref->name,
2517                                    backref->namelen, 0);
2518                 if (ret < 0)
2519                         goto out;
2520
2521                 /* remove invalid backref, so it won't be added back */
2522                 if (!(backref->found_dir_index &&
2523                       backref->found_dir_item &&
2524                       backref->found_inode_ref)) {
2525                         list_del(&backref->list);
2526                         free(backref);
2527                 } else {
2528                         rec->found_link++;
2529                 }
2530         }
2531
2532         /* Set nlink to 0 */
2533         key.objectid = rec->ino;
2534         key.type = BTRFS_INODE_ITEM_KEY;
2535         key.offset = 0;
2536         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2537         if (ret < 0)
2538                 goto out;
2539         if (ret > 0) {
2540                 ret = -ENOENT;
2541                 goto out;
2542         }
2543         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2544                                     struct btrfs_inode_item);
2545         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2546         btrfs_mark_buffer_dirty(path->nodes[0]);
2547         btrfs_release_path(path);
2548
2549         /*
2550          * Add back valid inode_ref/dir_item/dir_index,
2551          * add_link() will handle the nlink inc, so new nlink must be correct
2552          */
2553         list_for_each_entry(backref, &rec->backrefs, list) {
2554                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2555                                      backref->name, backref->namelen,
2556                                      backref->filetype, &backref->index, 1);
2557                 if (ret < 0)
2558                         goto out;
2559         }
2560 out:
2561         btrfs_release_path(path);
2562         return ret;
2563 }
2564
2565 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2566                                struct btrfs_root *root,
2567                                struct btrfs_path *path,
2568                                struct inode_record *rec)
2569 {
2570         char *dir_name = "lost+found";
2571         char namebuf[BTRFS_NAME_LEN] = {0};
2572         u64 lost_found_ino;
2573         u32 mode = 0700;
2574         u8 type = 0;
2575         int namelen = 0;
2576         int name_recovered = 0;
2577         int type_recovered = 0;
2578         int ret = 0;
2579
2580         /*
2581          * Get file name and type first before these invalid inode ref
2582          * are deleted by remove_all_invalid_backref()
2583          */
2584         name_recovered = !find_file_name(rec, namebuf, &namelen);
2585         type_recovered = !find_file_type(rec, &type);
2586
2587         if (!name_recovered) {
2588                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2589                        rec->ino, rec->ino);
2590                 namelen = count_digits(rec->ino);
2591                 sprintf(namebuf, "%llu", rec->ino);
2592                 name_recovered = 1;
2593         }
2594         if (!type_recovered) {
2595                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2596                        rec->ino);
2597                 type = BTRFS_FT_REG_FILE;
2598                 type_recovered = 1;
2599         }
2600
2601         ret = reset_nlink(trans, root, path, rec);
2602         if (ret < 0) {
2603                 fprintf(stderr,
2604                         "Failed to reset nlink for inode %llu: %s\n",
2605                         rec->ino, strerror(-ret));
2606                 goto out;
2607         }
2608
2609         if (rec->found_link == 0) {
2610                 lost_found_ino = root->highest_inode;
2611                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2612                         ret = -EOVERFLOW;
2613                         goto out;
2614                 }
2615                 lost_found_ino++;
2616                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2617                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2618                                   mode);
2619                 if (ret < 0) {
2620                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2621                                 dir_name, strerror(-ret));
2622                         goto out;
2623                 }
2624                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2625                                      namebuf, namelen, type, NULL, 1);
2626                 /*
2627                  * Add ".INO" suffix several times to handle case where
2628                  * "FILENAME.INO" is already taken by another file.
2629                  */
2630                 while (ret == -EEXIST) {
2631                         /*
2632                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2633                          */
2634                         if (namelen + count_digits(rec->ino) + 1 >
2635                             BTRFS_NAME_LEN) {
2636                                 ret = -EFBIG;
2637                                 goto out;
2638                         }
2639                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2640                                  ".%llu", rec->ino);
2641                         namelen += count_digits(rec->ino) + 1;
2642                         ret = btrfs_add_link(trans, root, rec->ino,
2643                                              lost_found_ino, namebuf,
2644                                              namelen, type, NULL, 1);
2645                 }
2646                 if (ret < 0) {
2647                         fprintf(stderr,
2648                                 "Failed to link the inode %llu to %s dir: %s\n",
2649                                 rec->ino, dir_name, strerror(-ret));
2650                         goto out;
2651                 }
2652                 /*
2653                  * Just increase the found_link, don't actually add the
2654                  * backref. This will make things easier and this inode
2655                  * record will be freed after the repair is done.
2656                  * So fsck will not report problem about this inode.
2657                  */
2658                 rec->found_link++;
2659                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2660                        namelen, namebuf, dir_name);
2661         }
2662         printf("Fixed the nlink of inode %llu\n", rec->ino);
2663 out:
2664         /*
2665          * Clear the flag anyway, or we will loop forever for the same inode
2666          * as it will not be removed from the bad inode list and the dead loop
2667          * happens.
2668          */
2669         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2670         btrfs_release_path(path);
2671         return ret;
2672 }
2673
2674 /*
2675  * Check if there is any normal(reg or prealloc) file extent for given
2676  * ino.
2677  * This is used to determine the file type when neither its dir_index/item or
2678  * inode_item exists.
2679  *
2680  * This will *NOT* report error, if any error happens, just consider it does
2681  * not have any normal file extent.
2682  */
2683 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2684 {
2685         struct btrfs_path *path;
2686         struct btrfs_key key;
2687         struct btrfs_key found_key;
2688         struct btrfs_file_extent_item *fi;
2689         u8 type;
2690         int ret = 0;
2691
2692         path = btrfs_alloc_path();
2693         if (!path)
2694                 goto out;
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2705                 ret = btrfs_next_leaf(root, path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2713                                       path->slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path->nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_free_path(path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path *path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         path = btrfs_alloc_path();
2930         if (!path)
2931                 return -ENOMEM;
2932
2933         /*
2934          * For nlink repair, it may create a dir and add link, so
2935          * 2 for parent(256)'s dir_index and dir_item
2936          * 2 for lost+found dir's inode_item and inode_ref
2937          * 1 for the new inode_ref of the file
2938          * 2 for lost+found dir's dir_index and dir_item for the file
2939          */
2940         trans = btrfs_start_transaction(root, 7);
2941         if (IS_ERR(trans)) {
2942                 btrfs_free_path(path);
2943                 return PTR_ERR(trans);
2944         }
2945
2946         if (rec->errors & I_ERR_NO_INODE_ITEM)
2947                 ret = repair_inode_no_item(trans, root, path, rec);
2948         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2949                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2950         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2951                 ret = repair_inode_discount_extent(trans, root, path, rec);
2952         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2953                 ret = repair_inode_isize(trans, root, path, rec);
2954         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2955                 ret = repair_inode_orphan_item(trans, root, path, rec);
2956         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2957                 ret = repair_inode_nlinks(trans, root, path, rec);
2958         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2959                 ret = repair_inode_nbytes(trans, root, path, rec);
2960         btrfs_commit_transaction(trans, root);
2961         btrfs_free_path(path);
2962         return ret;
2963 }
2964
2965 static int check_inode_recs(struct btrfs_root *root,
2966                             struct cache_tree *inode_cache)
2967 {
2968         struct cache_extent *cache;
2969         struct ptr_node *node;
2970         struct inode_record *rec;
2971         struct inode_backref *backref;
2972         int stage = 0;
2973         int ret = 0;
2974         int err = 0;
2975         u64 error = 0;
2976         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2977
2978         if (btrfs_root_refs(&root->root_item) == 0) {
2979                 if (!cache_tree_empty(inode_cache))
2980                         fprintf(stderr, "warning line %d\n", __LINE__);
2981                 return 0;
2982         }
2983
2984         /*
2985          * We need to record the highest inode number for later 'lost+found'
2986          * dir creation.
2987          * We must select an ino not used/referred by any existing inode, or
2988          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2989          * this may cause 'lost+found' dir has wrong nlinks.
2990          */
2991         cache = last_cache_extent(inode_cache);
2992         if (cache) {
2993                 node = container_of(cache, struct ptr_node, cache);
2994                 rec = node->data;
2995                 if (rec->ino > root->highest_inode)
2996                         root->highest_inode = rec->ino;
2997         }
2998
2999         /*
3000          * We need to repair backrefs first because we could change some of the
3001          * errors in the inode recs.
3002          *
3003          * We also need to go through and delete invalid backrefs first and then
3004          * add the correct ones second.  We do this because we may get EEXIST
3005          * when adding back the correct index because we hadn't yet deleted the
3006          * invalid index.
3007          *
3008          * For example, if we were missing a dir index then the directories
3009          * isize would be wrong, so if we fixed the isize to what we thought it
3010          * would be and then fixed the backref we'd still have a invalid fs, so
3011          * we need to add back the dir index and then check to see if the isize
3012          * is still wrong.
3013          */
3014         while (stage < 3) {
3015                 stage++;
3016                 if (stage == 3 && !err)
3017                         break;
3018
3019                 cache = search_cache_extent(inode_cache, 0);
3020                 while (repair && cache) {
3021                         node = container_of(cache, struct ptr_node, cache);
3022                         rec = node->data;
3023                         cache = next_cache_extent(cache);
3024
3025                         /* Need to free everything up and rescan */
3026                         if (stage == 3) {
3027                                 remove_cache_extent(inode_cache, &node->cache);
3028                                 free(node);
3029                                 free_inode_rec(rec);
3030                                 continue;
3031                         }
3032
3033                         if (list_empty(&rec->backrefs))
3034                                 continue;
3035
3036                         ret = repair_inode_backrefs(root, rec, inode_cache,
3037                                                     stage == 1);
3038                         if (ret < 0) {
3039                                 err = ret;
3040                                 stage = 2;
3041                                 break;
3042                         } if (ret > 0) {
3043                                 err = -EAGAIN;
3044                         }
3045                 }
3046         }
3047         if (err)
3048                 return err;
3049
3050         rec = get_inode_rec(inode_cache, root_dirid, 0);
3051         BUG_ON(IS_ERR(rec));
3052         if (rec) {
3053                 ret = check_root_dir(rec);
3054                 if (ret) {
3055                         fprintf(stderr, "root %llu root dir %llu error\n",
3056                                 (unsigned long long)root->root_key.objectid,
3057                                 (unsigned long long)root_dirid);
3058                         print_inode_error(root, rec);
3059                         error++;
3060                 }
3061         } else {
3062                 if (repair) {
3063                         struct btrfs_trans_handle *trans;
3064
3065                         trans = btrfs_start_transaction(root, 1);
3066                         if (IS_ERR(trans)) {
3067                                 err = PTR_ERR(trans);
3068                                 return err;
3069                         }
3070
3071                         fprintf(stderr,
3072                                 "root %llu missing its root dir, recreating\n",
3073                                 (unsigned long long)root->objectid);
3074
3075                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3076                         BUG_ON(ret);
3077
3078                         btrfs_commit_transaction(trans, root);
3079                         return -EAGAIN;
3080                 }
3081
3082                 fprintf(stderr, "root %llu root dir %llu not found\n",
3083                         (unsigned long long)root->root_key.objectid,
3084                         (unsigned long long)root_dirid);
3085         }
3086
3087         while (1) {
3088                 cache = search_cache_extent(inode_cache, 0);
3089                 if (!cache)
3090                         break;
3091                 node = container_of(cache, struct ptr_node, cache);
3092                 rec = node->data;
3093                 remove_cache_extent(inode_cache, &node->cache);
3094                 free(node);
3095                 if (rec->ino == root_dirid ||
3096                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3097                         free_inode_rec(rec);
3098                         continue;
3099                 }
3100
3101                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3102                         ret = check_orphan_item(root, rec->ino);
3103                         if (ret == 0)
3104                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3105                         if (can_free_inode_rec(rec)) {
3106                                 free_inode_rec(rec);
3107                                 continue;
3108                         }
3109                 }
3110
3111                 if (!rec->found_inode_item)
3112                         rec->errors |= I_ERR_NO_INODE_ITEM;
3113                 if (rec->found_link != rec->nlink)
3114                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3115                 if (repair) {
3116                         ret = try_repair_inode(root, rec);
3117                         if (ret == 0 && can_free_inode_rec(rec)) {
3118                                 free_inode_rec(rec);
3119                                 continue;
3120                         }
3121                         ret = 0;
3122                 }
3123
3124                 if (!(repair && ret == 0))
3125                         error++;
3126                 print_inode_error(root, rec);
3127                 list_for_each_entry(backref, &rec->backrefs, list) {
3128                         if (!backref->found_dir_item)
3129                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3130                         if (!backref->found_dir_index)
3131                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3132                         if (!backref->found_inode_ref)
3133                                 backref->errors |= REF_ERR_NO_INODE_REF;
3134                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3135                                 " namelen %u name %s filetype %d errors %x",
3136                                 (unsigned long long)backref->dir,
3137                                 (unsigned long long)backref->index,
3138                                 backref->namelen, backref->name,
3139                                 backref->filetype, backref->errors);
3140                         print_ref_error(backref->errors);
3141                 }
3142                 free_inode_rec(rec);
3143         }
3144         return (error > 0) ? -1 : 0;
3145 }
3146
3147 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3148                                         u64 objectid)
3149 {
3150         struct cache_extent *cache;
3151         struct root_record *rec = NULL;
3152         int ret;
3153
3154         cache = lookup_cache_extent(root_cache, objectid, 1);
3155         if (cache) {
3156                 rec = container_of(cache, struct root_record, cache);
3157         } else {
3158                 rec = calloc(1, sizeof(*rec));
3159                 if (!rec)
3160                         return ERR_PTR(-ENOMEM);
3161                 rec->objectid = objectid;
3162                 INIT_LIST_HEAD(&rec->backrefs);
3163                 rec->cache.start = objectid;
3164                 rec->cache.size = 1;
3165
3166                 ret = insert_cache_extent(root_cache, &rec->cache);
3167                 if (ret)
3168                         return ERR_PTR(-EEXIST);
3169         }
3170         return rec;
3171 }
3172
3173 static struct root_backref *get_root_backref(struct root_record *rec,
3174                                              u64 ref_root, u64 dir, u64 index,
3175                                              const char *name, int namelen)
3176 {
3177         struct root_backref *backref;
3178
3179         list_for_each_entry(backref, &rec->backrefs, list) {
3180                 if (backref->ref_root != ref_root || backref->dir != dir ||
3181                     backref->namelen != namelen)
3182                         continue;
3183                 if (memcmp(name, backref->name, namelen))
3184                         continue;
3185                 return backref;
3186         }
3187
3188         backref = calloc(1, sizeof(*backref) + namelen + 1);
3189         if (!backref)
3190                 return NULL;
3191         backref->ref_root = ref_root;
3192         backref->dir = dir;
3193         backref->index = index;
3194         backref->namelen = namelen;
3195         memcpy(backref->name, name, namelen);
3196         backref->name[namelen] = '\0';
3197         list_add_tail(&backref->list, &rec->backrefs);
3198         return backref;
3199 }
3200
3201 static void free_root_record(struct cache_extent *cache)
3202 {
3203         struct root_record *rec;
3204         struct root_backref *backref;
3205
3206         rec = container_of(cache, struct root_record, cache);
3207         while (!list_empty(&rec->backrefs)) {
3208                 backref = to_root_backref(rec->backrefs.next);
3209                 list_del(&backref->list);
3210                 free(backref);
3211         }
3212
3213         kfree(rec);
3214 }
3215
3216 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3217
3218 static int add_root_backref(struct cache_tree *root_cache,
3219                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3220                             const char *name, int namelen,
3221                             int item_type, int errors)
3222 {
3223         struct root_record *rec;
3224         struct root_backref *backref;
3225
3226         rec = get_root_rec(root_cache, root_id);
3227         BUG_ON(IS_ERR(rec));
3228         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3229         BUG_ON(!backref);
3230
3231         backref->errors |= errors;
3232
3233         if (item_type != BTRFS_DIR_ITEM_KEY) {
3234                 if (backref->found_dir_index || backref->found_back_ref ||
3235                     backref->found_forward_ref) {
3236                         if (backref->index != index)
3237                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3238                 } else {
3239                         backref->index = index;
3240                 }
3241         }
3242
3243         if (item_type == BTRFS_DIR_ITEM_KEY) {
3244                 if (backref->found_forward_ref)
3245                         rec->found_ref++;
3246                 backref->found_dir_item = 1;
3247         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3248                 backref->found_dir_index = 1;
3249         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3250                 if (backref->found_forward_ref)
3251                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3252                 else if (backref->found_dir_item)
3253                         rec->found_ref++;
3254                 backref->found_forward_ref = 1;
3255         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3256                 if (backref->found_back_ref)
3257                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3258                 backref->found_back_ref = 1;
3259         } else {
3260                 BUG_ON(1);
3261         }
3262
3263         if (backref->found_forward_ref && backref->found_dir_item)
3264                 backref->reachable = 1;
3265         return 0;
3266 }
3267
3268 static int merge_root_recs(struct btrfs_root *root,
3269                            struct cache_tree *src_cache,
3270                            struct cache_tree *dst_cache)
3271 {
3272         struct cache_extent *cache;
3273         struct ptr_node *node;
3274         struct inode_record *rec;
3275         struct inode_backref *backref;
3276         int ret = 0;
3277
3278         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3279                 free_inode_recs_tree(src_cache);
3280                 return 0;
3281         }
3282
3283         while (1) {
3284                 cache = search_cache_extent(src_cache, 0);
3285                 if (!cache)
3286                         break;
3287                 node = container_of(cache, struct ptr_node, cache);
3288                 rec = node->data;
3289                 remove_cache_extent(src_cache, &node->cache);
3290                 free(node);
3291
3292                 ret = is_child_root(root, root->objectid, rec->ino);
3293                 if (ret < 0)
3294                         break;
3295                 else if (ret == 0)
3296                         goto skip;
3297
3298                 list_for_each_entry(backref, &rec->backrefs, list) {
3299                         BUG_ON(backref->found_inode_ref);
3300                         if (backref->found_dir_item)
3301                                 add_root_backref(dst_cache, rec->ino,
3302                                         root->root_key.objectid, backref->dir,
3303                                         backref->index, backref->name,
3304                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3305                                         backref->errors);
3306                         if (backref->found_dir_index)
3307                                 add_root_backref(dst_cache, rec->ino,
3308                                         root->root_key.objectid, backref->dir,
3309                                         backref->index, backref->name,
3310                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3311                                         backref->errors);
3312                 }
3313 skip:
3314                 free_inode_rec(rec);
3315         }
3316         if (ret < 0)
3317                 return ret;
3318         return 0;
3319 }
3320
3321 static int check_root_refs(struct btrfs_root *root,
3322                            struct cache_tree *root_cache)
3323 {
3324         struct root_record *rec;
3325         struct root_record *ref_root;
3326         struct root_backref *backref;
3327         struct cache_extent *cache;
3328         int loop = 1;
3329         int ret;
3330         int error;
3331         int errors = 0;
3332
3333         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3334         BUG_ON(IS_ERR(rec));
3335         rec->found_ref = 1;
3336
3337         /* fixme: this can not detect circular references */
3338         while (loop) {
3339                 loop = 0;
3340                 cache = search_cache_extent(root_cache, 0);
3341                 while (1) {
3342                         if (!cache)
3343                                 break;
3344                         rec = container_of(cache, struct root_record, cache);
3345                         cache = next_cache_extent(cache);
3346
3347                         if (rec->found_ref == 0)
3348                                 continue;
3349
3350                         list_for_each_entry(backref, &rec->backrefs, list) {
3351                                 if (!backref->reachable)
3352                                         continue;
3353
3354                                 ref_root = get_root_rec(root_cache,
3355                                                         backref->ref_root);
3356                                 BUG_ON(IS_ERR(ref_root));
3357                                 if (ref_root->found_ref > 0)
3358                                         continue;
3359
3360                                 backref->reachable = 0;
3361                                 rec->found_ref--;
3362                                 if (rec->found_ref == 0)
3363                                         loop = 1;
3364                         }
3365                 }
3366         }
3367
3368         cache = search_cache_extent(root_cache, 0);
3369         while (1) {
3370                 if (!cache)
3371                         break;
3372                 rec = container_of(cache, struct root_record, cache);
3373                 cache = next_cache_extent(cache);
3374
3375                 if (rec->found_ref == 0 &&
3376                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3377                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3378                         ret = check_orphan_item(root->fs_info->tree_root,
3379                                                 rec->objectid);
3380                         if (ret == 0)
3381                                 continue;
3382
3383                         /*
3384                          * If we don't have a root item then we likely just have
3385                          * a dir item in a snapshot for this root but no actual
3386                          * ref key or anything so it's meaningless.
3387                          */
3388                         if (!rec->found_root_item)
3389                                 continue;
3390                         errors++;
3391                         fprintf(stderr, "fs tree %llu not referenced\n",
3392                                 (unsigned long long)rec->objectid);
3393                 }
3394
3395                 error = 0;
3396                 if (rec->found_ref > 0 && !rec->found_root_item)
3397                         error = 1;
3398                 list_for_each_entry(backref, &rec->backrefs, list) {
3399                         if (!backref->found_dir_item)
3400                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3401                         if (!backref->found_dir_index)
3402                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3403                         if (!backref->found_back_ref)
3404                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3405                         if (!backref->found_forward_ref)
3406                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3407                         if (backref->reachable && backref->errors)
3408                                 error = 1;
3409                 }
3410                 if (!error)
3411                         continue;
3412
3413                 errors++;
3414                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3415                         (unsigned long long)rec->objectid, rec->found_ref,
3416                          rec->found_root_item ? "" : "not found");
3417
3418                 list_for_each_entry(backref, &rec->backrefs, list) {
3419                         if (!backref->reachable)
3420                                 continue;
3421                         if (!backref->errors && rec->found_root_item)
3422                                 continue;
3423                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3424                                 " index %llu namelen %u name %s errors %x\n",
3425                                 (unsigned long long)backref->ref_root,
3426                                 (unsigned long long)backref->dir,
3427                                 (unsigned long long)backref->index,
3428                                 backref->namelen, backref->name,
3429                                 backref->errors);
3430                         print_ref_error(backref->errors);
3431                 }
3432         }
3433         return errors > 0 ? 1 : 0;
3434 }
3435
3436 static int process_root_ref(struct extent_buffer *eb, int slot,
3437                             struct btrfs_key *key,
3438                             struct cache_tree *root_cache)
3439 {
3440         u64 dirid;
3441         u64 index;
3442         u32 len;
3443         u32 name_len;
3444         struct btrfs_root_ref *ref;
3445         char namebuf[BTRFS_NAME_LEN];
3446         int error;
3447
3448         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3449
3450         dirid = btrfs_root_ref_dirid(eb, ref);
3451         index = btrfs_root_ref_sequence(eb, ref);
3452         name_len = btrfs_root_ref_name_len(eb, ref);
3453
3454         if (name_len <= BTRFS_NAME_LEN) {
3455                 len = name_len;
3456                 error = 0;
3457         } else {
3458                 len = BTRFS_NAME_LEN;
3459                 error = REF_ERR_NAME_TOO_LONG;
3460         }
3461         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3462
3463         if (key->type == BTRFS_ROOT_REF_KEY) {
3464                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3465                                  index, namebuf, len, key->type, error);
3466         } else {
3467                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3468                                  index, namebuf, len, key->type, error);
3469         }
3470         return 0;
3471 }
3472
3473 static void free_corrupt_block(struct cache_extent *cache)
3474 {
3475         struct btrfs_corrupt_block *corrupt;
3476
3477         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3478         free(corrupt);
3479 }
3480
3481 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3482
3483 /*
3484  * Repair the btree of the given root.
3485  *
3486  * The fix is to remove the node key in corrupt_blocks cache_tree.
3487  * and rebalance the tree.
3488  * After the fix, the btree should be writeable.
3489  */
3490 static int repair_btree(struct btrfs_root *root,
3491                         struct cache_tree *corrupt_blocks)
3492 {
3493         struct btrfs_trans_handle *trans;
3494         struct btrfs_path *path;
3495         struct btrfs_corrupt_block *corrupt;
3496         struct cache_extent *cache;
3497         struct btrfs_key key;
3498         u64 offset;
3499         int level;
3500         int ret = 0;
3501
3502         if (cache_tree_empty(corrupt_blocks))
3503                 return 0;
3504
3505         path = btrfs_alloc_path();
3506         if (!path)
3507                 return -ENOMEM;
3508
3509         trans = btrfs_start_transaction(root, 1);
3510         if (IS_ERR(trans)) {
3511                 ret = PTR_ERR(trans);
3512                 fprintf(stderr, "Error starting transaction: %s\n",
3513                         strerror(-ret));
3514                 goto out_free_path;
3515         }
3516         cache = first_cache_extent(corrupt_blocks);
3517         while (cache) {
3518                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3519                                        cache);
3520                 level = corrupt->level;
3521                 path->lowest_level = level;
3522                 key.objectid = corrupt->key.objectid;
3523                 key.type = corrupt->key.type;
3524                 key.offset = corrupt->key.offset;
3525
3526                 /*
3527                  * Here we don't want to do any tree balance, since it may
3528                  * cause a balance with corrupted brother leaf/node,
3529                  * so ins_len set to 0 here.
3530                  * Balance will be done after all corrupt node/leaf is deleted.
3531                  */
3532                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3533                 if (ret < 0)
3534                         goto out;
3535                 offset = btrfs_node_blockptr(path->nodes[level],
3536                                              path->slots[level]);
3537
3538                 /* Remove the ptr */
3539                 ret = btrfs_del_ptr(trans, root, path, level,
3540                                     path->slots[level]);
3541                 if (ret < 0)
3542                         goto out;
3543                 /*
3544                  * Remove the corresponding extent
3545                  * return value is not concerned.
3546                  */
3547                 btrfs_release_path(path);
3548                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3549                                         0, root->root_key.objectid,
3550                                         level - 1, 0);
3551                 cache = next_cache_extent(cache);
3552         }
3553
3554         /* Balance the btree using btrfs_search_slot() */
3555         cache = first_cache_extent(corrupt_blocks);
3556         while (cache) {
3557                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3558                                        cache);
3559                 memcpy(&key, &corrupt->key, sizeof(key));
3560                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3561                 if (ret < 0)
3562                         goto out;
3563                 /* return will always >0 since it won't find the item */
3564                 ret = 0;
3565                 btrfs_release_path(path);
3566                 cache = next_cache_extent(cache);
3567         }
3568 out:
3569         btrfs_commit_transaction(trans, root);
3570 out_free_path:
3571         btrfs_free_path(path);
3572         return ret;
3573 }
3574
3575 static int check_fs_root(struct btrfs_root *root,
3576                          struct cache_tree *root_cache,
3577                          struct walk_control *wc)
3578 {
3579         int ret = 0;
3580         int err = 0;
3581         int wret;
3582         int level;
3583         struct btrfs_path path;
3584         struct shared_node root_node;
3585         struct root_record *rec;
3586         struct btrfs_root_item *root_item = &root->root_item;
3587         struct cache_tree corrupt_blocks;
3588         struct orphan_data_extent *orphan;
3589         struct orphan_data_extent *tmp;
3590         enum btrfs_tree_block_status status;
3591         struct node_refs nrefs;
3592
3593         /*
3594          * Reuse the corrupt_block cache tree to record corrupted tree block
3595          *
3596          * Unlike the usage in extent tree check, here we do it in a per
3597          * fs/subvol tree base.
3598          */
3599         cache_tree_init(&corrupt_blocks);
3600         root->fs_info->corrupt_blocks = &corrupt_blocks;
3601
3602         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3603                 rec = get_root_rec(root_cache, root->root_key.objectid);
3604                 BUG_ON(IS_ERR(rec));
3605                 if (btrfs_root_refs(root_item) > 0)
3606                         rec->found_root_item = 1;
3607         }
3608
3609         btrfs_init_path(&path);
3610         memset(&root_node, 0, sizeof(root_node));
3611         cache_tree_init(&root_node.root_cache);
3612         cache_tree_init(&root_node.inode_cache);
3613         memset(&nrefs, 0, sizeof(nrefs));
3614
3615         /* Move the orphan extent record to corresponding inode_record */
3616         list_for_each_entry_safe(orphan, tmp,
3617                                  &root->orphan_data_extents, list) {
3618                 struct inode_record *inode;
3619
3620                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3621                                       1);
3622                 BUG_ON(IS_ERR(inode));
3623                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3624                 list_move(&orphan->list, &inode->orphan_extents);
3625         }
3626
3627         level = btrfs_header_level(root->node);
3628         memset(wc->nodes, 0, sizeof(wc->nodes));
3629         wc->nodes[level] = &root_node;
3630         wc->active_node = level;
3631         wc->root_level = level;
3632
3633         /* We may not have checked the root block, lets do that now */
3634         if (btrfs_is_leaf(root->node))
3635                 status = btrfs_check_leaf(root, NULL, root->node);
3636         else
3637                 status = btrfs_check_node(root, NULL, root->node);
3638         if (status != BTRFS_TREE_BLOCK_CLEAN)
3639                 return -EIO;
3640
3641         if (btrfs_root_refs(root_item) > 0 ||
3642             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3643                 path.nodes[level] = root->node;
3644                 extent_buffer_get(root->node);
3645                 path.slots[level] = 0;
3646         } else {
3647                 struct btrfs_key key;
3648                 struct btrfs_disk_key found_key;
3649
3650                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3651                 level = root_item->drop_level;
3652                 path.lowest_level = level;
3653                 if (level > btrfs_header_level(root->node) ||
3654                     level >= BTRFS_MAX_LEVEL) {
3655                         error("ignoring invalid drop level: %u", level);
3656                         goto skip_walking;
3657                 }
3658                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3659                 if (wret < 0)
3660                         goto skip_walking;
3661                 btrfs_node_key(path.nodes[level], &found_key,
3662                                 path.slots[level]);
3663                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3664                                         sizeof(found_key)));
3665         }
3666
3667         while (1) {
3668                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3669                 if (wret < 0)
3670                         ret = wret;
3671                 if (wret != 0)
3672                         break;
3673
3674                 wret = walk_up_tree(root, &path, wc, &level);
3675                 if (wret < 0)
3676                         ret = wret;
3677                 if (wret != 0)
3678                         break;
3679         }
3680 skip_walking:
3681         btrfs_release_path(&path);
3682
3683         if (!cache_tree_empty(&corrupt_blocks)) {
3684                 struct cache_extent *cache;
3685                 struct btrfs_corrupt_block *corrupt;
3686
3687                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3688                        root->root_key.objectid);
3689                 cache = first_cache_extent(&corrupt_blocks);
3690                 while (cache) {
3691                         corrupt = container_of(cache,
3692                                                struct btrfs_corrupt_block,
3693                                                cache);
3694                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3695                                cache->start, corrupt->level,
3696                                corrupt->key.objectid, corrupt->key.type,
3697                                corrupt->key.offset);
3698                         cache = next_cache_extent(cache);
3699                 }
3700                 if (repair) {
3701                         printf("Try to repair the btree for root %llu\n",
3702                                root->root_key.objectid);
3703                         ret = repair_btree(root, &corrupt_blocks);
3704                         if (ret < 0)
3705                                 fprintf(stderr, "Failed to repair btree: %s\n",
3706                                         strerror(-ret));
3707                         if (!ret)
3708                                 printf("Btree for root %llu is fixed\n",
3709                                        root->root_key.objectid);
3710                 }
3711         }
3712
3713         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3714         if (err < 0)
3715                 ret = err;
3716
3717         if (root_node.current) {
3718                 root_node.current->checked = 1;
3719                 maybe_free_inode_rec(&root_node.inode_cache,
3720                                 root_node.current);
3721         }
3722
3723         err = check_inode_recs(root, &root_node.inode_cache);
3724         if (!ret)
3725                 ret = err;
3726
3727         free_corrupt_blocks_tree(&corrupt_blocks);
3728         root->fs_info->corrupt_blocks = NULL;
3729         free_orphan_data_extents(&root->orphan_data_extents);
3730         return ret;
3731 }
3732
3733 static int fs_root_objectid(u64 objectid)
3734 {
3735         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3736             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3737                 return 1;
3738         return is_fstree(objectid);
3739 }
3740
3741 static int check_fs_roots(struct btrfs_root *root,
3742                           struct cache_tree *root_cache)
3743 {
3744         struct btrfs_path path;
3745         struct btrfs_key key;
3746         struct walk_control wc;
3747         struct extent_buffer *leaf, *tree_node;
3748         struct btrfs_root *tmp_root;
3749         struct btrfs_root *tree_root = root->fs_info->tree_root;
3750         int ret;
3751         int err = 0;
3752
3753         if (ctx.progress_enabled) {
3754                 ctx.tp = TASK_FS_ROOTS;
3755                 task_start(ctx.info);
3756         }
3757
3758         /*
3759          * Just in case we made any changes to the extent tree that weren't
3760          * reflected into the free space cache yet.
3761          */
3762         if (repair)
3763                 reset_cached_block_groups(root->fs_info);
3764         memset(&wc, 0, sizeof(wc));
3765         cache_tree_init(&wc.shared);
3766         btrfs_init_path(&path);
3767
3768 again:
3769         key.offset = 0;
3770         key.objectid = 0;
3771         key.type = BTRFS_ROOT_ITEM_KEY;
3772         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3773         if (ret < 0) {
3774                 err = 1;
3775                 goto out;
3776         }
3777         tree_node = tree_root->node;
3778         while (1) {
3779                 if (tree_node != tree_root->node) {
3780                         free_root_recs_tree(root_cache);
3781                         btrfs_release_path(&path);
3782                         goto again;
3783                 }
3784                 leaf = path.nodes[0];
3785                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3786                         ret = btrfs_next_leaf(tree_root, &path);
3787                         if (ret) {
3788                                 if (ret < 0)
3789                                         err = 1;
3790                                 break;
3791                         }
3792                         leaf = path.nodes[0];
3793                 }
3794                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3795                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3796                     fs_root_objectid(key.objectid)) {
3797                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3798                                 tmp_root = btrfs_read_fs_root_no_cache(
3799                                                 root->fs_info, &key);
3800                         } else {
3801                                 key.offset = (u64)-1;
3802                                 tmp_root = btrfs_read_fs_root(
3803                                                 root->fs_info, &key);
3804                         }
3805                         if (IS_ERR(tmp_root)) {
3806                                 err = 1;
3807                                 goto next;
3808                         }
3809                         ret = check_fs_root(tmp_root, root_cache, &wc);
3810                         if (ret == -EAGAIN) {
3811                                 free_root_recs_tree(root_cache);
3812                                 btrfs_release_path(&path);
3813                                 goto again;
3814                         }
3815                         if (ret)
3816                                 err = 1;
3817                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3818                                 btrfs_free_fs_root(tmp_root);
3819                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3820                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3821                         process_root_ref(leaf, path.slots[0], &key,
3822                                          root_cache);
3823                 }
3824 next:
3825                 path.slots[0]++;
3826         }
3827 out:
3828         btrfs_release_path(&path);
3829         if (err)
3830                 free_extent_cache_tree(&wc.shared);
3831         if (!cache_tree_empty(&wc.shared))
3832                 fprintf(stderr, "warning line %d\n", __LINE__);
3833
3834         task_stop(ctx.info);
3835
3836         return err;
3837 }
3838
3839 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3840 {
3841         struct list_head *cur = rec->backrefs.next;
3842         struct extent_backref *back;
3843         struct tree_backref *tback;
3844         struct data_backref *dback;
3845         u64 found = 0;
3846         int err = 0;
3847
3848         while(cur != &rec->backrefs) {
3849                 back = to_extent_backref(cur);
3850                 cur = cur->next;
3851                 if (!back->found_extent_tree) {
3852                         err = 1;
3853                         if (!print_errs)
3854                                 goto out;
3855                         if (back->is_data) {
3856                                 dback = to_data_backref(back);
3857                                 fprintf(stderr, "Backref %llu %s %llu"
3858                                         " owner %llu offset %llu num_refs %lu"
3859                                         " not found in extent tree\n",
3860                                         (unsigned long long)rec->start,
3861                                         back->full_backref ?
3862                                         "parent" : "root",
3863                                         back->full_backref ?
3864                                         (unsigned long long)dback->parent:
3865                                         (unsigned long long)dback->root,
3866                                         (unsigned long long)dback->owner,
3867                                         (unsigned long long)dback->offset,
3868                                         (unsigned long)dback->num_refs);
3869                         } else {
3870                                 tback = to_tree_backref(back);
3871                                 fprintf(stderr, "Backref %llu parent %llu"
3872                                         " root %llu not found in extent tree\n",
3873                                         (unsigned long long)rec->start,
3874                                         (unsigned long long)tback->parent,
3875                                         (unsigned long long)tback->root);
3876                         }
3877                 }
3878                 if (!back->is_data && !back->found_ref) {
3879                         err = 1;
3880                         if (!print_errs)
3881                                 goto out;
3882                         tback = to_tree_backref(back);
3883                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3884                                 (unsigned long long)rec->start,
3885                                 back->full_backref ? "parent" : "root",
3886                                 back->full_backref ?
3887                                 (unsigned long long)tback->parent :
3888                                 (unsigned long long)tback->root, back);
3889                 }
3890                 if (back->is_data) {
3891                         dback = to_data_backref(back);
3892                         if (dback->found_ref != dback->num_refs) {
3893                                 err = 1;
3894                                 if (!print_errs)
3895                                         goto out;
3896                                 fprintf(stderr, "Incorrect local backref count"
3897                                         " on %llu %s %llu owner %llu"
3898                                         " offset %llu found %u wanted %u back %p\n",
3899                                         (unsigned long long)rec->start,
3900                                         back->full_backref ?
3901                                         "parent" : "root",
3902                                         back->full_backref ?
3903                                         (unsigned long long)dback->parent:
3904                                         (unsigned long long)dback->root,
3905                                         (unsigned long long)dback->owner,
3906                                         (unsigned long long)dback->offset,
3907                                         dback->found_ref, dback->num_refs, back);
3908                         }
3909                         if (dback->disk_bytenr != rec->start) {
3910                                 err = 1;
3911                                 if (!print_errs)
3912                                         goto out;
3913                                 fprintf(stderr, "Backref disk bytenr does not"
3914                                         " match extent record, bytenr=%llu, "
3915                                         "ref bytenr=%llu\n",
3916                                         (unsigned long long)rec->start,
3917                                         (unsigned long long)dback->disk_bytenr);
3918                         }
3919
3920                         if (dback->bytes != rec->nr) {
3921                                 err = 1;
3922                                 if (!print_errs)
3923                                         goto out;
3924                                 fprintf(stderr, "Backref bytes do not match "
3925                                         "extent backref, bytenr=%llu, ref "
3926                                         "bytes=%llu, backref bytes=%llu\n",
3927                                         (unsigned long long)rec->start,
3928                                         (unsigned long long)rec->nr,
3929                                         (unsigned long long)dback->bytes);
3930                         }
3931                 }
3932                 if (!back->is_data) {
3933                         found += 1;
3934                 } else {
3935                         dback = to_data_backref(back);
3936                         found += dback->found_ref;
3937                 }
3938         }
3939         if (found != rec->refs) {
3940                 err = 1;
3941                 if (!print_errs)
3942                         goto out;
3943                 fprintf(stderr, "Incorrect global backref count "
3944                         "on %llu found %llu wanted %llu\n",
3945                         (unsigned long long)rec->start,
3946                         (unsigned long long)found,
3947                         (unsigned long long)rec->refs);
3948         }
3949 out:
3950         return err;
3951 }
3952
3953 static int free_all_extent_backrefs(struct extent_record *rec)
3954 {
3955         struct extent_backref *back;
3956         struct list_head *cur;
3957         while (!list_empty(&rec->backrefs)) {
3958                 cur = rec->backrefs.next;
3959                 back = to_extent_backref(cur);
3960                 list_del(cur);
3961                 free(back);
3962         }
3963         return 0;
3964 }
3965
3966 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3967                                      struct cache_tree *extent_cache)
3968 {
3969         struct cache_extent *cache;
3970         struct extent_record *rec;
3971
3972         while (1) {
3973                 cache = first_cache_extent(extent_cache);
3974                 if (!cache)
3975                         break;
3976                 rec = container_of(cache, struct extent_record, cache);
3977                 remove_cache_extent(extent_cache, cache);
3978                 free_all_extent_backrefs(rec);
3979                 free(rec);
3980         }
3981 }
3982
3983 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3984                                  struct extent_record *rec)
3985 {
3986         if (rec->content_checked && rec->owner_ref_checked &&
3987             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3988             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3989             !rec->bad_full_backref && !rec->crossing_stripes &&
3990             !rec->wrong_chunk_type) {
3991                 remove_cache_extent(extent_cache, &rec->cache);
3992                 free_all_extent_backrefs(rec);
3993                 list_del_init(&rec->list);
3994                 free(rec);
3995         }
3996         return 0;
3997 }
3998
3999 static int check_owner_ref(struct btrfs_root *root,
4000                             struct extent_record *rec,
4001                             struct extent_buffer *buf)
4002 {
4003         struct extent_backref *node;
4004         struct tree_backref *back;
4005         struct btrfs_root *ref_root;
4006         struct btrfs_key key;
4007         struct btrfs_path path;
4008         struct extent_buffer *parent;
4009         int level;
4010         int found = 0;
4011         int ret;
4012
4013         list_for_each_entry(node, &rec->backrefs, list) {
4014                 if (node->is_data)
4015                         continue;
4016                 if (!node->found_ref)
4017                         continue;
4018                 if (node->full_backref)
4019                         continue;
4020                 back = to_tree_backref(node);
4021                 if (btrfs_header_owner(buf) == back->root)
4022                         return 0;
4023         }
4024         BUG_ON(rec->is_root);
4025
4026         /* try to find the block by search corresponding fs tree */
4027         key.objectid = btrfs_header_owner(buf);
4028         key.type = BTRFS_ROOT_ITEM_KEY;
4029         key.offset = (u64)-1;
4030
4031         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4032         if (IS_ERR(ref_root))
4033                 return 1;
4034
4035         level = btrfs_header_level(buf);
4036         if (level == 0)
4037                 btrfs_item_key_to_cpu(buf, &key, 0);
4038         else
4039                 btrfs_node_key_to_cpu(buf, &key, 0);
4040
4041         btrfs_init_path(&path);
4042         path.lowest_level = level + 1;
4043         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4044         if (ret < 0)
4045                 return 0;
4046
4047         parent = path.nodes[level + 1];
4048         if (parent && buf->start == btrfs_node_blockptr(parent,
4049                                                         path.slots[level + 1]))
4050                 found = 1;
4051
4052         btrfs_release_path(&path);
4053         return found ? 0 : 1;
4054 }
4055
4056 static int is_extent_tree_record(struct extent_record *rec)
4057 {
4058         struct list_head *cur = rec->backrefs.next;
4059         struct extent_backref *node;
4060         struct tree_backref *back;
4061         int is_extent = 0;
4062
4063         while(cur != &rec->backrefs) {
4064                 node = to_extent_backref(cur);
4065                 cur = cur->next;
4066                 if (node->is_data)
4067                         return 0;
4068                 back = to_tree_backref(node);
4069                 if (node->full_backref)
4070                         return 0;
4071                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4072                         is_extent = 1;
4073         }
4074         return is_extent;
4075 }
4076
4077
4078 static int record_bad_block_io(struct btrfs_fs_info *info,
4079                                struct cache_tree *extent_cache,
4080                                u64 start, u64 len)
4081 {
4082         struct extent_record *rec;
4083         struct cache_extent *cache;
4084         struct btrfs_key key;
4085
4086         cache = lookup_cache_extent(extent_cache, start, len);
4087         if (!cache)
4088                 return 0;
4089
4090         rec = container_of(cache, struct extent_record, cache);
4091         if (!is_extent_tree_record(rec))
4092                 return 0;
4093
4094         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4095         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4096 }
4097
4098 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4099                        struct extent_buffer *buf, int slot)
4100 {
4101         if (btrfs_header_level(buf)) {
4102                 struct btrfs_key_ptr ptr1, ptr2;
4103
4104                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4105                                    sizeof(struct btrfs_key_ptr));
4106                 read_extent_buffer(buf, &ptr2,
4107                                    btrfs_node_key_ptr_offset(slot + 1),
4108                                    sizeof(struct btrfs_key_ptr));
4109                 write_extent_buffer(buf, &ptr1,
4110                                     btrfs_node_key_ptr_offset(slot + 1),
4111                                     sizeof(struct btrfs_key_ptr));
4112                 write_extent_buffer(buf, &ptr2,
4113                                     btrfs_node_key_ptr_offset(slot),
4114                                     sizeof(struct btrfs_key_ptr));
4115                 if (slot == 0) {
4116                         struct btrfs_disk_key key;
4117                         btrfs_node_key(buf, &key, 0);
4118                         btrfs_fixup_low_keys(root, path, &key,
4119                                              btrfs_header_level(buf) + 1);
4120                 }
4121         } else {
4122                 struct btrfs_item *item1, *item2;
4123                 struct btrfs_key k1, k2;
4124                 char *item1_data, *item2_data;
4125                 u32 item1_offset, item2_offset, item1_size, item2_size;
4126
4127                 item1 = btrfs_item_nr(slot);
4128                 item2 = btrfs_item_nr(slot + 1);
4129                 btrfs_item_key_to_cpu(buf, &k1, slot);
4130                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4131                 item1_offset = btrfs_item_offset(buf, item1);
4132                 item2_offset = btrfs_item_offset(buf, item2);
4133                 item1_size = btrfs_item_size(buf, item1);
4134                 item2_size = btrfs_item_size(buf, item2);
4135
4136                 item1_data = malloc(item1_size);
4137                 if (!item1_data)
4138                         return -ENOMEM;
4139                 item2_data = malloc(item2_size);
4140                 if (!item2_data) {
4141                         free(item1_data);
4142                         return -ENOMEM;
4143                 }
4144
4145                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4146                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4147
4148                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4149                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4150                 free(item1_data);
4151                 free(item2_data);
4152
4153                 btrfs_set_item_offset(buf, item1, item2_offset);
4154                 btrfs_set_item_offset(buf, item2, item1_offset);
4155                 btrfs_set_item_size(buf, item1, item2_size);
4156                 btrfs_set_item_size(buf, item2, item1_size);
4157
4158                 path->slots[0] = slot;
4159                 btrfs_set_item_key_unsafe(root, path, &k2);
4160                 path->slots[0] = slot + 1;
4161                 btrfs_set_item_key_unsafe(root, path, &k1);
4162         }
4163         return 0;
4164 }
4165
4166 static int fix_key_order(struct btrfs_trans_handle *trans,
4167                          struct btrfs_root *root,
4168                          struct btrfs_path *path)
4169 {
4170         struct extent_buffer *buf;
4171         struct btrfs_key k1, k2;
4172         int i;
4173         int level = path->lowest_level;
4174         int ret = -EIO;
4175
4176         buf = path->nodes[level];
4177         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4178                 if (level) {
4179                         btrfs_node_key_to_cpu(buf, &k1, i);
4180                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4181                 } else {
4182                         btrfs_item_key_to_cpu(buf, &k1, i);
4183                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4184                 }
4185                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4186                         continue;
4187                 ret = swap_values(root, path, buf, i);
4188                 if (ret)
4189                         break;
4190                 btrfs_mark_buffer_dirty(buf);
4191                 i = 0;
4192         }
4193         return ret;
4194 }
4195
4196 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4197                              struct btrfs_root *root,
4198                              struct btrfs_path *path,
4199                              struct extent_buffer *buf, int slot)
4200 {
4201         struct btrfs_key key;
4202         int nritems = btrfs_header_nritems(buf);
4203
4204         btrfs_item_key_to_cpu(buf, &key, slot);
4205
4206         /* These are all the keys we can deal with missing. */
4207         if (key.type != BTRFS_DIR_INDEX_KEY &&
4208             key.type != BTRFS_EXTENT_ITEM_KEY &&
4209             key.type != BTRFS_METADATA_ITEM_KEY &&
4210             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4211             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4212                 return -1;
4213
4214         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4215                (unsigned long long)key.objectid, key.type,
4216                (unsigned long long)key.offset, slot, buf->start);
4217         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4218                               btrfs_item_nr_offset(slot + 1),
4219                               sizeof(struct btrfs_item) *
4220                               (nritems - slot - 1));
4221         btrfs_set_header_nritems(buf, nritems - 1);
4222         if (slot == 0) {
4223                 struct btrfs_disk_key disk_key;
4224
4225                 btrfs_item_key(buf, &disk_key, 0);
4226                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4227         }
4228         btrfs_mark_buffer_dirty(buf);
4229         return 0;
4230 }
4231
4232 static int fix_item_offset(struct btrfs_trans_handle *trans,
4233                            struct btrfs_root *root,
4234                            struct btrfs_path *path)
4235 {
4236         struct extent_buffer *buf;
4237         int i;
4238         int ret = 0;
4239
4240         /* We should only get this for leaves */
4241         BUG_ON(path->lowest_level);
4242         buf = path->nodes[0];
4243 again:
4244         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4245                 unsigned int shift = 0, offset;
4246
4247                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4248                     BTRFS_LEAF_DATA_SIZE(root)) {
4249                         if (btrfs_item_end_nr(buf, i) >
4250                             BTRFS_LEAF_DATA_SIZE(root)) {
4251                                 ret = delete_bogus_item(trans, root, path,
4252                                                         buf, i);
4253                                 if (!ret)
4254                                         goto again;
4255                                 fprintf(stderr, "item is off the end of the "
4256                                         "leaf, can't fix\n");
4257                                 ret = -EIO;
4258                                 break;
4259                         }
4260                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4261                                 btrfs_item_end_nr(buf, i);
4262                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4263                            btrfs_item_offset_nr(buf, i - 1)) {
4264                         if (btrfs_item_end_nr(buf, i) >
4265                             btrfs_item_offset_nr(buf, i - 1)) {
4266                                 ret = delete_bogus_item(trans, root, path,
4267                                                         buf, i);
4268                                 if (!ret)
4269                                         goto again;
4270                                 fprintf(stderr, "items overlap, can't fix\n");
4271                                 ret = -EIO;
4272                                 break;
4273                         }
4274                         shift = btrfs_item_offset_nr(buf, i - 1) -
4275                                 btrfs_item_end_nr(buf, i);
4276                 }
4277                 if (!shift)
4278                         continue;
4279
4280                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4281                        i, shift, (unsigned long long)buf->start);
4282                 offset = btrfs_item_offset_nr(buf, i);
4283                 memmove_extent_buffer(buf,
4284                                       btrfs_leaf_data(buf) + offset + shift,
4285                                       btrfs_leaf_data(buf) + offset,
4286                                       btrfs_item_size_nr(buf, i));
4287                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4288                                       offset + shift);
4289                 btrfs_mark_buffer_dirty(buf);
4290         }
4291
4292         /*
4293          * We may have moved things, in which case we want to exit so we don't
4294          * write those changes out.  Once we have proper abort functionality in
4295          * progs this can be changed to something nicer.
4296          */
4297         BUG_ON(ret);
4298         return ret;
4299 }
4300
4301 /*
4302  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4303  * then just return -EIO.
4304  */
4305 static int try_to_fix_bad_block(struct btrfs_root *root,
4306                                 struct extent_buffer *buf,
4307                                 enum btrfs_tree_block_status status)
4308 {
4309         struct btrfs_trans_handle *trans;
4310         struct ulist *roots;
4311         struct ulist_node *node;
4312         struct btrfs_root *search_root;
4313         struct btrfs_path *path;
4314         struct ulist_iterator iter;
4315         struct btrfs_key root_key, key;
4316         int ret;
4317
4318         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4319             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4320                 return -EIO;
4321
4322         path = btrfs_alloc_path();
4323         if (!path)
4324                 return -EIO;
4325
4326         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4327                                    0, &roots);
4328         if (ret) {
4329                 btrfs_free_path(path);
4330                 return -EIO;
4331         }
4332
4333         ULIST_ITER_INIT(&iter);
4334         while ((node = ulist_next(roots, &iter))) {
4335                 root_key.objectid = node->val;
4336                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4337                 root_key.offset = (u64)-1;
4338
4339                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4340                 if (IS_ERR(root)) {
4341                         ret = -EIO;
4342                         break;
4343                 }
4344
4345
4346                 trans = btrfs_start_transaction(search_root, 0);
4347                 if (IS_ERR(trans)) {
4348                         ret = PTR_ERR(trans);
4349                         break;
4350                 }
4351
4352                 path->lowest_level = btrfs_header_level(buf);
4353                 path->skip_check_block = 1;
4354                 if (path->lowest_level)
4355                         btrfs_node_key_to_cpu(buf, &key, 0);
4356                 else
4357                         btrfs_item_key_to_cpu(buf, &key, 0);
4358                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4359                 if (ret) {
4360                         ret = -EIO;
4361                         btrfs_commit_transaction(trans, search_root);
4362                         break;
4363                 }
4364                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4365                         ret = fix_key_order(trans, search_root, path);
4366                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4367                         ret = fix_item_offset(trans, search_root, path);
4368                 if (ret) {
4369                         btrfs_commit_transaction(trans, search_root);
4370                         break;
4371                 }
4372                 btrfs_release_path(path);
4373                 btrfs_commit_transaction(trans, search_root);
4374         }
4375         ulist_free(roots);
4376         btrfs_free_path(path);
4377         return ret;
4378 }
4379
4380 static int check_block(struct btrfs_root *root,
4381                        struct cache_tree *extent_cache,
4382                        struct extent_buffer *buf, u64 flags)
4383 {
4384         struct extent_record *rec;
4385         struct cache_extent *cache;
4386         struct btrfs_key key;
4387         enum btrfs_tree_block_status status;
4388         int ret = 0;
4389         int level;
4390
4391         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4392         if (!cache)
4393                 return 1;
4394         rec = container_of(cache, struct extent_record, cache);
4395         rec->generation = btrfs_header_generation(buf);
4396
4397         level = btrfs_header_level(buf);
4398         if (btrfs_header_nritems(buf) > 0) {
4399
4400                 if (level == 0)
4401                         btrfs_item_key_to_cpu(buf, &key, 0);
4402                 else
4403                         btrfs_node_key_to_cpu(buf, &key, 0);
4404
4405                 rec->info_objectid = key.objectid;
4406         }
4407         rec->info_level = level;
4408
4409         if (btrfs_is_leaf(buf))
4410                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4411         else
4412                 status = btrfs_check_node(root, &rec->parent_key, buf);
4413
4414         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4415                 if (repair)
4416                         status = try_to_fix_bad_block(root, buf, status);
4417                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4418                         ret = -EIO;
4419                         fprintf(stderr, "bad block %llu\n",
4420                                 (unsigned long long)buf->start);
4421                 } else {
4422                         /*
4423                          * Signal to callers we need to start the scan over
4424                          * again since we'll have cowed blocks.
4425                          */
4426                         ret = -EAGAIN;
4427                 }
4428         } else {
4429                 rec->content_checked = 1;
4430                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4431                         rec->owner_ref_checked = 1;
4432                 else {
4433                         ret = check_owner_ref(root, rec, buf);
4434                         if (!ret)
4435                                 rec->owner_ref_checked = 1;
4436                 }
4437         }
4438         if (!ret)
4439                 maybe_free_extent_rec(extent_cache, rec);
4440         return ret;
4441 }
4442
4443 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4444                                                 u64 parent, u64 root)
4445 {
4446         struct list_head *cur = rec->backrefs.next;
4447         struct extent_backref *node;
4448         struct tree_backref *back;
4449
4450         while(cur != &rec->backrefs) {
4451                 node = to_extent_backref(cur);
4452                 cur = cur->next;
4453                 if (node->is_data)
4454                         continue;
4455                 back = to_tree_backref(node);
4456                 if (parent > 0) {
4457                         if (!node->full_backref)
4458                                 continue;
4459                         if (parent == back->parent)
4460                                 return back;
4461                 } else {
4462                         if (node->full_backref)
4463                                 continue;
4464                         if (back->root == root)
4465                                 return back;
4466                 }
4467         }
4468         return NULL;
4469 }
4470
4471 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4472                                                 u64 parent, u64 root)
4473 {
4474         struct tree_backref *ref = malloc(sizeof(*ref));
4475
4476         if (!ref)
4477                 return NULL;
4478         memset(&ref->node, 0, sizeof(ref->node));
4479         if (parent > 0) {
4480                 ref->parent = parent;
4481                 ref->node.full_backref = 1;
4482         } else {
4483                 ref->root = root;
4484                 ref->node.full_backref = 0;
4485         }
4486         list_add_tail(&ref->node.list, &rec->backrefs);
4487
4488         return ref;
4489 }
4490
4491 static struct data_backref *find_data_backref(struct extent_record *rec,
4492                                                 u64 parent, u64 root,
4493                                                 u64 owner, u64 offset,
4494                                                 int found_ref,
4495                                                 u64 disk_bytenr, u64 bytes)
4496 {
4497         struct list_head *cur = rec->backrefs.next;
4498         struct extent_backref *node;
4499         struct data_backref *back;
4500
4501         while(cur != &rec->backrefs) {
4502                 node = to_extent_backref(cur);
4503                 cur = cur->next;
4504                 if (!node->is_data)
4505                         continue;
4506                 back = to_data_backref(node);
4507                 if (parent > 0) {
4508                         if (!node->full_backref)
4509                                 continue;
4510                         if (parent == back->parent)
4511                                 return back;
4512                 } else {
4513                         if (node->full_backref)
4514                                 continue;
4515                         if (back->root == root && back->owner == owner &&
4516                             back->offset == offset) {
4517                                 if (found_ref && node->found_ref &&
4518                                     (back->bytes != bytes ||
4519                                     back->disk_bytenr != disk_bytenr))
4520                                         continue;
4521                                 return back;
4522                         }
4523                 }
4524         }
4525         return NULL;
4526 }
4527
4528 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4529                                                 u64 parent, u64 root,
4530                                                 u64 owner, u64 offset,
4531                                                 u64 max_size)
4532 {
4533         struct data_backref *ref = malloc(sizeof(*ref));
4534
4535         if (!ref)
4536                 return NULL;
4537         memset(&ref->node, 0, sizeof(ref->node));
4538         ref->node.is_data = 1;
4539
4540         if (parent > 0) {
4541                 ref->parent = parent;
4542                 ref->owner = 0;
4543                 ref->offset = 0;
4544                 ref->node.full_backref = 1;
4545         } else {
4546                 ref->root = root;
4547                 ref->owner = owner;
4548                 ref->offset = offset;
4549                 ref->node.full_backref = 0;
4550         }
4551         ref->bytes = max_size;
4552         ref->found_ref = 0;
4553         ref->num_refs = 0;
4554         list_add_tail(&ref->node.list, &rec->backrefs);
4555         if (max_size > rec->max_size)
4556                 rec->max_size = max_size;
4557         return ref;
4558 }
4559
4560 /* Check if the type of extent matches with its chunk */
4561 static void check_extent_type(struct extent_record *rec)
4562 {
4563         struct btrfs_block_group_cache *bg_cache;
4564
4565         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4566         if (!bg_cache)
4567                 return;
4568
4569         /* data extent, check chunk directly*/
4570         if (!rec->metadata) {
4571                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4572                         rec->wrong_chunk_type = 1;
4573                 return;
4574         }
4575
4576         /* metadata extent, check the obvious case first */
4577         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4578                                  BTRFS_BLOCK_GROUP_METADATA))) {
4579                 rec->wrong_chunk_type = 1;
4580                 return;
4581         }
4582
4583         /*
4584          * Check SYSTEM extent, as it's also marked as metadata, we can only
4585          * make sure it's a SYSTEM extent by its backref
4586          */
4587         if (!list_empty(&rec->backrefs)) {
4588                 struct extent_backref *node;
4589                 struct tree_backref *tback;
4590                 u64 bg_type;
4591
4592                 node = to_extent_backref(rec->backrefs.next);
4593                 if (node->is_data) {
4594                         /* tree block shouldn't have data backref */
4595                         rec->wrong_chunk_type = 1;
4596                         return;
4597                 }
4598                 tback = container_of(node, struct tree_backref, node);
4599
4600                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4601                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4602                 else
4603                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4604                 if (!(bg_cache->flags & bg_type))
4605                         rec->wrong_chunk_type = 1;
4606         }
4607 }
4608
4609 /*
4610  * Allocate a new extent record, fill default values from @tmpl and insert int
4611  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4612  * the cache, otherwise it fails.
4613  */
4614 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4615                 struct extent_record *tmpl)
4616 {
4617         struct extent_record *rec;
4618         int ret = 0;
4619
4620         rec = malloc(sizeof(*rec));
4621         if (!rec)
4622                 return -ENOMEM;
4623         rec->start = tmpl->start;
4624         rec->max_size = tmpl->max_size;
4625         rec->nr = max(tmpl->nr, tmpl->max_size);
4626         rec->found_rec = tmpl->found_rec;
4627         rec->content_checked = tmpl->content_checked;
4628         rec->owner_ref_checked = tmpl->owner_ref_checked;
4629         rec->num_duplicates = 0;
4630         rec->metadata = tmpl->metadata;
4631         rec->flag_block_full_backref = FLAG_UNSET;
4632         rec->bad_full_backref = 0;
4633         rec->crossing_stripes = 0;
4634         rec->wrong_chunk_type = 0;
4635         rec->is_root = tmpl->is_root;
4636         rec->refs = tmpl->refs;
4637         rec->extent_item_refs = tmpl->extent_item_refs;
4638         rec->parent_generation = tmpl->parent_generation;
4639         INIT_LIST_HEAD(&rec->backrefs);
4640         INIT_LIST_HEAD(&rec->dups);
4641         INIT_LIST_HEAD(&rec->list);
4642         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4643         rec->cache.start = tmpl->start;
4644         rec->cache.size = tmpl->nr;
4645         ret = insert_cache_extent(extent_cache, &rec->cache);
4646         BUG_ON(ret);
4647         bytes_used += rec->nr;
4648
4649         if (tmpl->metadata)
4650                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4651                                 global_info->tree_root->nodesize);
4652         check_extent_type(rec);
4653         return ret;
4654 }
4655
4656 /*
4657  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4658  * some are hints:
4659  * - refs              - if found, increase refs
4660  * - is_root           - if found, set
4661  * - content_checked   - if found, set
4662  * - owner_ref_checked - if found, set
4663  *
4664  * If not found, create a new one, initialize and insert.
4665  */
4666 static int add_extent_rec(struct cache_tree *extent_cache,
4667                 struct extent_record *tmpl)
4668 {
4669         struct extent_record *rec;
4670         struct cache_extent *cache;
4671         int ret = 0;
4672         int dup = 0;
4673
4674         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4675         if (cache) {
4676                 rec = container_of(cache, struct extent_record, cache);
4677                 if (tmpl->refs)
4678                         rec->refs++;
4679                 if (rec->nr == 1)
4680                         rec->nr = max(tmpl->nr, tmpl->max_size);
4681
4682                 /*
4683                  * We need to make sure to reset nr to whatever the extent
4684                  * record says was the real size, this way we can compare it to
4685                  * the backrefs.
4686                  */
4687                 if (tmpl->found_rec) {
4688                         if (tmpl->start != rec->start || rec->found_rec) {
4689                                 struct extent_record *tmp;
4690
4691                                 dup = 1;
4692                                 if (list_empty(&rec->list))
4693                                         list_add_tail(&rec->list,
4694                                                       &duplicate_extents);
4695
4696                                 /*
4697                                  * We have to do this song and dance in case we
4698                                  * find an extent record that falls inside of
4699                                  * our current extent record but does not have
4700                                  * the same objectid.
4701                                  */
4702                                 tmp = malloc(sizeof(*tmp));
4703                                 if (!tmp)
4704                                         return -ENOMEM;
4705                                 tmp->start = tmpl->start;
4706                                 tmp->max_size = tmpl->max_size;
4707                                 tmp->nr = tmpl->nr;
4708                                 tmp->found_rec = 1;
4709                                 tmp->metadata = tmpl->metadata;
4710                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4711                                 INIT_LIST_HEAD(&tmp->list);
4712                                 list_add_tail(&tmp->list, &rec->dups);
4713                                 rec->num_duplicates++;
4714                         } else {
4715                                 rec->nr = tmpl->nr;
4716                                 rec->found_rec = 1;
4717                         }
4718                 }
4719
4720                 if (tmpl->extent_item_refs && !dup) {
4721                         if (rec->extent_item_refs) {
4722                                 fprintf(stderr, "block %llu rec "
4723                                         "extent_item_refs %llu, passed %llu\n",
4724                                         (unsigned long long)tmpl->start,
4725                                         (unsigned long long)
4726                                                         rec->extent_item_refs,
4727                                         (unsigned long long)tmpl->extent_item_refs);
4728                         }
4729                         rec->extent_item_refs = tmpl->extent_item_refs;
4730                 }
4731                 if (tmpl->is_root)
4732                         rec->is_root = 1;
4733                 if (tmpl->content_checked)
4734                         rec->content_checked = 1;
4735                 if (tmpl->owner_ref_checked)
4736                         rec->owner_ref_checked = 1;
4737                 memcpy(&rec->parent_key, &tmpl->parent_key,
4738                                 sizeof(tmpl->parent_key));
4739                 if (tmpl->parent_generation)
4740                         rec->parent_generation = tmpl->parent_generation;
4741                 if (rec->max_size < tmpl->max_size)
4742                         rec->max_size = tmpl->max_size;
4743
4744                 /*
4745                  * A metadata extent can't cross stripe_len boundary, otherwise
4746                  * kernel scrub won't be able to handle it.
4747                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4748                  * it.
4749                  */
4750                 if (tmpl->metadata)
4751                         rec->crossing_stripes = check_crossing_stripes(
4752                                 rec->start, global_info->tree_root->nodesize);
4753                 check_extent_type(rec);
4754                 maybe_free_extent_rec(extent_cache, rec);
4755                 return ret;
4756         }
4757
4758         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4759
4760         return ret;
4761 }
4762
4763 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4764                             u64 parent, u64 root, int found_ref)
4765 {
4766         struct extent_record *rec;
4767         struct tree_backref *back;
4768         struct cache_extent *cache;
4769
4770         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4771         if (!cache) {
4772                 struct extent_record tmpl;
4773
4774                 memset(&tmpl, 0, sizeof(tmpl));
4775                 tmpl.start = bytenr;
4776                 tmpl.nr = 1;
4777                 tmpl.metadata = 1;
4778
4779                 add_extent_rec_nolookup(extent_cache, &tmpl);
4780
4781                 /* really a bug in cache_extent implement now */
4782                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4783                 if (!cache)
4784                         return -ENOENT;
4785         }
4786
4787         rec = container_of(cache, struct extent_record, cache);
4788         if (rec->start != bytenr) {
4789                 /*
4790                  * Several cause, from unaligned bytenr to over lapping extents
4791                  */
4792                 return -EEXIST;
4793         }
4794
4795         back = find_tree_backref(rec, parent, root);
4796         if (!back) {
4797                 back = alloc_tree_backref(rec, parent, root);
4798                 if (!back)
4799                         return -ENOMEM;
4800         }
4801
4802         if (found_ref) {
4803                 if (back->node.found_ref) {
4804                         fprintf(stderr, "Extent back ref already exists "
4805                                 "for %llu parent %llu root %llu \n",
4806                                 (unsigned long long)bytenr,
4807                                 (unsigned long long)parent,
4808                                 (unsigned long long)root);
4809                 }
4810                 back->node.found_ref = 1;
4811         } else {
4812                 if (back->node.found_extent_tree) {
4813                         fprintf(stderr, "Extent back ref already exists "
4814                                 "for %llu parent %llu root %llu \n",
4815                                 (unsigned long long)bytenr,
4816                                 (unsigned long long)parent,
4817                                 (unsigned long long)root);
4818                 }
4819                 back->node.found_extent_tree = 1;
4820         }
4821         check_extent_type(rec);
4822         maybe_free_extent_rec(extent_cache, rec);
4823         return 0;
4824 }
4825
4826 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4827                             u64 parent, u64 root, u64 owner, u64 offset,
4828                             u32 num_refs, int found_ref, u64 max_size)
4829 {
4830         struct extent_record *rec;
4831         struct data_backref *back;
4832         struct cache_extent *cache;
4833
4834         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4835         if (!cache) {
4836                 struct extent_record tmpl;
4837
4838                 memset(&tmpl, 0, sizeof(tmpl));
4839                 tmpl.start = bytenr;
4840                 tmpl.nr = 1;
4841                 tmpl.max_size = max_size;
4842
4843                 add_extent_rec_nolookup(extent_cache, &tmpl);
4844
4845                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4846                 if (!cache)
4847                         abort();
4848         }
4849
4850         rec = container_of(cache, struct extent_record, cache);
4851         if (rec->max_size < max_size)
4852                 rec->max_size = max_size;
4853
4854         /*
4855          * If found_ref is set then max_size is the real size and must match the
4856          * existing refs.  So if we have already found a ref then we need to
4857          * make sure that this ref matches the existing one, otherwise we need
4858          * to add a new backref so we can notice that the backrefs don't match
4859          * and we need to figure out who is telling the truth.  This is to
4860          * account for that awful fsync bug I introduced where we'd end up with
4861          * a btrfs_file_extent_item that would have its length include multiple
4862          * prealloc extents or point inside of a prealloc extent.
4863          */
4864         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4865                                  bytenr, max_size);
4866         if (!back) {
4867                 back = alloc_data_backref(rec, parent, root, owner, offset,
4868                                           max_size);
4869                 BUG_ON(!back);
4870         }
4871
4872         if (found_ref) {
4873                 BUG_ON(num_refs != 1);
4874                 if (back->node.found_ref)
4875                         BUG_ON(back->bytes != max_size);
4876                 back->node.found_ref = 1;
4877                 back->found_ref += 1;
4878                 back->bytes = max_size;
4879                 back->disk_bytenr = bytenr;
4880                 rec->refs += 1;
4881                 rec->content_checked = 1;
4882                 rec->owner_ref_checked = 1;
4883         } else {
4884                 if (back->node.found_extent_tree) {
4885                         fprintf(stderr, "Extent back ref already exists "
4886                                 "for %llu parent %llu root %llu "
4887                                 "owner %llu offset %llu num_refs %lu\n",
4888                                 (unsigned long long)bytenr,
4889                                 (unsigned long long)parent,
4890                                 (unsigned long long)root,
4891                                 (unsigned long long)owner,
4892                                 (unsigned long long)offset,
4893                                 (unsigned long)num_refs);
4894                 }
4895                 back->num_refs = num_refs;
4896                 back->node.found_extent_tree = 1;
4897         }
4898         maybe_free_extent_rec(extent_cache, rec);
4899         return 0;
4900 }
4901
4902 static int add_pending(struct cache_tree *pending,
4903                        struct cache_tree *seen, u64 bytenr, u32 size)
4904 {
4905         int ret;
4906         ret = add_cache_extent(seen, bytenr, size);
4907         if (ret)
4908                 return ret;
4909         add_cache_extent(pending, bytenr, size);
4910         return 0;
4911 }
4912
4913 static int pick_next_pending(struct cache_tree *pending,
4914                         struct cache_tree *reada,
4915                         struct cache_tree *nodes,
4916                         u64 last, struct block_info *bits, int bits_nr,
4917                         int *reada_bits)
4918 {
4919         unsigned long node_start = last;
4920         struct cache_extent *cache;
4921         int ret;
4922
4923         cache = search_cache_extent(reada, 0);
4924         if (cache) {
4925                 bits[0].start = cache->start;
4926                 bits[0].size = cache->size;
4927                 *reada_bits = 1;
4928                 return 1;
4929         }
4930         *reada_bits = 0;
4931         if (node_start > 32768)
4932                 node_start -= 32768;
4933
4934         cache = search_cache_extent(nodes, node_start);
4935         if (!cache)
4936                 cache = search_cache_extent(nodes, 0);
4937
4938         if (!cache) {
4939                  cache = search_cache_extent(pending, 0);
4940                  if (!cache)
4941                          return 0;
4942                  ret = 0;
4943                  do {
4944                          bits[ret].start = cache->start;
4945                          bits[ret].size = cache->size;
4946                          cache = next_cache_extent(cache);
4947                          ret++;
4948                  } while (cache && ret < bits_nr);
4949                  return ret;
4950         }
4951
4952         ret = 0;
4953         do {
4954                 bits[ret].start = cache->start;
4955                 bits[ret].size = cache->size;
4956                 cache = next_cache_extent(cache);
4957                 ret++;
4958         } while (cache && ret < bits_nr);
4959
4960         if (bits_nr - ret > 8) {
4961                 u64 lookup = bits[0].start + bits[0].size;
4962                 struct cache_extent *next;
4963                 next = search_cache_extent(pending, lookup);
4964                 while(next) {
4965                         if (next->start - lookup > 32768)
4966                                 break;
4967                         bits[ret].start = next->start;
4968                         bits[ret].size = next->size;
4969                         lookup = next->start + next->size;
4970                         ret++;
4971                         if (ret == bits_nr)
4972                                 break;
4973                         next = next_cache_extent(next);
4974                         if (!next)
4975                                 break;
4976                 }
4977         }
4978         return ret;
4979 }
4980
4981 static void free_chunk_record(struct cache_extent *cache)
4982 {
4983         struct chunk_record *rec;
4984
4985         rec = container_of(cache, struct chunk_record, cache);
4986         list_del_init(&rec->list);
4987         list_del_init(&rec->dextents);
4988         free(rec);
4989 }
4990
4991 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4992 {
4993         cache_tree_free_extents(chunk_cache, free_chunk_record);
4994 }
4995
4996 static void free_device_record(struct rb_node *node)
4997 {
4998         struct device_record *rec;
4999
5000         rec = container_of(node, struct device_record, node);
5001         free(rec);
5002 }
5003
5004 FREE_RB_BASED_TREE(device_cache, free_device_record);
5005
5006 int insert_block_group_record(struct block_group_tree *tree,
5007                               struct block_group_record *bg_rec)
5008 {
5009         int ret;
5010
5011         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5012         if (ret)
5013                 return ret;
5014
5015         list_add_tail(&bg_rec->list, &tree->block_groups);
5016         return 0;
5017 }
5018
5019 static void free_block_group_record(struct cache_extent *cache)
5020 {
5021         struct block_group_record *rec;
5022
5023         rec = container_of(cache, struct block_group_record, cache);
5024         list_del_init(&rec->list);
5025         free(rec);
5026 }
5027
5028 void free_block_group_tree(struct block_group_tree *tree)
5029 {
5030         cache_tree_free_extents(&tree->tree, free_block_group_record);
5031 }
5032
5033 int insert_device_extent_record(struct device_extent_tree *tree,
5034                                 struct device_extent_record *de_rec)
5035 {
5036         int ret;
5037
5038         /*
5039          * Device extent is a bit different from the other extents, because
5040          * the extents which belong to the different devices may have the
5041          * same start and size, so we need use the special extent cache
5042          * search/insert functions.
5043          */
5044         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5045         if (ret)
5046                 return ret;
5047
5048         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5049         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5050         return 0;
5051 }
5052
5053 static void free_device_extent_record(struct cache_extent *cache)
5054 {
5055         struct device_extent_record *rec;
5056
5057         rec = container_of(cache, struct device_extent_record, cache);
5058         if (!list_empty(&rec->chunk_list))
5059                 list_del_init(&rec->chunk_list);
5060         if (!list_empty(&rec->device_list))
5061                 list_del_init(&rec->device_list);
5062         free(rec);
5063 }
5064
5065 void free_device_extent_tree(struct device_extent_tree *tree)
5066 {
5067         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5068 }
5069
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.  A ref whose
 * objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree backref,
 * anything else as a data backref carrying the v0 ref count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
5090
5091 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5092                                             struct btrfs_key *key,
5093                                             int slot)
5094 {
5095         struct btrfs_chunk *ptr;
5096         struct chunk_record *rec;
5097         int num_stripes, i;
5098
5099         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5100         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5101
5102         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5103         if (!rec) {
5104                 fprintf(stderr, "memory allocation failed\n");
5105                 exit(-1);
5106         }
5107
5108         INIT_LIST_HEAD(&rec->list);
5109         INIT_LIST_HEAD(&rec->dextents);
5110         rec->bg_rec = NULL;
5111
5112         rec->cache.start = key->offset;
5113         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5114
5115         rec->generation = btrfs_header_generation(leaf);
5116
5117         rec->objectid = key->objectid;
5118         rec->type = key->type;
5119         rec->offset = key->offset;
5120
5121         rec->length = rec->cache.size;
5122         rec->owner = btrfs_chunk_owner(leaf, ptr);
5123         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5124         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5125         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5126         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5127         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5128         rec->num_stripes = num_stripes;
5129         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5130
5131         for (i = 0; i < rec->num_stripes; ++i) {
5132                 rec->stripes[i].devid =
5133                         btrfs_stripe_devid_nr(leaf, ptr, i);
5134                 rec->stripes[i].offset =
5135                         btrfs_stripe_offset_nr(leaf, ptr, i);
5136                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5137                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5138                                 BTRFS_UUID_SIZE);
5139         }
5140
5141         return rec;
5142 }
5143
5144 static int process_chunk_item(struct cache_tree *chunk_cache,
5145                               struct btrfs_key *key, struct extent_buffer *eb,
5146                               int slot)
5147 {
5148         struct chunk_record *rec;
5149         struct btrfs_chunk *chunk;
5150         int ret = 0;
5151
5152         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5153         /*
5154          * Do extra check for this chunk item,
5155          *
5156          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5157          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5158          * and owner<->key_type check.
5159          */
5160         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5161                                       key->offset);
5162         if (ret < 0) {
5163                 error("chunk(%llu, %llu) is not valid, ignore it",
5164                       key->offset, btrfs_chunk_length(eb, chunk));
5165                 return 0;
5166         }
5167         rec = btrfs_new_chunk_record(eb, key, slot);
5168         ret = insert_cache_extent(chunk_cache, &rec->cache);
5169         if (ret) {
5170                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5171                         rec->offset, rec->length);
5172                 free(rec);
5173         }
5174
5175         return ret;
5176 }
5177
5178 static int process_device_item(struct rb_root *dev_cache,
5179                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5180 {
5181         struct btrfs_dev_item *ptr;
5182         struct device_record *rec;
5183         int ret = 0;
5184
5185         ptr = btrfs_item_ptr(eb,
5186                 slot, struct btrfs_dev_item);
5187
5188         rec = malloc(sizeof(*rec));
5189         if (!rec) {
5190                 fprintf(stderr, "memory allocation failed\n");
5191                 return -ENOMEM;
5192         }
5193
5194         rec->devid = key->offset;
5195         rec->generation = btrfs_header_generation(eb);
5196
5197         rec->objectid = key->objectid;
5198         rec->type = key->type;
5199         rec->offset = key->offset;
5200
5201         rec->devid = btrfs_device_id(eb, ptr);
5202         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5203         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5204
5205         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5206         if (ret) {
5207                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5208                 free(rec);
5209         }
5210
5211         return ret;
5212 }
5213
5214 struct block_group_record *
5215 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5216                              int slot)
5217 {
5218         struct btrfs_block_group_item *ptr;
5219         struct block_group_record *rec;
5220
5221         rec = calloc(1, sizeof(*rec));
5222         if (!rec) {
5223                 fprintf(stderr, "memory allocation failed\n");
5224                 exit(-1);
5225         }
5226
5227         rec->cache.start = key->objectid;
5228         rec->cache.size = key->offset;
5229
5230         rec->generation = btrfs_header_generation(leaf);
5231
5232         rec->objectid = key->objectid;
5233         rec->type = key->type;
5234         rec->offset = key->offset;
5235
5236         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5237         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5238
5239         INIT_LIST_HEAD(&rec->list);
5240
5241         return rec;
5242 }
5243
5244 static int process_block_group_item(struct block_group_tree *block_group_cache,
5245                                     struct btrfs_key *key,
5246                                     struct extent_buffer *eb, int slot)
5247 {
5248         struct block_group_record *rec;
5249         int ret = 0;
5250
5251         rec = btrfs_new_block_group_record(eb, key, slot);
5252         ret = insert_block_group_record(block_group_cache, rec);
5253         if (ret) {
5254                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5255                         rec->objectid, rec->offset);
5256                 free(rec);
5257         }
5258
5259         return ret;
5260 }
5261
5262 struct device_extent_record *
5263 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5264                                struct btrfs_key *key, int slot)
5265 {
5266         struct device_extent_record *rec;
5267         struct btrfs_dev_extent *ptr;
5268
5269         rec = calloc(1, sizeof(*rec));
5270         if (!rec) {
5271                 fprintf(stderr, "memory allocation failed\n");
5272                 exit(-1);
5273         }
5274
5275         rec->cache.objectid = key->objectid;
5276         rec->cache.start = key->offset;
5277
5278         rec->generation = btrfs_header_generation(leaf);
5279
5280         rec->objectid = key->objectid;
5281         rec->type = key->type;
5282         rec->offset = key->offset;
5283
5284         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5285         rec->chunk_objecteid =
5286                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5287         rec->chunk_offset =
5288                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5289         rec->length = btrfs_dev_extent_length(leaf, ptr);
5290         rec->cache.size = rec->length;
5291
5292         INIT_LIST_HEAD(&rec->chunk_list);
5293         INIT_LIST_HEAD(&rec->device_list);
5294
5295         return rec;
5296 }
5297
5298 static int
5299 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5300                            struct btrfs_key *key, struct extent_buffer *eb,
5301                            int slot)
5302 {
5303         struct device_extent_record *rec;
5304         int ret;
5305
5306         rec = btrfs_new_device_extent_record(eb, key, slot);
5307         ret = insert_device_extent_record(dev_extent_cache, rec);
5308         if (ret) {
5309                 fprintf(stderr,
5310                         "Device extent[%llu, %llu, %llu] existed.\n",
5311                         rec->objectid, rec->offset, rec->length);
5312                 free(rec);
5313         }
5314
5315         return ret;
5316 }
5317
5318 static int process_extent_item(struct btrfs_root *root,
5319                                struct cache_tree *extent_cache,
5320                                struct extent_buffer *eb, int slot)
5321 {
5322         struct btrfs_extent_item *ei;
5323         struct btrfs_extent_inline_ref *iref;
5324         struct btrfs_extent_data_ref *dref;
5325         struct btrfs_shared_data_ref *sref;
5326         struct btrfs_key key;
5327         struct extent_record tmpl;
5328         unsigned long end;
5329         unsigned long ptr;
5330         int ret;
5331         int type;
5332         u32 item_size = btrfs_item_size_nr(eb, slot);
5333         u64 refs = 0;
5334         u64 offset;
5335         u64 num_bytes;
5336         int metadata = 0;
5337
5338         btrfs_item_key_to_cpu(eb, &key, slot);
5339
5340         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5341                 metadata = 1;
5342                 num_bytes = root->nodesize;
5343         } else {
5344                 num_bytes = key.offset;
5345         }
5346
5347         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5348                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5349                       key.objectid, root->sectorsize);
5350                 return -EIO;
5351         }
5352         if (item_size < sizeof(*ei)) {
5353 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5354                 struct btrfs_extent_item_v0 *ei0;
5355                 BUG_ON(item_size != sizeof(*ei0));
5356                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5357                 refs = btrfs_extent_refs_v0(eb, ei0);
5358 #else
5359                 BUG();
5360 #endif
5361                 memset(&tmpl, 0, sizeof(tmpl));
5362                 tmpl.start = key.objectid;
5363                 tmpl.nr = num_bytes;
5364                 tmpl.extent_item_refs = refs;
5365                 tmpl.metadata = metadata;
5366                 tmpl.found_rec = 1;
5367                 tmpl.max_size = num_bytes;
5368
5369                 return add_extent_rec(extent_cache, &tmpl);
5370         }
5371
5372         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5373         refs = btrfs_extent_refs(eb, ei);
5374         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5375                 metadata = 1;
5376         else
5377                 metadata = 0;
5378         if (metadata && num_bytes != root->nodesize) {
5379                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5380                       num_bytes, root->nodesize);
5381                 return -EIO;
5382         }
5383         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5384                 error("ignore invalid data extent, length %llu is not aligned to %u",
5385                       num_bytes, root->sectorsize);
5386                 return -EIO;
5387         }
5388
5389         memset(&tmpl, 0, sizeof(tmpl));
5390         tmpl.start = key.objectid;
5391         tmpl.nr = num_bytes;
5392         tmpl.extent_item_refs = refs;
5393         tmpl.metadata = metadata;
5394         tmpl.found_rec = 1;
5395         tmpl.max_size = num_bytes;
5396         add_extent_rec(extent_cache, &tmpl);
5397
5398         ptr = (unsigned long)(ei + 1);
5399         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5400             key.type == BTRFS_EXTENT_ITEM_KEY)
5401                 ptr += sizeof(struct btrfs_tree_block_info);
5402
5403         end = (unsigned long)ei + item_size;
5404         while (ptr < end) {
5405                 iref = (struct btrfs_extent_inline_ref *)ptr;
5406                 type = btrfs_extent_inline_ref_type(eb, iref);
5407                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5408                 switch (type) {
5409                 case BTRFS_TREE_BLOCK_REF_KEY:
5410                         ret = add_tree_backref(extent_cache, key.objectid,
5411                                         0, offset, 0);
5412                         if (ret < 0)
5413                                 error("add_tree_backref failed: %s",
5414                                       strerror(-ret));
5415                         break;
5416                 case BTRFS_SHARED_BLOCK_REF_KEY:
5417                         ret = add_tree_backref(extent_cache, key.objectid,
5418                                         offset, 0, 0);
5419                         if (ret < 0)
5420                                 error("add_tree_backref failed: %s",
5421                                       strerror(-ret));
5422                         break;
5423                 case BTRFS_EXTENT_DATA_REF_KEY:
5424                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5425                         add_data_backref(extent_cache, key.objectid, 0,
5426                                         btrfs_extent_data_ref_root(eb, dref),
5427                                         btrfs_extent_data_ref_objectid(eb,
5428                                                                        dref),
5429                                         btrfs_extent_data_ref_offset(eb, dref),
5430                                         btrfs_extent_data_ref_count(eb, dref),
5431                                         0, num_bytes);
5432                         break;
5433                 case BTRFS_SHARED_DATA_REF_KEY:
5434                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5435                         add_data_backref(extent_cache, key.objectid, offset,
5436                                         0, 0, 0,
5437                                         btrfs_shared_data_ref_count(eb, sref),
5438                                         0, num_bytes);
5439                         break;
5440                 default:
5441                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5442                                 key.objectid, key.type, num_bytes);
5443                         goto out;
5444                 }
5445                 ptr += btrfs_extent_inline_ref_size(type);
5446         }
5447         WARN_ON(ptr > end);
5448 out:
5449         return 0;
5450 }
5451
5452 static int check_cache_range(struct btrfs_root *root,
5453                              struct btrfs_block_group_cache *cache,
5454                              u64 offset, u64 bytes)
5455 {
5456         struct btrfs_free_space *entry;
5457         u64 *logical;
5458         u64 bytenr;
5459         int stripe_len;
5460         int i, nr, ret;
5461
5462         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5463                 bytenr = btrfs_sb_offset(i);
5464                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5465                                        cache->key.objectid, bytenr, 0,
5466                                        &logical, &nr, &stripe_len);
5467                 if (ret)
5468                         return ret;
5469
5470                 while (nr--) {
5471                         if (logical[nr] + stripe_len <= offset)
5472                                 continue;
5473                         if (offset + bytes <= logical[nr])
5474                                 continue;
5475                         if (logical[nr] == offset) {
5476                                 if (stripe_len >= bytes) {
5477                                         kfree(logical);
5478                                         return 0;
5479                                 }
5480                                 bytes -= stripe_len;
5481                                 offset += stripe_len;
5482                         } else if (logical[nr] < offset) {
5483                                 if (logical[nr] + stripe_len >=
5484                                     offset + bytes) {
5485                                         kfree(logical);
5486                                         return 0;
5487                                 }
5488                                 bytes = (offset + bytes) -
5489                                         (logical[nr] + stripe_len);
5490                                 offset = logical[nr] + stripe_len;
5491                         } else {
5492                                 /*
5493                                  * Could be tricky, the super may land in the
5494                                  * middle of the area we're checking.  First
5495                                  * check the easiest case, it's at the end.
5496                                  */
5497                                 if (logical[nr] + stripe_len >=
5498                                     bytes + offset) {
5499                                         bytes = logical[nr] - offset;
5500                                         continue;
5501                                 }
5502
5503                                 /* Check the left side */
5504                                 ret = check_cache_range(root, cache,
5505                                                         offset,
5506                                                         logical[nr] - offset);
5507                                 if (ret) {
5508                                         kfree(logical);
5509                                         return ret;
5510                                 }
5511
5512                                 /* Now we continue with the right side */
5513                                 bytes = (offset + bytes) -
5514                                         (logical[nr] + stripe_len);
5515                                 offset = logical[nr] + stripe_len;
5516                         }
5517                 }
5518
5519                 kfree(logical);
5520         }
5521
5522         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5523         if (!entry) {
5524                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5525                         offset, offset+bytes);
5526                 return -EINVAL;
5527         }
5528
5529         if (entry->offset != offset) {
5530                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5531                         entry->offset);
5532                 return -EINVAL;
5533         }
5534
5535         if (entry->bytes != bytes) {
5536                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5537                         bytes, entry->bytes, offset);
5538                 return -EINVAL;
5539         }
5540
5541         unlink_free_space(cache->free_space_ctl, entry);
5542         free(entry);
5543         return 0;
5544 }
5545
5546 static int verify_space_cache(struct btrfs_root *root,
5547                               struct btrfs_block_group_cache *cache)
5548 {
5549         struct btrfs_path *path;
5550         struct extent_buffer *leaf;
5551         struct btrfs_key key;
5552         u64 last;
5553         int ret = 0;
5554
5555         path = btrfs_alloc_path();
5556         if (!path)
5557                 return -ENOMEM;
5558
5559         root = root->fs_info->extent_root;
5560
5561         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5562
5563         key.objectid = last;
5564         key.offset = 0;
5565         key.type = BTRFS_EXTENT_ITEM_KEY;
5566
5567         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5568         if (ret < 0)
5569                 goto out;
5570         ret = 0;
5571         while (1) {
5572                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5573                         ret = btrfs_next_leaf(root, path);
5574                         if (ret < 0)
5575                                 goto out;
5576                         if (ret > 0) {
5577                                 ret = 0;
5578                                 break;
5579                         }
5580                 }
5581                 leaf = path->nodes[0];
5582                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5583                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5584                         break;
5585                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5586                     key.type != BTRFS_METADATA_ITEM_KEY) {
5587                         path->slots[0]++;
5588                         continue;
5589                 }
5590
5591                 if (last == key.objectid) {
5592                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5593                                 last = key.objectid + key.offset;
5594                         else
5595                                 last = key.objectid + root->nodesize;
5596                         path->slots[0]++;
5597                         continue;
5598                 }
5599
5600                 ret = check_cache_range(root, cache, last,
5601                                         key.objectid - last);
5602                 if (ret)
5603                         break;
5604                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5605                         last = key.objectid + key.offset;
5606                 else
5607                         last = key.objectid + root->nodesize;
5608                 path->slots[0]++;
5609         }
5610
5611         if (last < cache->key.objectid + cache->key.offset)
5612                 ret = check_cache_range(root, cache, last,
5613                                         cache->key.objectid +
5614                                         cache->key.offset - last);
5615
5616 out:
5617         btrfs_free_path(path);
5618
5619         if (!ret &&
5620             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5621                 fprintf(stderr, "There are still entries left in the space "
5622                         "cache\n");
5623                 ret = -EINVAL;
5624         }
5625
5626         return ret;
5627 }
5628
5629 static int check_space_cache(struct btrfs_root *root)
5630 {
5631         struct btrfs_block_group_cache *cache;
5632         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5633         int ret;
5634         int error = 0;
5635
5636         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5637             btrfs_super_generation(root->fs_info->super_copy) !=
5638             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5639                 printf("cache and super generation don't match, space cache "
5640                        "will be invalidated\n");
5641                 return 0;
5642         }
5643
5644         if (ctx.progress_enabled) {
5645                 ctx.tp = TASK_FREE_SPACE;
5646                 task_start(ctx.info);
5647         }
5648
5649         while (1) {
5650                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5651                 if (!cache)
5652                         break;
5653
5654                 start = cache->key.objectid + cache->key.offset;
5655                 if (!cache->free_space_ctl) {
5656                         if (btrfs_init_free_space_ctl(cache,
5657                                                       root->sectorsize)) {
5658                                 ret = -ENOMEM;
5659                                 break;
5660                         }
5661                 } else {
5662                         btrfs_remove_free_space_cache(cache);
5663                 }
5664
5665                 if (btrfs_fs_compat_ro(root->fs_info,
5666                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5667                         ret = exclude_super_stripes(root, cache);
5668                         if (ret) {
5669                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5670                                         strerror(-ret));
5671                                 error++;
5672                                 continue;
5673                         }
5674                         ret = load_free_space_tree(root->fs_info, cache);
5675                         free_excluded_extents(root, cache);
5676                         if (ret < 0) {
5677                                 fprintf(stderr, "could not load free space tree: %s\n",
5678                                         strerror(-ret));
5679                                 error++;
5680                                 continue;
5681                         }
5682                         error += ret;
5683                 } else {
5684                         ret = load_free_space_cache(root->fs_info, cache);
5685                         if (!ret)
5686                                 continue;
5687                 }
5688
5689                 ret = verify_space_cache(root, cache);
5690                 if (ret) {
5691                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5692                                 cache->key.objectid);
5693                         error++;
5694                 }
5695         }
5696
5697         task_stop(ctx.info);
5698
5699         return error ? -EINVAL : 0;
5700 }
5701
5702 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5703                         u64 num_bytes, unsigned long leaf_offset,
5704                         struct extent_buffer *eb) {
5705
5706         u64 offset = 0;
5707         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5708         char *data;
5709         unsigned long csum_offset;
5710         u32 csum;
5711         u32 csum_expected;
5712         u64 read_len;
5713         u64 data_checked = 0;
5714         u64 tmp;
5715         int ret = 0;
5716         int mirror;
5717         int num_copies;
5718
5719         if (num_bytes % root->sectorsize)
5720                 return -EINVAL;
5721
5722         data = malloc(num_bytes);
5723         if (!data)
5724                 return -ENOMEM;
5725
5726         while (offset < num_bytes) {
5727                 mirror = 0;
5728 again:
5729                 read_len = num_bytes - offset;
5730                 /* read as much space once a time */
5731                 ret = read_extent_data(root, data + offset,
5732                                 bytenr + offset, &read_len, mirror);
5733                 if (ret)
5734                         goto out;
5735                 data_checked = 0;
5736                 /* verify every 4k data's checksum */
5737                 while (data_checked < read_len) {
5738                         csum = ~(u32)0;
5739                         tmp = offset + data_checked;
5740
5741                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5742                                                csum, root->sectorsize);
5743                         btrfs_csum_final(csum, (char *)&csum);
5744
5745                         csum_offset = leaf_offset +
5746                                  tmp / root->sectorsize * csum_size;
5747                         read_extent_buffer(eb, (char *)&csum_expected,
5748                                            csum_offset, csum_size);
5749                         /* try another mirror */
5750                         if (csum != csum_expected) {
5751                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5752                                                 mirror, bytenr + tmp,
5753                                                 csum, csum_expected);
5754                                 num_copies = btrfs_num_copies(
5755                                                 &root->fs_info->mapping_tree,
5756                                                 bytenr, num_bytes);
5757                                 if (mirror < num_copies - 1) {
5758                                         mirror += 1;
5759                                         goto again;
5760                                 }
5761                         }
5762                         data_checked += root->sectorsize;
5763                 }
5764                 offset += read_len;
5765         }
5766 out:
5767         free(data);
5768         return ret;
5769 }
5770
5771 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5772                                u64 num_bytes)
5773 {
5774         struct btrfs_path *path;
5775         struct extent_buffer *leaf;
5776         struct btrfs_key key;
5777         int ret;
5778
5779         path = btrfs_alloc_path();
5780         if (!path) {
5781                 fprintf(stderr, "Error allocating path\n");
5782                 return -ENOMEM;
5783         }
5784
5785         key.objectid = bytenr;
5786         key.type = BTRFS_EXTENT_ITEM_KEY;
5787         key.offset = (u64)-1;
5788
5789 again:
5790         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5791                                 0, 0);
5792         if (ret < 0) {
5793                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5794                 btrfs_free_path(path);
5795                 return ret;
5796         } else if (ret) {
5797                 if (path->slots[0] > 0) {
5798                         path->slots[0]--;
5799                 } else {
5800                         ret = btrfs_prev_leaf(root, path);
5801                         if (ret < 0) {
5802                                 goto out;
5803                         } else if (ret > 0) {
5804                                 ret = 0;
5805                                 goto out;
5806                         }
5807                 }
5808         }
5809
5810         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5811
5812         /*
5813          * Block group items come before extent items if they have the same
5814          * bytenr, so walk back one more just in case.  Dear future traveller,
5815          * first congrats on mastering time travel.  Now if it's not too much
5816          * trouble could you go back to 2006 and tell Chris to make the
5817          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5818          * EXTENT_ITEM_KEY please?
5819          */
5820         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5821                 if (path->slots[0] > 0) {
5822                         path->slots[0]--;
5823                 } else {
5824                         ret = btrfs_prev_leaf(root, path);
5825                         if (ret < 0) {
5826                                 goto out;
5827                         } else if (ret > 0) {
5828                                 ret = 0;
5829                                 goto out;
5830                         }
5831                 }
5832                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5833         }
5834
5835         while (num_bytes) {
5836                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5837                         ret = btrfs_next_leaf(root, path);
5838                         if (ret < 0) {
5839                                 fprintf(stderr, "Error going to next leaf "
5840                                         "%d\n", ret);
5841                                 btrfs_free_path(path);
5842                                 return ret;
5843                         } else if (ret) {
5844                                 break;
5845                         }
5846                 }
5847                 leaf = path->nodes[0];
5848                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5849                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5850                         path->slots[0]++;
5851                         continue;
5852                 }
5853                 if (key.objectid + key.offset < bytenr) {
5854                         path->slots[0]++;
5855                         continue;
5856                 }
5857                 if (key.objectid > bytenr + num_bytes)
5858                         break;
5859
5860                 if (key.objectid == bytenr) {
5861                         if (key.offset >= num_bytes) {
5862                                 num_bytes = 0;
5863                                 break;
5864                         }
5865                         num_bytes -= key.offset;
5866                         bytenr += key.offset;
5867                 } else if (key.objectid < bytenr) {
5868                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5869                                 num_bytes = 0;
5870                                 break;
5871                         }
5872                         num_bytes = (bytenr + num_bytes) -
5873                                 (key.objectid + key.offset);
5874                         bytenr = key.objectid + key.offset;
5875                 } else {
5876                         if (key.objectid + key.offset < bytenr + num_bytes) {
5877                                 u64 new_start = key.objectid + key.offset;
5878                                 u64 new_bytes = bytenr + num_bytes - new_start;
5879
5880                                 /*
5881                                  * Weird case, the extent is in the middle of
5882                                  * our range, we'll have to search one side
5883                                  * and then the other.  Not sure if this happens
5884                                  * in real life, but no harm in coding it up
5885                                  * anyway just in case.
5886                                  */
5887                                 btrfs_release_path(path);
5888                                 ret = check_extent_exists(root, new_start,
5889                                                           new_bytes);
5890                                 if (ret) {
5891                                         fprintf(stderr, "Right section didn't "
5892                                                 "have a record\n");
5893                                         break;
5894                                 }
5895                                 num_bytes = key.objectid - bytenr;
5896                                 goto again;
5897                         }
5898                         num_bytes = key.objectid - bytenr;
5899                 }
5900                 path->slots[0]++;
5901         }
5902         ret = 0;
5903
5904 out:
5905         if (num_bytes && !ret) {
5906                 fprintf(stderr, "There are no extents for csum range "
5907                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5908                 ret = 1;
5909         }
5910
5911         btrfs_free_path(path);
5912         return ret;
5913 }
5914
5915 static int check_csums(struct btrfs_root *root)
5916 {
5917         struct btrfs_path *path;
5918         struct extent_buffer *leaf;
5919         struct btrfs_key key;
5920         u64 offset = 0, num_bytes = 0;
5921         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5922         int errors = 0;
5923         int ret;
5924         u64 data_len;
5925         unsigned long leaf_offset;
5926
5927         root = root->fs_info->csum_root;
5928         if (!extent_buffer_uptodate(root->node)) {
5929                 fprintf(stderr, "No valid csum tree found\n");
5930                 return -ENOENT;
5931         }
5932
5933         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5934         key.type = BTRFS_EXTENT_CSUM_KEY;
5935         key.offset = 0;
5936
5937         path = btrfs_alloc_path();
5938         if (!path)
5939                 return -ENOMEM;
5940
5941         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5942         if (ret < 0) {
5943                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5944                 btrfs_free_path(path);
5945                 return ret;
5946         }
5947
5948         if (ret > 0 && path->slots[0])
5949                 path->slots[0]--;
5950         ret = 0;
5951
5952         while (1) {
5953                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5954                         ret = btrfs_next_leaf(root, path);
5955                         if (ret < 0) {
5956                                 fprintf(stderr, "Error going to next leaf "
5957                                         "%d\n", ret);
5958                                 break;
5959                         }
5960                         if (ret)
5961                                 break;
5962                 }
5963                 leaf = path->nodes[0];
5964
5965                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5966                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5967                         path->slots[0]++;
5968                         continue;
5969                 }
5970
5971                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5972                               csum_size) * root->sectorsize;
5973                 if (!check_data_csum)
5974                         goto skip_csum_check;
5975                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5976                 ret = check_extent_csums(root, key.offset, data_len,
5977                                          leaf_offset, leaf);
5978                 if (ret)
5979                         break;
5980 skip_csum_check:
5981                 if (!num_bytes) {
5982                         offset = key.offset;
5983                 } else if (key.offset != offset + num_bytes) {
5984                         ret = check_extent_exists(root, offset, num_bytes);
5985                         if (ret) {
5986                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5987                                         "there is no extent record\n",
5988                                         offset, offset+num_bytes);
5989                                 errors++;
5990                         }
5991                         offset = key.offset;
5992                         num_bytes = 0;
5993                 }
5994                 num_bytes += data_len;
5995                 path->slots[0]++;
5996         }
5997
5998         btrfs_free_path(path);
5999         return errors;
6000 }
6001
6002 static int is_dropped_key(struct btrfs_key *key,
6003                           struct btrfs_key *drop_key) {
6004         if (key->objectid < drop_key->objectid)
6005                 return 1;
6006         else if (key->objectid == drop_key->objectid) {
6007                 if (key->type < drop_key->type)
6008                         return 1;
6009                 else if (key->type == drop_key->type) {
6010                         if (key->offset < drop_key->offset)
6011                                 return 1;
6012                 }
6013         }
6014         return 0;
6015 }
6016
6017 /*
6018  * Here are the rules for FULL_BACKREF.
6019  *
6020  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6021  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6022  *      FULL_BACKREF set.
6023  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6024  *    if it happened after the relocation occurred since we'll have dropped the
6025  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6026  *    have no real way to know for sure.
6027  *
6028  * We process the blocks one root at a time, and we start from the lowest root
6029  * objectid and go to the highest.  So we can just lookup the owner backref for
6030  * the record and if we don't find it then we know it doesn't exist and we have
6031  * a FULL BACKREF.
6032  *
6033  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6034  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6035  * be set or not and then we can check later once we've gathered all the refs.
6036  */
6037 static int calc_extent_flag(struct btrfs_root *root,
6038                            struct cache_tree *extent_cache,
6039                            struct extent_buffer *buf,
6040                            struct root_item_record *ri,
6041                            u64 *flags)
6042 {
6043         struct extent_record *rec;
6044         struct cache_extent *cache;
6045         struct tree_backref *tback;
6046         u64 owner = 0;
6047
6048         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6049         /* we have added this extent before */
6050         if (!cache)
6051                 return -ENOENT;
6052
6053         rec = container_of(cache, struct extent_record, cache);
6054
6055         /*
6056          * Except file/reloc tree, we can not have
6057          * FULL BACKREF MODE
6058          */
6059         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6060                 goto normal;
6061         /*
6062          * root node
6063          */
6064         if (buf->start == ri->bytenr)
6065                 goto normal;
6066
6067         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6068                 goto full_backref;
6069
6070         owner = btrfs_header_owner(buf);
6071         if (owner == ri->objectid)
6072                 goto normal;
6073
6074         tback = find_tree_backref(rec, 0, owner);
6075         if (!tback)
6076                 goto full_backref;
6077 normal:
6078         *flags = 0;
6079         if (rec->flag_block_full_backref != FLAG_UNSET &&
6080             rec->flag_block_full_backref != 0)
6081                 rec->bad_full_backref = 1;
6082         return 0;
6083 full_backref:
6084         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6085         if (rec->flag_block_full_backref != FLAG_UNSET &&
6086             rec->flag_block_full_backref != 1)
6087                 rec->bad_full_backref = 1;
6088         return 0;
6089 }
6090
6091 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6092 {
6093         fprintf(stderr, "Invalid key type(");
6094         print_key_type(stderr, 0, key_type);
6095         fprintf(stderr, ") found in root(");
6096         print_objectid(stderr, rootid, 0);
6097         fprintf(stderr, ")\n");
6098 }
6099
6100 /*
6101  * Check if the key is valid with its extent buffer.
6102  *
6103  * This is a early check in case invalid key exists in a extent buffer
6104  * This is not comprehensive yet, but should prevent wrong key/item passed
6105  * further
6106  */
6107 static int check_type_with_root(u64 rootid, u8 key_type)
6108 {
6109         switch (key_type) {
6110         /* Only valid in chunk tree */
6111         case BTRFS_DEV_ITEM_KEY:
6112         case BTRFS_CHUNK_ITEM_KEY:
6113                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6114                         goto err;
6115                 break;
6116         /* valid in csum and log tree */
6117         case BTRFS_CSUM_TREE_OBJECTID:
6118                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6119                       is_fstree(rootid)))
6120                         goto err;
6121                 break;
6122         case BTRFS_EXTENT_ITEM_KEY:
6123         case BTRFS_METADATA_ITEM_KEY:
6124         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6125                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6126                         goto err;
6127                 break;
6128         case BTRFS_ROOT_ITEM_KEY:
6129                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6130                         goto err;
6131                 break;
6132         case BTRFS_DEV_EXTENT_KEY:
6133                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6134                         goto err;
6135                 break;
6136         }
6137         return 0;
6138 err:
6139         report_mismatch_key_root(key_type, rootid);
6140         return -EINVAL;
6141 }
6142
6143 static int run_next_block(struct btrfs_root *root,
6144                           struct block_info *bits,
6145                           int bits_nr,
6146                           u64 *last,
6147                           struct cache_tree *pending,
6148                           struct cache_tree *seen,
6149                           struct cache_tree *reada,
6150                           struct cache_tree *nodes,
6151                           struct cache_tree *extent_cache,
6152                           struct cache_tree *chunk_cache,
6153                           struct rb_root *dev_cache,
6154                           struct block_group_tree *block_group_cache,
6155                           struct device_extent_tree *dev_extent_cache,
6156                           struct root_item_record *ri)
6157 {
6158         struct extent_buffer *buf;
6159         struct extent_record *rec = NULL;
6160         u64 bytenr;
6161         u32 size;
6162         u64 parent;
6163         u64 owner;
6164         u64 flags;
6165         u64 ptr;
6166         u64 gen = 0;
6167         int ret = 0;
6168         int i;
6169         int nritems;
6170         struct btrfs_key key;
6171         struct cache_extent *cache;
6172         int reada_bits;
6173
6174         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6175                                     bits_nr, &reada_bits);
6176         if (nritems == 0)
6177                 return 1;
6178
6179         if (!reada_bits) {
6180                 for(i = 0; i < nritems; i++) {
6181                         ret = add_cache_extent(reada, bits[i].start,
6182                                                bits[i].size);
6183                         if (ret == -EEXIST)
6184                                 continue;
6185
6186                         /* fixme, get the parent transid */
6187                         readahead_tree_block(root, bits[i].start,
6188                                              bits[i].size, 0);
6189                 }
6190         }
6191         *last = bits[0].start;
6192         bytenr = bits[0].start;
6193         size = bits[0].size;
6194
6195         cache = lookup_cache_extent(pending, bytenr, size);
6196         if (cache) {
6197                 remove_cache_extent(pending, cache);
6198                 free(cache);
6199         }
6200         cache = lookup_cache_extent(reada, bytenr, size);
6201         if (cache) {
6202                 remove_cache_extent(reada, cache);
6203                 free(cache);
6204         }
6205         cache = lookup_cache_extent(nodes, bytenr, size);
6206         if (cache) {
6207                 remove_cache_extent(nodes, cache);
6208                 free(cache);
6209         }
6210         cache = lookup_cache_extent(extent_cache, bytenr, size);
6211         if (cache) {
6212                 rec = container_of(cache, struct extent_record, cache);
6213                 gen = rec->parent_generation;
6214         }
6215
6216         /* fixme, get the real parent transid */
6217         buf = read_tree_block(root, bytenr, size, gen);
6218         if (!extent_buffer_uptodate(buf)) {
6219                 record_bad_block_io(root->fs_info,
6220                                     extent_cache, bytenr, size);
6221                 goto out;
6222         }
6223
6224         nritems = btrfs_header_nritems(buf);
6225
6226         flags = 0;
6227         if (!init_extent_tree) {
6228                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6229                                        btrfs_header_level(buf), 1, NULL,
6230                                        &flags);
6231                 if (ret < 0) {
6232                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6233                         if (ret < 0) {
6234                                 fprintf(stderr, "Couldn't calc extent flags\n");
6235                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6236                         }
6237                 }
6238         } else {
6239                 flags = 0;
6240                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6241                 if (ret < 0) {
6242                         fprintf(stderr, "Couldn't calc extent flags\n");
6243                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6244                 }
6245         }
6246
6247         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6248                 if (ri != NULL &&
6249                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6250                     ri->objectid == btrfs_header_owner(buf)) {
6251                         /*
6252                          * Ok we got to this block from it's original owner and
6253                          * we have FULL_BACKREF set.  Relocation can leave
6254                          * converted blocks over so this is altogether possible,
6255                          * however it's not possible if the generation > the
6256                          * last snapshot, so check for this case.
6257                          */
6258                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6259                             btrfs_header_generation(buf) > ri->last_snapshot) {
6260                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6261                                 rec->bad_full_backref = 1;
6262                         }
6263                 }
6264         } else {
6265                 if (ri != NULL &&
6266                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6267                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6268                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6269                         rec->bad_full_backref = 1;
6270                 }
6271         }
6272
6273         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6274                 rec->flag_block_full_backref = 1;
6275                 parent = bytenr;
6276                 owner = 0;
6277         } else {
6278                 rec->flag_block_full_backref = 0;
6279                 parent = 0;
6280                 owner = btrfs_header_owner(buf);
6281         }
6282
6283         ret = check_block(root, extent_cache, buf, flags);
6284         if (ret)
6285                 goto out;
6286
6287         if (btrfs_is_leaf(buf)) {
6288                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6289                 for (i = 0; i < nritems; i++) {
6290                         struct btrfs_file_extent_item *fi;
6291                         btrfs_item_key_to_cpu(buf, &key, i);
6292                         /*
6293                          * Check key type against the leaf owner.
6294                          * Could filter quite a lot of early error if
6295                          * owner is correct
6296                          */
6297                         if (check_type_with_root(btrfs_header_owner(buf),
6298                                                  key.type)) {
6299                                 fprintf(stderr, "ignoring invalid key\n");
6300                                 continue;
6301                         }
6302                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6303                                 process_extent_item(root, extent_cache, buf,
6304                                                     i);
6305                                 continue;
6306                         }
6307                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6308                                 process_extent_item(root, extent_cache, buf,
6309                                                     i);
6310                                 continue;
6311                         }
6312                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6313                                 total_csum_bytes +=
6314                                         btrfs_item_size_nr(buf, i);
6315                                 continue;
6316                         }
6317                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6318                                 process_chunk_item(chunk_cache, &key, buf, i);
6319                                 continue;
6320                         }
6321                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6322                                 process_device_item(dev_cache, &key, buf, i);
6323                                 continue;
6324                         }
6325                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6326                                 process_block_group_item(block_group_cache,
6327                                         &key, buf, i);
6328                                 continue;
6329                         }
6330                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6331                                 process_device_extent_item(dev_extent_cache,
6332                                         &key, buf, i);
6333                                 continue;
6334
6335                         }
6336                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6337 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6338                                 process_extent_ref_v0(extent_cache, buf, i);
6339 #else
6340                                 BUG();
6341 #endif
6342                                 continue;
6343                         }
6344
6345                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6346                                 ret = add_tree_backref(extent_cache,
6347                                                 key.objectid, 0, key.offset, 0);
6348                                 if (ret < 0)
6349                                         error("add_tree_backref failed: %s",
6350                                               strerror(-ret));
6351                                 continue;
6352                         }
6353                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6354                                 ret = add_tree_backref(extent_cache,
6355                                                 key.objectid, key.offset, 0, 0);
6356                                 if (ret < 0)
6357                                         error("add_tree_backref failed: %s",
6358                                               strerror(-ret));
6359                                 continue;
6360                         }
6361                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6362                                 struct btrfs_extent_data_ref *ref;
6363                                 ref = btrfs_item_ptr(buf, i,
6364                                                 struct btrfs_extent_data_ref);
6365                                 add_data_backref(extent_cache,
6366                                         key.objectid, 0,
6367                                         btrfs_extent_data_ref_root(buf, ref),
6368                                         btrfs_extent_data_ref_objectid(buf,
6369                                                                        ref),
6370                                         btrfs_extent_data_ref_offset(buf, ref),
6371                                         btrfs_extent_data_ref_count(buf, ref),
6372                                         0, root->sectorsize);
6373                                 continue;
6374                         }
6375                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6376                                 struct btrfs_shared_data_ref *ref;
6377                                 ref = btrfs_item_ptr(buf, i,
6378                                                 struct btrfs_shared_data_ref);
6379                                 add_data_backref(extent_cache,
6380                                         key.objectid, key.offset, 0, 0, 0,
6381                                         btrfs_shared_data_ref_count(buf, ref),
6382                                         0, root->sectorsize);
6383                                 continue;
6384                         }
6385                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6386                                 struct bad_item *bad;
6387
6388                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6389                                         continue;
6390                                 if (!owner)
6391                                         continue;
6392                                 bad = malloc(sizeof(struct bad_item));
6393                                 if (!bad)
6394                                         continue;
6395                                 INIT_LIST_HEAD(&bad->list);
6396                                 memcpy(&bad->key, &key,
6397                                        sizeof(struct btrfs_key));
6398                                 bad->root_id = owner;
6399                                 list_add_tail(&bad->list, &delete_items);
6400                                 continue;
6401                         }
6402                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6403                                 continue;
6404                         fi = btrfs_item_ptr(buf, i,
6405                                             struct btrfs_file_extent_item);
6406                         if (btrfs_file_extent_type(buf, fi) ==
6407                             BTRFS_FILE_EXTENT_INLINE)
6408                                 continue;
6409                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6410                                 continue;
6411
6412                         data_bytes_allocated +=
6413                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6414                         if (data_bytes_allocated < root->sectorsize) {
6415                                 abort();
6416                         }
6417                         data_bytes_referenced +=
6418                                 btrfs_file_extent_num_bytes(buf, fi);
6419                         add_data_backref(extent_cache,
6420                                 btrfs_file_extent_disk_bytenr(buf, fi),
6421                                 parent, owner, key.objectid, key.offset -
6422                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6423                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6424                 }
6425         } else {
6426                 int level;
6427                 struct btrfs_key first_key;
6428
6429                 first_key.objectid = 0;
6430
6431                 if (nritems > 0)
6432                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6433                 level = btrfs_header_level(buf);
6434                 for (i = 0; i < nritems; i++) {
6435                         struct extent_record tmpl;
6436
6437                         ptr = btrfs_node_blockptr(buf, i);
6438                         size = root->nodesize;
6439                         btrfs_node_key_to_cpu(buf, &key, i);
6440                         if (ri != NULL) {
6441                                 if ((level == ri->drop_level)
6442                                     && is_dropped_key(&key, &ri->drop_key)) {
6443                                         continue;
6444                                 }
6445                         }
6446
6447                         memset(&tmpl, 0, sizeof(tmpl));
6448                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6449                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6450                         tmpl.start = ptr;
6451                         tmpl.nr = size;
6452                         tmpl.refs = 1;
6453                         tmpl.metadata = 1;
6454                         tmpl.max_size = size;
6455                         ret = add_extent_rec(extent_cache, &tmpl);
6456                         if (ret < 0)
6457                                 goto out;
6458
6459                         ret = add_tree_backref(extent_cache, ptr, parent,
6460                                         owner, 1);
6461                         if (ret < 0) {
6462                                 error("add_tree_backref failed: %s",
6463                                       strerror(-ret));
6464                                 continue;
6465                         }
6466
6467                         if (level > 1) {
6468                                 add_pending(nodes, seen, ptr, size);
6469                         } else {
6470                                 add_pending(pending, seen, ptr, size);
6471                         }
6472                 }
6473                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6474                                       nritems) * sizeof(struct btrfs_key_ptr);
6475         }
6476         total_btree_bytes += buf->len;
6477         if (fs_root_objectid(btrfs_header_owner(buf)))
6478                 total_fs_tree_bytes += buf->len;
6479         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6480                 total_extent_tree_bytes += buf->len;
6481         if (!found_old_backref &&
6482             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6483             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6484             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6485                 found_old_backref = 1;
6486 out:
6487         free_extent_buffer(buf);
6488         return ret;
6489 }
6490
6491 static int add_root_to_pending(struct extent_buffer *buf,
6492                                struct cache_tree *extent_cache,
6493                                struct cache_tree *pending,
6494                                struct cache_tree *seen,
6495                                struct cache_tree *nodes,
6496                                u64 objectid)
6497 {
6498         struct extent_record tmpl;
6499         int ret;
6500
6501         if (btrfs_header_level(buf) > 0)
6502                 add_pending(nodes, seen, buf->start, buf->len);
6503         else
6504                 add_pending(pending, seen, buf->start, buf->len);
6505
6506         memset(&tmpl, 0, sizeof(tmpl));
6507         tmpl.start = buf->start;
6508         tmpl.nr = buf->len;
6509         tmpl.is_root = 1;
6510         tmpl.refs = 1;
6511         tmpl.metadata = 1;
6512         tmpl.max_size = buf->len;
6513         add_extent_rec(extent_cache, &tmpl);
6514
6515         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6516             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6517                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6518                                 0, 1);
6519         else
6520                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6521                                 1);
6522         return ret;
6523 }
6524
6525 /* as we fix the tree, we might be deleting blocks that
6526  * we're tracking for repair.  This hook makes sure we
6527  * remove any backrefs for blocks as we are fixing them.
6528  */
6529 static int free_extent_hook(struct btrfs_trans_handle *trans,
6530                             struct btrfs_root *root,
6531                             u64 bytenr, u64 num_bytes, u64 parent,
6532                             u64 root_objectid, u64 owner, u64 offset,
6533                             int refs_to_drop)
6534 {
6535         struct extent_record *rec;
6536         struct cache_extent *cache;
6537         int is_data;
6538         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6539
6540         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6541         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6542         if (!cache)
6543                 return 0;
6544
6545         rec = container_of(cache, struct extent_record, cache);
6546         if (is_data) {
6547                 struct data_backref *back;
6548                 back = find_data_backref(rec, parent, root_objectid, owner,
6549                                          offset, 1, bytenr, num_bytes);
6550                 if (!back)
6551                         goto out;
6552                 if (back->node.found_ref) {
6553                         back->found_ref -= refs_to_drop;
6554                         if (rec->refs)
6555                                 rec->refs -= refs_to_drop;
6556                 }
6557                 if (back->node.found_extent_tree) {
6558                         back->num_refs -= refs_to_drop;
6559                         if (rec->extent_item_refs)
6560                                 rec->extent_item_refs -= refs_to_drop;
6561                 }
6562                 if (back->found_ref == 0)
6563                         back->node.found_ref = 0;
6564                 if (back->num_refs == 0)
6565                         back->node.found_extent_tree = 0;
6566
6567                 if (!back->node.found_extent_tree && back->node.found_ref) {
6568                         list_del(&back->node.list);
6569                         free(back);
6570                 }
6571         } else {
6572                 struct tree_backref *back;
6573                 back = find_tree_backref(rec, parent, root_objectid);
6574                 if (!back)
6575                         goto out;
6576                 if (back->node.found_ref) {
6577                         if (rec->refs)
6578                                 rec->refs--;
6579                         back->node.found_ref = 0;
6580                 }
6581                 if (back->node.found_extent_tree) {
6582                         if (rec->extent_item_refs)
6583                                 rec->extent_item_refs--;
6584                         back->node.found_extent_tree = 0;
6585                 }
6586                 if (!back->node.found_extent_tree && back->node.found_ref) {
6587                         list_del(&back->node.list);
6588                         free(back);
6589                 }
6590         }
6591         maybe_free_extent_rec(extent_cache, rec);
6592 out:
6593         return 0;
6594 }
6595
6596 static int delete_extent_records(struct btrfs_trans_handle *trans,
6597                                  struct btrfs_root *root,
6598                                  struct btrfs_path *path,
6599                                  u64 bytenr, u64 new_len)
6600 {
6601         struct btrfs_key key;
6602         struct btrfs_key found_key;
6603         struct extent_buffer *leaf;
6604         int ret;
6605         int slot;
6606
6607
6608         key.objectid = bytenr;
6609         key.type = (u8)-1;
6610         key.offset = (u64)-1;
6611
6612         while(1) {
6613                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6614                                         &key, path, 0, 1);
6615                 if (ret < 0)
6616                         break;
6617
6618                 if (ret > 0) {
6619                         ret = 0;
6620                         if (path->slots[0] == 0)
6621                                 break;
6622                         path->slots[0]--;
6623                 }
6624                 ret = 0;
6625
6626                 leaf = path->nodes[0];
6627                 slot = path->slots[0];
6628
6629                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6630                 if (found_key.objectid != bytenr)
6631                         break;
6632
6633                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6634                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6635                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6636                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6637                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6638                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6639                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6640                         btrfs_release_path(path);
6641                         if (found_key.type == 0) {
6642                                 if (found_key.offset == 0)
6643                                         break;
6644                                 key.offset = found_key.offset - 1;
6645                                 key.type = found_key.type;
6646                         }
6647                         key.type = found_key.type - 1;
6648                         key.offset = (u64)-1;
6649                         continue;
6650                 }
6651
6652                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6653                         found_key.objectid, found_key.type, found_key.offset);
6654
6655                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6656                 if (ret)
6657                         break;
6658                 btrfs_release_path(path);
6659
6660                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6661                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6662                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6663                                 found_key.offset : root->nodesize;
6664
6665                         ret = btrfs_update_block_group(trans, root, bytenr,
6666                                                        bytes, 0, 0);
6667                         if (ret)
6668                                 break;
6669                 }
6670         }
6671
6672         btrfs_release_path(path);
6673         return ret;
6674 }
6675
6676 /*
6677  * for a single backref, this will allocate a new extent
6678  * and add the backref to it.
6679  */
6680 static int record_extent(struct btrfs_trans_handle *trans,
6681                          struct btrfs_fs_info *info,
6682                          struct btrfs_path *path,
6683                          struct extent_record *rec,
6684                          struct extent_backref *back,
6685                          int allocated, u64 flags)
6686 {
6687         int ret;
6688         struct btrfs_root *extent_root = info->extent_root;
6689         struct extent_buffer *leaf;
6690         struct btrfs_key ins_key;
6691         struct btrfs_extent_item *ei;
6692         struct tree_backref *tback;
6693         struct data_backref *dback;
6694         struct btrfs_tree_block_info *bi;
6695
6696         if (!back->is_data)
6697                 rec->max_size = max_t(u64, rec->max_size,
6698                                     info->extent_root->nodesize);
6699
6700         if (!allocated) {
6701                 u32 item_size = sizeof(*ei);
6702
6703                 if (!back->is_data)
6704                         item_size += sizeof(*bi);
6705
6706                 ins_key.objectid = rec->start;
6707                 ins_key.offset = rec->max_size;
6708                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6709
6710                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6711                                         &ins_key, item_size);
6712                 if (ret)
6713                         goto fail;
6714
6715                 leaf = path->nodes[0];
6716                 ei = btrfs_item_ptr(leaf, path->slots[0],
6717                                     struct btrfs_extent_item);
6718
6719                 btrfs_set_extent_refs(leaf, ei, 0);
6720                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6721
6722                 if (back->is_data) {
6723                         btrfs_set_extent_flags(leaf, ei,
6724                                                BTRFS_EXTENT_FLAG_DATA);
6725                 } else {
6726                         struct btrfs_disk_key copy_key;;
6727
6728                         tback = to_tree_backref(back);
6729                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6730                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6731                                              sizeof(*bi));
6732
6733                         btrfs_set_disk_key_objectid(&copy_key,
6734                                                     rec->info_objectid);
6735                         btrfs_set_disk_key_type(&copy_key, 0);
6736                         btrfs_set_disk_key_offset(&copy_key, 0);
6737
6738                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6739                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6740
6741                         btrfs_set_extent_flags(leaf, ei,
6742                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6743                 }
6744
6745                 btrfs_mark_buffer_dirty(leaf);
6746                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6747                                                rec->max_size, 1, 0);
6748                 if (ret)
6749                         goto fail;
6750                 btrfs_release_path(path);
6751         }
6752
6753         if (back->is_data) {
6754                 u64 parent;
6755                 int i;
6756
6757                 dback = to_data_backref(back);
6758                 if (back->full_backref)
6759                         parent = dback->parent;
6760                 else
6761                         parent = 0;
6762
6763                 for (i = 0; i < dback->found_ref; i++) {
6764                         /* if parent != 0, we're doing a full backref
6765                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6766                          * just makes the backref allocator create a data
6767                          * backref
6768                          */
6769                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6770                                                    rec->start, rec->max_size,
6771                                                    parent,
6772                                                    dback->root,
6773                                                    parent ?
6774                                                    BTRFS_FIRST_FREE_OBJECTID :
6775                                                    dback->owner,
6776                                                    dback->offset);
6777                         if (ret)
6778                                 break;
6779                 }
6780                 fprintf(stderr, "adding new data backref"
6781                                 " on %llu %s %llu owner %llu"
6782                                 " offset %llu found %d\n",
6783                                 (unsigned long long)rec->start,
6784                                 back->full_backref ?
6785                                 "parent" : "root",
6786                                 back->full_backref ?
6787                                 (unsigned long long)parent :
6788                                 (unsigned long long)dback->root,
6789                                 (unsigned long long)dback->owner,
6790                                 (unsigned long long)dback->offset,
6791                                 dback->found_ref);
6792         } else {
6793                 u64 parent;
6794
6795                 tback = to_tree_backref(back);
6796                 if (back->full_backref)
6797                         parent = tback->parent;
6798                 else
6799                         parent = 0;
6800
6801                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6802                                            rec->start, rec->max_size,
6803                                            parent, tback->root, 0, 0);
6804                 fprintf(stderr, "adding new tree backref on "
6805                         "start %llu len %llu parent %llu root %llu\n",
6806                         rec->start, rec->max_size, parent, tback->root);
6807         }
6808 fail:
6809         btrfs_release_path(path);
6810         return ret;
6811 }
6812
6813 static struct extent_entry *find_entry(struct list_head *entries,
6814                                        u64 bytenr, u64 bytes)
6815 {
6816         struct extent_entry *entry = NULL;
6817
6818         list_for_each_entry(entry, entries, list) {
6819                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6820                         return entry;
6821         }
6822
6823         return NULL;
6824 }
6825
6826 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6827 {
6828         struct extent_entry *entry, *best = NULL, *prev = NULL;
6829
6830         list_for_each_entry(entry, entries, list) {
6831                 if (!prev) {
6832                         prev = entry;
6833                         continue;
6834                 }
6835
6836                 /*
6837                  * If there are as many broken entries as entries then we know
6838                  * not to trust this particular entry.
6839                  */
6840                 if (entry->broken == entry->count)
6841                         continue;
6842
6843                 /*
6844                  * If our current entry == best then we can't be sure our best
6845                  * is really the best, so we need to keep searching.
6846                  */
6847                 if (best && best->count == entry->count) {
6848                         prev = entry;
6849                         best = NULL;
6850                         continue;
6851                 }
6852
6853                 /* Prev == entry, not good enough, have to keep searching */
6854                 if (!prev->broken && prev->count == entry->count)
6855                         continue;
6856
6857                 if (!best)
6858                         best = (prev->count > entry->count) ? prev : entry;
6859                 else if (best->count < entry->count)
6860                         best = entry;
6861                 prev = entry;
6862         }
6863
6864         return best;
6865 }
6866
6867 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6868                       struct data_backref *dback, struct extent_entry *entry)
6869 {
6870         struct btrfs_trans_handle *trans;
6871         struct btrfs_root *root;
6872         struct btrfs_file_extent_item *fi;
6873         struct extent_buffer *leaf;
6874         struct btrfs_key key;
6875         u64 bytenr, bytes;
6876         int ret, err;
6877
6878         key.objectid = dback->root;
6879         key.type = BTRFS_ROOT_ITEM_KEY;
6880         key.offset = (u64)-1;
6881         root = btrfs_read_fs_root(info, &key);
6882         if (IS_ERR(root)) {
6883                 fprintf(stderr, "Couldn't find root for our ref\n");
6884                 return -EINVAL;
6885         }
6886
6887         /*
6888          * The backref points to the original offset of the extent if it was
6889          * split, so we need to search down to the offset we have and then walk
6890          * forward until we find the backref we're looking for.
6891          */
6892         key.objectid = dback->owner;
6893         key.type = BTRFS_EXTENT_DATA_KEY;
6894         key.offset = dback->offset;
6895         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6896         if (ret < 0) {
6897                 fprintf(stderr, "Error looking up ref %d\n", ret);
6898                 return ret;
6899         }
6900
6901         while (1) {
6902                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6903                         ret = btrfs_next_leaf(root, path);
6904                         if (ret) {
6905                                 fprintf(stderr, "Couldn't find our ref, next\n");
6906                                 return -EINVAL;
6907                         }
6908                 }
6909                 leaf = path->nodes[0];
6910                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6911                 if (key.objectid != dback->owner ||
6912                     key.type != BTRFS_EXTENT_DATA_KEY) {
6913                         fprintf(stderr, "Couldn't find our ref, search\n");
6914                         return -EINVAL;
6915                 }
6916                 fi = btrfs_item_ptr(leaf, path->slots[0],
6917                                     struct btrfs_file_extent_item);
6918                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6919                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6920
6921                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6922                         break;
6923                 path->slots[0]++;
6924         }
6925
6926         btrfs_release_path(path);
6927
6928         trans = btrfs_start_transaction(root, 1);
6929         if (IS_ERR(trans))
6930                 return PTR_ERR(trans);
6931
6932         /*
6933          * Ok we have the key of the file extent we want to fix, now we can cow
6934          * down to the thing and fix it.
6935          */
6936         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6937         if (ret < 0) {
6938                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6939                         key.objectid, key.type, key.offset, ret);
6940                 goto out;
6941         }
6942         if (ret > 0) {
6943                 fprintf(stderr, "Well that's odd, we just found this key "
6944                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6945                         key.offset);
6946                 ret = -EINVAL;
6947                 goto out;
6948         }
6949         leaf = path->nodes[0];
6950         fi = btrfs_item_ptr(leaf, path->slots[0],
6951                             struct btrfs_file_extent_item);
6952
6953         if (btrfs_file_extent_compression(leaf, fi) &&
6954             dback->disk_bytenr != entry->bytenr) {
6955                 fprintf(stderr, "Ref doesn't match the record start and is "
6956                         "compressed, please take a btrfs-image of this file "
6957                         "system and send it to a btrfs developer so they can "
6958                         "complete this functionality for bytenr %Lu\n",
6959                         dback->disk_bytenr);
6960                 ret = -EINVAL;
6961                 goto out;
6962         }
6963
6964         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6965                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6966         } else if (dback->disk_bytenr > entry->bytenr) {
6967                 u64 off_diff, offset;
6968
6969                 off_diff = dback->disk_bytenr - entry->bytenr;
6970                 offset = btrfs_file_extent_offset(leaf, fi);
6971                 if (dback->disk_bytenr + offset +
6972                     btrfs_file_extent_num_bytes(leaf, fi) >
6973                     entry->bytenr + entry->bytes) {
6974                         fprintf(stderr, "Ref is past the entry end, please "
6975                                 "take a btrfs-image of this file system and "
6976                                 "send it to a btrfs developer, ref %Lu\n",
6977                                 dback->disk_bytenr);
6978                         ret = -EINVAL;
6979                         goto out;
6980                 }
6981                 offset += off_diff;
6982                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6983                 btrfs_set_file_extent_offset(leaf, fi, offset);
6984         } else if (dback->disk_bytenr < entry->bytenr) {
6985                 u64 offset;
6986
6987                 offset = btrfs_file_extent_offset(leaf, fi);
6988                 if (dback->disk_bytenr + offset < entry->bytenr) {
6989                         fprintf(stderr, "Ref is before the entry start, please"
6990                                 " take a btrfs-image of this file system and "
6991                                 "send it to a btrfs developer, ref %Lu\n",
6992                                 dback->disk_bytenr);
6993                         ret = -EINVAL;
6994                         goto out;
6995                 }
6996
6997                 offset += dback->disk_bytenr;
6998                 offset -= entry->bytenr;
6999                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7000                 btrfs_set_file_extent_offset(leaf, fi, offset);
7001         }
7002
7003         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7004
7005         /*
7006          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7007          * only do this if we aren't using compression, otherwise it's a
7008          * trickier case.
7009          */
7010         if (!btrfs_file_extent_compression(leaf, fi))
7011                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7012         else
7013                 printf("ram bytes may be wrong?\n");
7014         btrfs_mark_buffer_dirty(leaf);
7015 out:
7016         err = btrfs_commit_transaction(trans, root);
7017         btrfs_release_path(path);
7018         return ret ? ret : err;
7019 }
7020
7021 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7022                            struct extent_record *rec)
7023 {
7024         struct extent_backref *back;
7025         struct data_backref *dback;
7026         struct extent_entry *entry, *best = NULL;
7027         LIST_HEAD(entries);
7028         int nr_entries = 0;
7029         int broken_entries = 0;
7030         int ret = 0;
7031         short mismatch = 0;
7032
7033         /*
7034          * Metadata is easy and the backrefs should always agree on bytenr and
7035          * size, if not we've got bigger issues.
7036          */
7037         if (rec->metadata)
7038                 return 0;
7039
7040         list_for_each_entry(back, &rec->backrefs, list) {
7041                 if (back->full_backref || !back->is_data)
7042                         continue;
7043
7044                 dback = to_data_backref(back);
7045
7046                 /*
7047                  * We only pay attention to backrefs that we found a real
7048                  * backref for.
7049                  */
7050                 if (dback->found_ref == 0)
7051                         continue;
7052
7053                 /*
7054                  * For now we only catch when the bytes don't match, not the
7055                  * bytenr.  We can easily do this at the same time, but I want
7056                  * to have a fs image to test on before we just add repair
7057                  * functionality willy-nilly so we know we won't screw up the
7058                  * repair.
7059                  */
7060
7061                 entry = find_entry(&entries, dback->disk_bytenr,
7062                                    dback->bytes);
7063                 if (!entry) {
7064                         entry = malloc(sizeof(struct extent_entry));
7065                         if (!entry) {
7066                                 ret = -ENOMEM;
7067                                 goto out;
7068                         }
7069                         memset(entry, 0, sizeof(*entry));
7070                         entry->bytenr = dback->disk_bytenr;
7071                         entry->bytes = dback->bytes;
7072                         list_add_tail(&entry->list, &entries);
7073                         nr_entries++;
7074                 }
7075
7076                 /*
7077                  * If we only have on entry we may think the entries agree when
7078                  * in reality they don't so we have to do some extra checking.
7079                  */
7080                 if (dback->disk_bytenr != rec->start ||
7081                     dback->bytes != rec->nr || back->broken)
7082                         mismatch = 1;
7083
7084                 if (back->broken) {
7085                         entry->broken++;
7086                         broken_entries++;
7087                 }
7088
7089                 entry->count++;
7090         }
7091
7092         /* Yay all the backrefs agree, carry on good sir */
7093         if (nr_entries <= 1 && !mismatch)
7094                 goto out;
7095
7096         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7097                 "%Lu\n", rec->start);
7098
7099         /*
7100          * First we want to see if the backrefs can agree amongst themselves who
7101          * is right, so figure out which one of the entries has the highest
7102          * count.
7103          */
7104         best = find_most_right_entry(&entries);
7105
7106         /*
7107          * Ok so we may have an even split between what the backrefs think, so
7108          * this is where we use the extent ref to see what it thinks.
7109          */
7110         if (!best) {
7111                 entry = find_entry(&entries, rec->start, rec->nr);
7112                 if (!entry && (!broken_entries || !rec->found_rec)) {
7113                         fprintf(stderr, "Backrefs don't agree with each other "
7114                                 "and extent record doesn't agree with anybody,"
7115                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7116                                 rec->start, rec->nr);
7117                         ret = -EINVAL;
7118                         goto out;
7119                 } else if (!entry) {
7120                         /*
7121                          * Ok our backrefs were broken, we'll assume this is the
7122                          * correct value and add an entry for this range.
7123                          */
7124                         entry = malloc(sizeof(struct extent_entry));
7125                         if (!entry) {
7126                                 ret = -ENOMEM;
7127                                 goto out;
7128                         }
7129                         memset(entry, 0, sizeof(*entry));
7130                         entry->bytenr = rec->start;
7131                         entry->bytes = rec->nr;
7132                         list_add_tail(&entry->list, &entries);
7133                         nr_entries++;
7134                 }
7135                 entry->count++;
7136                 best = find_most_right_entry(&entries);
7137                 if (!best) {
7138                         fprintf(stderr, "Backrefs and extent record evenly "
7139                                 "split on who is right, this is going to "
7140                                 "require user input to fix bytenr %Lu bytes "
7141                                 "%Lu\n", rec->start, rec->nr);
7142                         ret = -EINVAL;
7143                         goto out;
7144                 }
7145         }
7146
7147         /*
7148          * I don't think this can happen currently as we'll abort() if we catch
7149          * this case higher up, but in case somebody removes that we still can't
7150          * deal with it properly here yet, so just bail out of that's the case.
7151          */
7152         if (best->bytenr != rec->start) {
7153                 fprintf(stderr, "Extent start and backref starts don't match, "
7154                         "please use btrfs-image on this file system and send "
7155                         "it to a btrfs developer so they can make fsck fix "
7156                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7157                         rec->start, rec->nr);
7158                 ret = -EINVAL;
7159                 goto out;
7160         }
7161
7162         /*
7163          * Ok great we all agreed on an extent record, let's go find the real
7164          * references and fix up the ones that don't match.
7165          */
7166         list_for_each_entry(back, &rec->backrefs, list) {
7167                 if (back->full_backref || !back->is_data)
7168                         continue;
7169
7170                 dback = to_data_backref(back);
7171
7172                 /*
7173                  * Still ignoring backrefs that don't have a real ref attached
7174                  * to them.
7175                  */
7176                 if (dback->found_ref == 0)
7177                         continue;
7178
7179                 if (dback->bytes == best->bytes &&
7180                     dback->disk_bytenr == best->bytenr)
7181                         continue;
7182
7183                 ret = repair_ref(info, path, dback, best);
7184                 if (ret)
7185                         goto out;
7186         }
7187
7188         /*
7189          * Ok we messed with the actual refs, which means we need to drop our
7190          * entire cache and go back and rescan.  I know this is a huge pain and
7191          * adds a lot of extra work, but it's the only way to be safe.  Once all
7192          * the backrefs agree we may not need to do anything to the extent
7193          * record itself.
7194          */
7195         ret = -EAGAIN;
7196 out:
7197         while (!list_empty(&entries)) {
7198                 entry = list_entry(entries.next, struct extent_entry, list);
7199                 list_del_init(&entry->list);
7200                 free(entry);
7201         }
7202         return ret;
7203 }
7204
7205 static int process_duplicates(struct btrfs_root *root,
7206                               struct cache_tree *extent_cache,
7207                               struct extent_record *rec)
7208 {
7209         struct extent_record *good, *tmp;
7210         struct cache_extent *cache;
7211         int ret;
7212
7213         /*
7214          * If we found a extent record for this extent then return, or if we
7215          * have more than one duplicate we are likely going to need to delete
7216          * something.
7217          */
7218         if (rec->found_rec || rec->num_duplicates > 1)
7219                 return 0;
7220
7221         /* Shouldn't happen but just in case */
7222         BUG_ON(!rec->num_duplicates);
7223
7224         /*
7225          * So this happens if we end up with a backref that doesn't match the
7226          * actual extent entry.  So either the backref is bad or the extent
7227          * entry is bad.  Either way we want to have the extent_record actually
7228          * reflect what we found in the extent_tree, so we need to take the
7229          * duplicate out and use that as the extent_record since the only way we
7230          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7231          */
7232         remove_cache_extent(extent_cache, &rec->cache);
7233
7234         good = to_extent_record(rec->dups.next);
7235         list_del_init(&good->list);
7236         INIT_LIST_HEAD(&good->backrefs);
7237         INIT_LIST_HEAD(&good->dups);
7238         good->cache.start = good->start;
7239         good->cache.size = good->nr;
7240         good->content_checked = 0;
7241         good->owner_ref_checked = 0;
7242         good->num_duplicates = 0;
7243         good->refs = rec->refs;
7244         list_splice_init(&rec->backrefs, &good->backrefs);
7245         while (1) {
7246                 cache = lookup_cache_extent(extent_cache, good->start,
7247                                             good->nr);
7248                 if (!cache)
7249                         break;
7250                 tmp = container_of(cache, struct extent_record, cache);
7251
7252                 /*
7253                  * If we find another overlapping extent and it's found_rec is
7254                  * set then it's a duplicate and we need to try and delete
7255                  * something.
7256                  */
7257                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7258                         if (list_empty(&good->list))
7259                                 list_add_tail(&good->list,
7260                                               &duplicate_extents);
7261                         good->num_duplicates += tmp->num_duplicates + 1;
7262                         list_splice_init(&tmp->dups, &good->dups);
7263                         list_del_init(&tmp->list);
7264                         list_add_tail(&tmp->list, &good->dups);
7265                         remove_cache_extent(extent_cache, &tmp->cache);
7266                         continue;
7267                 }
7268
7269                 /*
7270                  * Ok we have another non extent item backed extent rec, so lets
7271                  * just add it to this extent and carry on like we did above.
7272                  */
7273                 good->refs += tmp->refs;
7274                 list_splice_init(&tmp->backrefs, &good->backrefs);
7275                 remove_cache_extent(extent_cache, &tmp->cache);
7276                 free(tmp);
7277         }
7278         ret = insert_cache_extent(extent_cache, &good->cache);
7279         BUG_ON(ret);
7280         free(rec);
7281         return good->num_duplicates ? 0 : 1;
7282 }
7283
7284 static int delete_duplicate_records(struct btrfs_root *root,
7285                                     struct extent_record *rec)
7286 {
7287         struct btrfs_trans_handle *trans;
7288         LIST_HEAD(delete_list);
7289         struct btrfs_path *path;
7290         struct extent_record *tmp, *good, *n;
7291         int nr_del = 0;
7292         int ret = 0, err;
7293         struct btrfs_key key;
7294
7295         path = btrfs_alloc_path();
7296         if (!path) {
7297                 ret = -ENOMEM;
7298                 goto out;
7299         }
7300
7301         good = rec;
7302         /* Find the record that covers all of the duplicates. */
7303         list_for_each_entry(tmp, &rec->dups, list) {
7304                 if (good->start < tmp->start)
7305                         continue;
7306                 if (good->nr > tmp->nr)
7307                         continue;
7308
7309                 if (tmp->start + tmp->nr < good->start + good->nr) {
7310                         fprintf(stderr, "Ok we have overlapping extents that "
7311                                 "aren't completely covered by each other, this "
7312                                 "is going to require more careful thought.  "
7313                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7314                                 tmp->start, tmp->nr, good->start, good->nr);
7315                         abort();
7316                 }
7317                 good = tmp;
7318         }
7319
7320         if (good != rec)
7321                 list_add_tail(&rec->list, &delete_list);
7322
7323         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7324                 if (tmp == good)
7325                         continue;
7326                 list_move_tail(&tmp->list, &delete_list);
7327         }
7328
7329         root = root->fs_info->extent_root;
7330         trans = btrfs_start_transaction(root, 1);
7331         if (IS_ERR(trans)) {
7332                 ret = PTR_ERR(trans);
7333                 goto out;
7334         }
7335
7336         list_for_each_entry(tmp, &delete_list, list) {
7337                 if (tmp->found_rec == 0)
7338                         continue;
7339                 key.objectid = tmp->start;
7340                 key.type = BTRFS_EXTENT_ITEM_KEY;
7341                 key.offset = tmp->nr;
7342
7343                 /* Shouldn't happen but just in case */
7344                 if (tmp->metadata) {
7345                         fprintf(stderr, "Well this shouldn't happen, extent "
7346                                 "record overlaps but is metadata? "
7347                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7348                         abort();
7349                 }
7350
7351                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7352                 if (ret) {
7353                         if (ret > 0)
7354                                 ret = -EINVAL;
7355                         break;
7356                 }
7357                 ret = btrfs_del_item(trans, root, path);
7358                 if (ret)
7359                         break;
7360                 btrfs_release_path(path);
7361                 nr_del++;
7362         }
7363         err = btrfs_commit_transaction(trans, root);
7364         if (err && !ret)
7365                 ret = err;
7366 out:
7367         while (!list_empty(&delete_list)) {
7368                 tmp = to_extent_record(delete_list.next);
7369                 list_del_init(&tmp->list);
7370                 if (tmp == rec)
7371                         continue;
7372                 free(tmp);
7373         }
7374
7375         while (!list_empty(&rec->dups)) {
7376                 tmp = to_extent_record(rec->dups.next);
7377                 list_del_init(&tmp->list);
7378                 free(tmp);
7379         }
7380
7381         btrfs_free_path(path);
7382
7383         if (!ret && !nr_del)
7384                 rec->num_duplicates = 0;
7385
7386         return ret ? ret : nr_del;
7387 }
7388
7389 static int find_possible_backrefs(struct btrfs_fs_info *info,
7390                                   struct btrfs_path *path,
7391                                   struct cache_tree *extent_cache,
7392                                   struct extent_record *rec)
7393 {
7394         struct btrfs_root *root;
7395         struct extent_backref *back;
7396         struct data_backref *dback;
7397         struct cache_extent *cache;
7398         struct btrfs_file_extent_item *fi;
7399         struct btrfs_key key;
7400         u64 bytenr, bytes;
7401         int ret;
7402
7403         list_for_each_entry(back, &rec->backrefs, list) {
7404                 /* Don't care about full backrefs (poor unloved backrefs) */
7405                 if (back->full_backref || !back->is_data)
7406                         continue;
7407
7408                 dback = to_data_backref(back);
7409
7410                 /* We found this one, we don't need to do a lookup */
7411                 if (dback->found_ref)
7412                         continue;
7413
7414                 key.objectid = dback->root;
7415                 key.type = BTRFS_ROOT_ITEM_KEY;
7416                 key.offset = (u64)-1;
7417
7418                 root = btrfs_read_fs_root(info, &key);
7419
7420                 /* No root, definitely a bad ref, skip */
7421                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7422                         continue;
7423                 /* Other err, exit */
7424                 if (IS_ERR(root))
7425                         return PTR_ERR(root);
7426
7427                 key.objectid = dback->owner;
7428                 key.type = BTRFS_EXTENT_DATA_KEY;
7429                 key.offset = dback->offset;
7430                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7431                 if (ret) {
7432                         btrfs_release_path(path);
7433                         if (ret < 0)
7434                                 return ret;
7435                         /* Didn't find it, we can carry on */
7436                         ret = 0;
7437                         continue;
7438                 }
7439
7440                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7441                                     struct btrfs_file_extent_item);
7442                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7443                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7444                 btrfs_release_path(path);
7445                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7446                 if (cache) {
7447                         struct extent_record *tmp;
7448                         tmp = container_of(cache, struct extent_record, cache);
7449
7450                         /*
7451                          * If we found an extent record for the bytenr for this
7452                          * particular backref then we can't add it to our
7453                          * current extent record.  We only want to add backrefs
7454                          * that don't have a corresponding extent item in the
7455                          * extent tree since they likely belong to this record
7456                          * and we need to fix it if it doesn't match bytenrs.
7457                          */
7458                         if  (tmp->found_rec)
7459                                 continue;
7460                 }
7461
7462                 dback->found_ref += 1;
7463                 dback->disk_bytenr = bytenr;
7464                 dback->bytes = bytes;
7465
7466                 /*
7467                  * Set this so the verify backref code knows not to trust the
7468                  * values in this backref.
7469                  */
7470                 back->broken = 1;
7471         }
7472
7473         return 0;
7474 }
7475
7476 /*
7477  * Record orphan data ref into corresponding root.
7478  *
7479  * Return 0 if the extent item contains data ref and recorded.
7480  * Return 1 if the extent item contains no useful data ref
7481  *   On that case, it may contains only shared_dataref or metadata backref
7482  *   or the file extent exists(this should be handled by the extent bytenr
7483  *   recovery routine)
7484  * Return <0 if something goes wrong.
7485  */
7486 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7487                                       struct extent_record *rec)
7488 {
7489         struct btrfs_key key;
7490         struct btrfs_root *dest_root;
7491         struct extent_backref *back;
7492         struct data_backref *dback;
7493         struct orphan_data_extent *orphan;
7494         struct btrfs_path *path;
7495         int recorded_data_ref = 0;
7496         int ret = 0;
7497
7498         if (rec->metadata)
7499                 return 1;
7500         path = btrfs_alloc_path();
7501         if (!path)
7502                 return -ENOMEM;
7503         list_for_each_entry(back, &rec->backrefs, list) {
7504                 if (back->full_backref || !back->is_data ||
7505                     !back->found_extent_tree)
7506                         continue;
7507                 dback = to_data_backref(back);
7508                 if (dback->found_ref)
7509                         continue;
7510                 key.objectid = dback->root;
7511                 key.type = BTRFS_ROOT_ITEM_KEY;
7512                 key.offset = (u64)-1;
7513
7514                 dest_root = btrfs_read_fs_root(fs_info, &key);
7515
7516                 /* For non-exist root we just skip it */
7517                 if (IS_ERR(dest_root) || !dest_root)
7518                         continue;
7519
7520                 key.objectid = dback->owner;
7521                 key.type = BTRFS_EXTENT_DATA_KEY;
7522                 key.offset = dback->offset;
7523
7524                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7525                 /*
7526                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7527                  * we need to record it for inode/file extent rebuild.
7528                  * For ret > 0, we record it only for file extent rebuild.
7529                  * For ret == 0, the file extent exists but only bytenr
7530                  * mismatch, let the original bytenr fix routine to handle,
7531                  * don't record it.
7532                  */
7533                 if (ret == 0)
7534                         continue;
7535                 ret = 0;
7536                 orphan = malloc(sizeof(*orphan));
7537                 if (!orphan) {
7538                         ret = -ENOMEM;
7539                         goto out;
7540                 }
7541                 INIT_LIST_HEAD(&orphan->list);
7542                 orphan->root = dback->root;
7543                 orphan->objectid = dback->owner;
7544                 orphan->offset = dback->offset;
7545                 orphan->disk_bytenr = rec->cache.start;
7546                 orphan->disk_len = rec->cache.size;
7547                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7548                 recorded_data_ref = 1;
7549         }
7550 out:
7551         btrfs_free_path(path);
7552         if (!ret)
7553                 return !recorded_data_ref;
7554         else
7555                 return ret;
7556 }
7557
7558 /*
7559  * when an incorrect extent item is found, this will delete
7560  * all of the existing entries for it and recreate them
7561  * based on what the tree scan found.
7562  */
7563 static int fixup_extent_refs(struct btrfs_fs_info *info,
7564                              struct cache_tree *extent_cache,
7565                              struct extent_record *rec)
7566 {
7567         struct btrfs_trans_handle *trans = NULL;
7568         int ret;
7569         struct btrfs_path *path;
7570         struct list_head *cur = rec->backrefs.next;
7571         struct cache_extent *cache;
7572         struct extent_backref *back;
7573         int allocated = 0;
7574         u64 flags = 0;
7575
7576         if (rec->flag_block_full_backref)
7577                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7578
7579         path = btrfs_alloc_path();
7580         if (!path)
7581                 return -ENOMEM;
7582
7583         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7584                 /*
7585                  * Sometimes the backrefs themselves are so broken they don't
7586                  * get attached to any meaningful rec, so first go back and
7587                  * check any of our backrefs that we couldn't find and throw
7588                  * them into the list if we find the backref so that
7589                  * verify_backrefs can figure out what to do.
7590                  */
7591                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7592                 if (ret < 0)
7593                         goto out;
7594         }
7595
7596         /* step one, make sure all of the backrefs agree */
7597         ret = verify_backrefs(info, path, rec);
7598         if (ret < 0)
7599                 goto out;
7600
7601         trans = btrfs_start_transaction(info->extent_root, 1);
7602         if (IS_ERR(trans)) {
7603                 ret = PTR_ERR(trans);
7604                 goto out;
7605         }
7606
7607         /* step two, delete all the existing records */
7608         ret = delete_extent_records(trans, info->extent_root, path,
7609                                     rec->start, rec->max_size);
7610
7611         if (ret < 0)
7612                 goto out;
7613
7614         /* was this block corrupt?  If so, don't add references to it */
7615         cache = lookup_cache_extent(info->corrupt_blocks,
7616                                     rec->start, rec->max_size);
7617         if (cache) {
7618                 ret = 0;
7619                 goto out;
7620         }
7621
7622         /* step three, recreate all the refs we did find */
7623         while(cur != &rec->backrefs) {
7624                 back = to_extent_backref(cur);
7625                 cur = cur->next;
7626
7627                 /*
7628                  * if we didn't find any references, don't create a
7629                  * new extent record
7630                  */
7631                 if (!back->found_ref)
7632                         continue;
7633
7634                 rec->bad_full_backref = 0;
7635                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7636                 allocated = 1;
7637
7638                 if (ret)
7639                         goto out;
7640         }
7641 out:
7642         if (trans) {
7643                 int err = btrfs_commit_transaction(trans, info->extent_root);
7644                 if (!ret)
7645                         ret = err;
7646         }
7647
7648         btrfs_free_path(path);
7649         return ret;
7650 }
7651
7652 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7653                               struct extent_record *rec)
7654 {
7655         struct btrfs_trans_handle *trans;
7656         struct btrfs_root *root = fs_info->extent_root;
7657         struct btrfs_path *path;
7658         struct btrfs_extent_item *ei;
7659         struct btrfs_key key;
7660         u64 flags;
7661         int ret = 0;
7662
7663         key.objectid = rec->start;
7664         if (rec->metadata) {
7665                 key.type = BTRFS_METADATA_ITEM_KEY;
7666                 key.offset = rec->info_level;
7667         } else {
7668                 key.type = BTRFS_EXTENT_ITEM_KEY;
7669                 key.offset = rec->max_size;
7670         }
7671
7672         path = btrfs_alloc_path();
7673         if (!path)
7674                 return -ENOMEM;
7675
7676         trans = btrfs_start_transaction(root, 0);
7677         if (IS_ERR(trans)) {
7678                 btrfs_free_path(path);
7679                 return PTR_ERR(trans);
7680         }
7681
7682         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7683         if (ret < 0) {
7684                 btrfs_free_path(path);
7685                 btrfs_commit_transaction(trans, root);
7686                 return ret;
7687         } else if (ret) {
7688                 fprintf(stderr, "Didn't find extent for %llu\n",
7689                         (unsigned long long)rec->start);
7690                 btrfs_free_path(path);
7691                 btrfs_commit_transaction(trans, root);
7692                 return -ENOENT;
7693         }
7694
7695         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7696                             struct btrfs_extent_item);
7697         flags = btrfs_extent_flags(path->nodes[0], ei);
7698         if (rec->flag_block_full_backref) {
7699                 fprintf(stderr, "setting full backref on %llu\n",
7700                         (unsigned long long)key.objectid);
7701                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7702         } else {
7703                 fprintf(stderr, "clearing full backref on %llu\n",
7704                         (unsigned long long)key.objectid);
7705                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7706         }
7707         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7708         btrfs_mark_buffer_dirty(path->nodes[0]);
7709         btrfs_free_path(path);
7710         return btrfs_commit_transaction(trans, root);
7711 }
7712
7713 /* right now we only prune from the extent allocation tree */
7714 static int prune_one_block(struct btrfs_trans_handle *trans,
7715                            struct btrfs_fs_info *info,
7716                            struct btrfs_corrupt_block *corrupt)
7717 {
7718         int ret;
7719         struct btrfs_path path;
7720         struct extent_buffer *eb;
7721         u64 found;
7722         int slot;
7723         int nritems;
7724         int level = corrupt->level + 1;
7725
7726         btrfs_init_path(&path);
7727 again:
7728         /* we want to stop at the parent to our busted block */
7729         path.lowest_level = level;
7730
7731         ret = btrfs_search_slot(trans, info->extent_root,
7732                                 &corrupt->key, &path, -1, 1);
7733
7734         if (ret < 0)
7735                 goto out;
7736
7737         eb = path.nodes[level];
7738         if (!eb) {
7739                 ret = -ENOENT;
7740                 goto out;
7741         }
7742
7743         /*
7744          * hopefully the search gave us the block we want to prune,
7745          * lets try that first
7746          */
7747         slot = path.slots[level];
7748         found =  btrfs_node_blockptr(eb, slot);
7749         if (found == corrupt->cache.start)
7750                 goto del_ptr;
7751
7752         nritems = btrfs_header_nritems(eb);
7753
7754         /* the search failed, lets scan this node and hope we find it */
7755         for (slot = 0; slot < nritems; slot++) {
7756                 found =  btrfs_node_blockptr(eb, slot);
7757                 if (found == corrupt->cache.start)
7758                         goto del_ptr;
7759         }
7760         /*
7761          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7762          * to this block
7763          */
7764         if (eb == info->extent_root->node) {
7765                 ret = -ENOENT;
7766                 goto out;
7767         } else {
7768                 level++;
7769                 btrfs_release_path(&path);
7770                 goto again;
7771         }
7772
7773 del_ptr:
7774         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7775         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7776
7777 out:
7778         btrfs_release_path(&path);
7779         return ret;
7780 }
7781
7782 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7783 {
7784         struct btrfs_trans_handle *trans = NULL;
7785         struct cache_extent *cache;
7786         struct btrfs_corrupt_block *corrupt;
7787
7788         while (1) {
7789                 cache = search_cache_extent(info->corrupt_blocks, 0);
7790                 if (!cache)
7791                         break;
7792                 if (!trans) {
7793                         trans = btrfs_start_transaction(info->extent_root, 1);
7794                         if (IS_ERR(trans))
7795                                 return PTR_ERR(trans);
7796                 }
7797                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7798                 prune_one_block(trans, info, corrupt);
7799                 remove_cache_extent(info->corrupt_blocks, cache);
7800         }
7801         if (trans)
7802                 return btrfs_commit_transaction(trans, info->extent_root);
7803         return 0;
7804 }
7805
7806 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7807 {
7808         struct btrfs_block_group_cache *cache;
7809         u64 start, end;
7810         int ret;
7811
7812         while (1) {
7813                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7814                                             &start, &end, EXTENT_DIRTY);
7815                 if (ret)
7816                         break;
7817                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7818                                    GFP_NOFS);
7819         }
7820
7821         start = 0;
7822         while (1) {
7823                 cache = btrfs_lookup_first_block_group(fs_info, start);
7824                 if (!cache)
7825                         break;
7826                 if (cache->cached)
7827                         cache->cached = 0;
7828                 start = cache->key.objectid + cache->key.offset;
7829         }
7830 }
7831
7832 static int check_extent_refs(struct btrfs_root *root,
7833                              struct cache_tree *extent_cache)
7834 {
7835         struct extent_record *rec;
7836         struct cache_extent *cache;
7837         int err = 0;
7838         int ret = 0;
7839         int fixed = 0;
7840         int had_dups = 0;
7841         int recorded = 0;
7842
7843         if (repair) {
7844                 /*
7845                  * if we're doing a repair, we have to make sure
7846                  * we don't allocate from the problem extents.
7847                  * In the worst case, this will be all the
7848                  * extents in the FS
7849                  */
7850                 cache = search_cache_extent(extent_cache, 0);
7851                 while(cache) {
7852                         rec = container_of(cache, struct extent_record, cache);
7853                         set_extent_dirty(root->fs_info->excluded_extents,
7854                                          rec->start,
7855                                          rec->start + rec->max_size - 1,
7856                                          GFP_NOFS);
7857                         cache = next_cache_extent(cache);
7858                 }
7859
7860                 /* pin down all the corrupted blocks too */
7861                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7862                 while(cache) {
7863                         set_extent_dirty(root->fs_info->excluded_extents,
7864                                          cache->start,
7865                                          cache->start + cache->size - 1,
7866                                          GFP_NOFS);
7867                         cache = next_cache_extent(cache);
7868                 }
7869                 prune_corrupt_blocks(root->fs_info);
7870                 reset_cached_block_groups(root->fs_info);
7871         }
7872
7873         reset_cached_block_groups(root->fs_info);
7874
7875         /*
7876          * We need to delete any duplicate entries we find first otherwise we
7877          * could mess up the extent tree when we have backrefs that actually
7878          * belong to a different extent item and not the weird duplicate one.
7879          */
7880         while (repair && !list_empty(&duplicate_extents)) {
7881                 rec = to_extent_record(duplicate_extents.next);
7882                 list_del_init(&rec->list);
7883
7884                 /* Sometimes we can find a backref before we find an actual
7885                  * extent, so we need to process it a little bit to see if there
7886                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7887                  * if this is a backref screwup.  If we need to delete stuff
7888                  * process_duplicates() will return 0, otherwise it will return
7889                  * 1 and we
7890                  */
7891                 if (process_duplicates(root, extent_cache, rec))
7892                         continue;
7893                 ret = delete_duplicate_records(root, rec);
7894                 if (ret < 0)
7895                         return ret;
7896                 /*
7897                  * delete_duplicate_records will return the number of entries
7898                  * deleted, so if it's greater than 0 then we know we actually
7899                  * did something and we need to remove.
7900                  */
7901                 if (ret)
7902                         had_dups = 1;
7903         }
7904
7905         if (had_dups)
7906                 return -EAGAIN;
7907
7908         while(1) {
7909                 int cur_err = 0;
7910
7911                 fixed = 0;
7912                 recorded = 0;
7913                 cache = search_cache_extent(extent_cache, 0);
7914                 if (!cache)
7915                         break;
7916                 rec = container_of(cache, struct extent_record, cache);
7917                 if (rec->num_duplicates) {
7918                         fprintf(stderr, "extent item %llu has multiple extent "
7919                                 "items\n", (unsigned long long)rec->start);
7920                         err = 1;
7921                         cur_err = 1;
7922                 }
7923
7924                 if (rec->refs != rec->extent_item_refs) {
7925                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7926                                 (unsigned long long)rec->start,
7927                                 (unsigned long long)rec->nr);
7928                         fprintf(stderr, "extent item %llu, found %llu\n",
7929                                 (unsigned long long)rec->extent_item_refs,
7930                                 (unsigned long long)rec->refs);
7931                         ret = record_orphan_data_extents(root->fs_info, rec);
7932                         if (ret < 0)
7933                                 goto repair_abort;
7934                         if (ret == 0) {
7935                                 recorded = 1;
7936                         } else {
7937                                 /*
7938                                  * we can't use the extent to repair file
7939                                  * extent, let the fallback method handle it.
7940                                  */
7941                                 if (!fixed && repair) {
7942                                         ret = fixup_extent_refs(
7943                                                         root->fs_info,
7944                                                         extent_cache, rec);
7945                                         if (ret)
7946                                                 goto repair_abort;
7947                                         fixed = 1;
7948                                 }
7949                         }
7950                         err = 1;
7951                         cur_err = 1;
7952                 }
7953                 if (all_backpointers_checked(rec, 1)) {
7954                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7955                                 (unsigned long long)rec->start,
7956                                 (unsigned long long)rec->nr);
7957
7958                         if (!fixed && !recorded && repair) {
7959                                 ret = fixup_extent_refs(root->fs_info,
7960                                                         extent_cache, rec);
7961                                 if (ret)
7962                                         goto repair_abort;
7963                                 fixed = 1;
7964                         }
7965                         cur_err = 1;
7966                         err = 1;
7967                 }
7968                 if (!rec->owner_ref_checked) {
7969                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7970                                 (unsigned long long)rec->start,
7971                                 (unsigned long long)rec->nr);
7972                         if (!fixed && !recorded && repair) {
7973                                 ret = fixup_extent_refs(root->fs_info,
7974                                                         extent_cache, rec);
7975                                 if (ret)
7976                                         goto repair_abort;
7977                                 fixed = 1;
7978                         }
7979                         err = 1;
7980                         cur_err = 1;
7981                 }
7982                 if (rec->bad_full_backref) {
7983                         fprintf(stderr, "bad full backref, on [%llu]\n",
7984                                 (unsigned long long)rec->start);
7985                         if (repair) {
7986                                 ret = fixup_extent_flags(root->fs_info, rec);
7987                                 if (ret)
7988                                         goto repair_abort;
7989                                 fixed = 1;
7990                         }
7991                         err = 1;
7992                         cur_err = 1;
7993                 }
7994                 /*
7995                  * Although it's not a extent ref's problem, we reuse this
7996                  * routine for error reporting.
7997                  * No repair function yet.
7998                  */
7999                 if (rec->crossing_stripes) {
8000                         fprintf(stderr,
8001                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8002                                 rec->start, rec->start + rec->max_size);
8003                         err = 1;
8004                         cur_err = 1;
8005                 }
8006
8007                 if (rec->wrong_chunk_type) {
8008                         fprintf(stderr,
8009                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8010                                 rec->start, rec->start + rec->max_size);
8011                         err = 1;
8012                         cur_err = 1;
8013                 }
8014
8015                 remove_cache_extent(extent_cache, cache);
8016                 free_all_extent_backrefs(rec);
8017                 if (!init_extent_tree && repair && (!cur_err || fixed))
8018                         clear_extent_dirty(root->fs_info->excluded_extents,
8019                                            rec->start,
8020                                            rec->start + rec->max_size - 1,
8021                                            GFP_NOFS);
8022                 free(rec);
8023         }
8024 repair_abort:
8025         if (repair) {
8026                 if (ret && ret != -EAGAIN) {
8027                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8028                         exit(1);
8029                 } else if (!ret) {
8030                         struct btrfs_trans_handle *trans;
8031
8032                         root = root->fs_info->extent_root;
8033                         trans = btrfs_start_transaction(root, 1);
8034                         if (IS_ERR(trans)) {
8035                                 ret = PTR_ERR(trans);
8036                                 goto repair_abort;
8037                         }
8038
8039                         btrfs_fix_block_accounting(trans, root);
8040                         ret = btrfs_commit_transaction(trans, root);
8041                         if (ret)
8042                                 goto repair_abort;
8043                 }
8044                 if (err)
8045                         fprintf(stderr, "repaired damaged extent references\n");
8046                 return ret;
8047         }
8048         return err;
8049 }
8050
8051 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8052 {
8053         u64 stripe_size;
8054
8055         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8056                 stripe_size = length;
8057                 stripe_size /= num_stripes;
8058         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8059                 stripe_size = length * 2;
8060                 stripe_size /= num_stripes;
8061         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8062                 stripe_size = length;
8063                 stripe_size /= (num_stripes - 1);
8064         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8065                 stripe_size = length;
8066                 stripe_size /= (num_stripes - 2);
8067         } else {
8068                 stripe_size = length;
8069         }
8070         return stripe_size;
8071 }
8072
8073 /*
8074  * Check the chunk with its block group/dev list ref:
8075  * Return 0 if all refs seems valid.
8076  * Return 1 if part of refs seems valid, need later check for rebuild ref
8077  * like missing block group and needs to search extent tree to rebuild them.
8078  * Return -1 if essential refs are missing and unable to rebuild.
8079  */
8080 static int check_chunk_refs(struct chunk_record *chunk_rec,
8081                             struct block_group_tree *block_group_cache,
8082                             struct device_extent_tree *dev_extent_cache,
8083                             int silent)
8084 {
8085         struct cache_extent *block_group_item;
8086         struct block_group_record *block_group_rec;
8087         struct cache_extent *dev_extent_item;
8088         struct device_extent_record *dev_extent_rec;
8089         u64 devid;
8090         u64 offset;
8091         u64 length;
8092         int metadump_v2 = 0;
8093         int i;
8094         int ret = 0;
8095
8096         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8097                                                chunk_rec->offset,
8098                                                chunk_rec->length);
8099         if (block_group_item) {
8100                 block_group_rec = container_of(block_group_item,
8101                                                struct block_group_record,
8102                                                cache);
8103                 if (chunk_rec->length != block_group_rec->offset ||
8104                     chunk_rec->offset != block_group_rec->objectid ||
8105                     (!metadump_v2 &&
8106                      chunk_rec->type_flags != block_group_rec->flags)) {
8107                         if (!silent)
8108                                 fprintf(stderr,
8109                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8110                                         chunk_rec->objectid,
8111                                         chunk_rec->type,
8112                                         chunk_rec->offset,
8113                                         chunk_rec->length,
8114                                         chunk_rec->offset,
8115                                         chunk_rec->type_flags,
8116                                         block_group_rec->objectid,
8117                                         block_group_rec->type,
8118                                         block_group_rec->offset,
8119                                         block_group_rec->offset,
8120                                         block_group_rec->objectid,
8121                                         block_group_rec->flags);
8122                         ret = -1;
8123                 } else {
8124                         list_del_init(&block_group_rec->list);
8125                         chunk_rec->bg_rec = block_group_rec;
8126                 }
8127         } else {
8128                 if (!silent)
8129                         fprintf(stderr,
8130                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8131                                 chunk_rec->objectid,
8132                                 chunk_rec->type,
8133                                 chunk_rec->offset,
8134                                 chunk_rec->length,
8135                                 chunk_rec->offset,
8136                                 chunk_rec->type_flags);
8137                 ret = 1;
8138         }
8139
8140         if (metadump_v2)
8141                 return ret;
8142
8143         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8144                                     chunk_rec->num_stripes);
8145         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8146                 devid = chunk_rec->stripes[i].devid;
8147                 offset = chunk_rec->stripes[i].offset;
8148                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8149                                                        devid, offset, length);
8150                 if (dev_extent_item) {
8151                         dev_extent_rec = container_of(dev_extent_item,
8152                                                 struct device_extent_record,
8153                                                 cache);
8154                         if (dev_extent_rec->objectid != devid ||
8155                             dev_extent_rec->offset != offset ||
8156                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8157                             dev_extent_rec->length != length) {
8158                                 if (!silent)
8159                                         fprintf(stderr,
8160                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8161                                                 chunk_rec->objectid,
8162                                                 chunk_rec->type,
8163                                                 chunk_rec->offset,
8164                                                 chunk_rec->stripes[i].devid,
8165                                                 chunk_rec->stripes[i].offset,
8166                                                 dev_extent_rec->objectid,
8167                                                 dev_extent_rec->offset,
8168                                                 dev_extent_rec->length);
8169                                 ret = -1;
8170                         } else {
8171                                 list_move(&dev_extent_rec->chunk_list,
8172                                           &chunk_rec->dextents);
8173                         }
8174                 } else {
8175                         if (!silent)
8176                                 fprintf(stderr,
8177                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8178                                         chunk_rec->objectid,
8179                                         chunk_rec->type,
8180                                         chunk_rec->offset,
8181                                         chunk_rec->stripes[i].devid,
8182                                         chunk_rec->stripes[i].offset);
8183                         ret = -1;
8184                 }
8185         }
8186         return ret;
8187 }
8188
8189 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8190 int check_chunks(struct cache_tree *chunk_cache,
8191                  struct block_group_tree *block_group_cache,
8192                  struct device_extent_tree *dev_extent_cache,
8193                  struct list_head *good, struct list_head *bad,
8194                  struct list_head *rebuild, int silent)
8195 {
8196         struct cache_extent *chunk_item;
8197         struct chunk_record *chunk_rec;
8198         struct block_group_record *bg_rec;
8199         struct device_extent_record *dext_rec;
8200         int err;
8201         int ret = 0;
8202
8203         chunk_item = first_cache_extent(chunk_cache);
8204         while (chunk_item) {
8205                 chunk_rec = container_of(chunk_item, struct chunk_record,
8206                                          cache);
8207                 err = check_chunk_refs(chunk_rec, block_group_cache,
8208                                        dev_extent_cache, silent);
8209                 if (err < 0)
8210                         ret = err;
8211                 if (err == 0 && good)
8212                         list_add_tail(&chunk_rec->list, good);
8213                 if (err > 0 && rebuild)
8214                         list_add_tail(&chunk_rec->list, rebuild);
8215                 if (err < 0 && bad)
8216                         list_add_tail(&chunk_rec->list, bad);
8217                 chunk_item = next_cache_extent(chunk_item);
8218         }
8219
8220         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8221                 if (!silent)
8222                         fprintf(stderr,
8223                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8224                                 bg_rec->objectid,
8225                                 bg_rec->offset,
8226                                 bg_rec->flags);
8227                 if (!ret)
8228                         ret = 1;
8229         }
8230
8231         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8232                             chunk_list) {
8233                 if (!silent)
8234                         fprintf(stderr,
8235                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8236                                 dext_rec->objectid,
8237                                 dext_rec->offset,
8238                                 dext_rec->length);
8239                 if (!ret)
8240                         ret = 1;
8241         }
8242         return ret;
8243 }
8244
8245
8246 static int check_device_used(struct device_record *dev_rec,
8247                              struct device_extent_tree *dext_cache)
8248 {
8249         struct cache_extent *cache;
8250         struct device_extent_record *dev_extent_rec;
8251         u64 total_byte = 0;
8252
8253         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8254         while (cache) {
8255                 dev_extent_rec = container_of(cache,
8256                                               struct device_extent_record,
8257                                               cache);
8258                 if (dev_extent_rec->objectid != dev_rec->devid)
8259                         break;
8260
8261                 list_del_init(&dev_extent_rec->device_list);
8262                 total_byte += dev_extent_rec->length;
8263                 cache = next_cache_extent(cache);
8264         }
8265
8266         if (total_byte != dev_rec->byte_used) {
8267                 fprintf(stderr,
8268                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8269                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8270                         dev_rec->type, dev_rec->offset);
8271                 return -1;
8272         } else {
8273                 return 0;
8274         }
8275 }
8276
8277 /* check btrfs_dev_item -> btrfs_dev_extent */
8278 static int check_devices(struct rb_root *dev_cache,
8279                          struct device_extent_tree *dev_extent_cache)
8280 {
8281         struct rb_node *dev_node;
8282         struct device_record *dev_rec;
8283         struct device_extent_record *dext_rec;
8284         int err;
8285         int ret = 0;
8286
8287         dev_node = rb_first(dev_cache);
8288         while (dev_node) {
8289                 dev_rec = container_of(dev_node, struct device_record, node);
8290                 err = check_device_used(dev_rec, dev_extent_cache);
8291                 if (err)
8292                         ret = err;
8293
8294                 dev_node = rb_next(dev_node);
8295         }
8296         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8297                             device_list) {
8298                 fprintf(stderr,
8299                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8300                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8301                 if (!ret)
8302                         ret = 1;
8303         }
8304         return ret;
8305 }
8306
8307 static int add_root_item_to_list(struct list_head *head,
8308                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8309                                   u8 level, u8 drop_level,
8310                                   int level_size, struct btrfs_key *drop_key)
8311 {
8312
8313         struct root_item_record *ri_rec;
8314         ri_rec = malloc(sizeof(*ri_rec));
8315         if (!ri_rec)
8316                 return -ENOMEM;
8317         ri_rec->bytenr = bytenr;
8318         ri_rec->objectid = objectid;
8319         ri_rec->level = level;
8320         ri_rec->level_size = level_size;
8321         ri_rec->drop_level = drop_level;
8322         ri_rec->last_snapshot = last_snapshot;
8323         if (drop_key)
8324                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8325         list_add_tail(&ri_rec->list, head);
8326
8327         return 0;
8328 }
8329
8330 static void free_root_item_list(struct list_head *list)
8331 {
8332         struct root_item_record *ri_rec;
8333
8334         while (!list_empty(list)) {
8335                 ri_rec = list_first_entry(list, struct root_item_record,
8336                                           list);
8337                 list_del_init(&ri_rec->list);
8338                 free(ri_rec);
8339         }
8340 }
8341
8342 static int deal_root_from_list(struct list_head *list,
8343                                struct btrfs_root *root,
8344                                struct block_info *bits,
8345                                int bits_nr,
8346                                struct cache_tree *pending,
8347                                struct cache_tree *seen,
8348                                struct cache_tree *reada,
8349                                struct cache_tree *nodes,
8350                                struct cache_tree *extent_cache,
8351                                struct cache_tree *chunk_cache,
8352                                struct rb_root *dev_cache,
8353                                struct block_group_tree *block_group_cache,
8354                                struct device_extent_tree *dev_extent_cache)
8355 {
8356         int ret = 0;
8357         u64 last;
8358
8359         while (!list_empty(list)) {
8360                 struct root_item_record *rec;
8361                 struct extent_buffer *buf;
8362                 rec = list_entry(list->next,
8363                                  struct root_item_record, list);
8364                 last = 0;
8365                 buf = read_tree_block(root->fs_info->tree_root,
8366                                       rec->bytenr, rec->level_size, 0);
8367                 if (!extent_buffer_uptodate(buf)) {
8368                         free_extent_buffer(buf);
8369                         ret = -EIO;
8370                         break;
8371                 }
8372                 ret = add_root_to_pending(buf, extent_cache, pending,
8373                                     seen, nodes, rec->objectid);
8374                 if (ret < 0)
8375                         break;
8376                 /*
8377                  * To rebuild extent tree, we need deal with snapshot
8378                  * one by one, otherwise we deal with node firstly which
8379                  * can maximize readahead.
8380                  */
8381                 while (1) {
8382                         ret = run_next_block(root, bits, bits_nr, &last,
8383                                              pending, seen, reada, nodes,
8384                                              extent_cache, chunk_cache,
8385                                              dev_cache, block_group_cache,
8386                                              dev_extent_cache, rec);
8387                         if (ret != 0)
8388                                 break;
8389                 }
8390                 free_extent_buffer(buf);
8391                 list_del(&rec->list);
8392                 free(rec);
8393                 if (ret < 0)
8394                         break;
8395         }
8396         while (ret >= 0) {
8397                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8398                                      reada, nodes, extent_cache, chunk_cache,
8399                                      dev_cache, block_group_cache,
8400                                      dev_extent_cache, NULL);
8401                 if (ret != 0) {
8402                         if (ret > 0)
8403                                 ret = 0;
8404                         break;
8405                 }
8406         }
8407         return ret;
8408 }
8409
8410 static int check_chunks_and_extents(struct btrfs_root *root)
8411 {
8412         struct rb_root dev_cache;
8413         struct cache_tree chunk_cache;
8414         struct block_group_tree block_group_cache;
8415         struct device_extent_tree dev_extent_cache;
8416         struct cache_tree extent_cache;
8417         struct cache_tree seen;
8418         struct cache_tree pending;
8419         struct cache_tree reada;
8420         struct cache_tree nodes;
8421         struct extent_io_tree excluded_extents;
8422         struct cache_tree corrupt_blocks;
8423         struct btrfs_path path;
8424         struct btrfs_key key;
8425         struct btrfs_key found_key;
8426         int ret, err = 0;
8427         struct block_info *bits;
8428         int bits_nr;
8429         struct extent_buffer *leaf;
8430         int slot;
8431         struct btrfs_root_item ri;
8432         struct list_head dropping_trees;
8433         struct list_head normal_trees;
8434         struct btrfs_root *root1;
8435         u64 objectid;
8436         u32 level_size;
8437         u8 level;
8438
8439         dev_cache = RB_ROOT;
8440         cache_tree_init(&chunk_cache);
8441         block_group_tree_init(&block_group_cache);
8442         device_extent_tree_init(&dev_extent_cache);
8443
8444         cache_tree_init(&extent_cache);
8445         cache_tree_init(&seen);
8446         cache_tree_init(&pending);
8447         cache_tree_init(&nodes);
8448         cache_tree_init(&reada);
8449         cache_tree_init(&corrupt_blocks);
8450         extent_io_tree_init(&excluded_extents);
8451         INIT_LIST_HEAD(&dropping_trees);
8452         INIT_LIST_HEAD(&normal_trees);
8453
8454         if (repair) {
8455                 root->fs_info->excluded_extents = &excluded_extents;
8456                 root->fs_info->fsck_extent_cache = &extent_cache;
8457                 root->fs_info->free_extent_hook = free_extent_hook;
8458                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8459         }
8460
8461         bits_nr = 1024;
8462         bits = malloc(bits_nr * sizeof(struct block_info));
8463         if (!bits) {
8464                 perror("malloc");
8465                 exit(1);
8466         }
8467
8468         if (ctx.progress_enabled) {
8469                 ctx.tp = TASK_EXTENTS;
8470                 task_start(ctx.info);
8471         }
8472
8473 again:
8474         root1 = root->fs_info->tree_root;
8475         level = btrfs_header_level(root1->node);
8476         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8477                                     root1->node->start, 0, level, 0,
8478                                     root1->nodesize, NULL);
8479         if (ret < 0)
8480                 goto out;
8481         root1 = root->fs_info->chunk_root;
8482         level = btrfs_header_level(root1->node);
8483         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8484                                     root1->node->start, 0, level, 0,
8485                                     root1->nodesize, NULL);
8486         if (ret < 0)
8487                 goto out;
8488         btrfs_init_path(&path);
8489         key.offset = 0;
8490         key.objectid = 0;
8491         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8492         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8493                                         &key, &path, 0, 0);
8494         if (ret < 0)
8495                 goto out;
8496         while(1) {
8497                 leaf = path.nodes[0];
8498                 slot = path.slots[0];
8499                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8500                         ret = btrfs_next_leaf(root, &path);
8501                         if (ret != 0)
8502                                 break;
8503                         leaf = path.nodes[0];
8504                         slot = path.slots[0];
8505                 }
8506                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8507                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8508                         unsigned long offset;
8509                         u64 last_snapshot;
8510
8511                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8512                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8513                         last_snapshot = btrfs_root_last_snapshot(&ri);
8514                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8515                                 level = btrfs_root_level(&ri);
8516                                 level_size = root->nodesize;
8517                                 ret = add_root_item_to_list(&normal_trees,
8518                                                 found_key.objectid,
8519                                                 btrfs_root_bytenr(&ri),
8520                                                 last_snapshot, level,
8521                                                 0, level_size, NULL);
8522                                 if (ret < 0)
8523                                         goto out;
8524                         } else {
8525                                 level = btrfs_root_level(&ri);
8526                                 level_size = root->nodesize;
8527                                 objectid = found_key.objectid;
8528                                 btrfs_disk_key_to_cpu(&found_key,
8529                                                       &ri.drop_progress);
8530                                 ret = add_root_item_to_list(&dropping_trees,
8531                                                 objectid,
8532                                                 btrfs_root_bytenr(&ri),
8533                                                 last_snapshot, level,
8534                                                 ri.drop_level,
8535                                                 level_size, &found_key);
8536                                 if (ret < 0)
8537                                         goto out;
8538                         }
8539                 }
8540                 path.slots[0]++;
8541         }
8542         btrfs_release_path(&path);
8543
8544         /*
8545          * check_block can return -EAGAIN if it fixes something, please keep
8546          * this in mind when dealing with return values from these functions, if
8547          * we get -EAGAIN we want to fall through and restart the loop.
8548          */
8549         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8550                                   &seen, &reada, &nodes, &extent_cache,
8551                                   &chunk_cache, &dev_cache, &block_group_cache,
8552                                   &dev_extent_cache);
8553         if (ret < 0) {
8554                 if (ret == -EAGAIN)
8555                         goto loop;
8556                 goto out;
8557         }
8558         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8559                                   &pending, &seen, &reada, &nodes,
8560                                   &extent_cache, &chunk_cache, &dev_cache,
8561                                   &block_group_cache, &dev_extent_cache);
8562         if (ret < 0) {
8563                 if (ret == -EAGAIN)
8564                         goto loop;
8565                 goto out;
8566         }
8567
8568         ret = check_chunks(&chunk_cache, &block_group_cache,
8569                            &dev_extent_cache, NULL, NULL, NULL, 0);
8570         if (ret) {
8571                 if (ret == -EAGAIN)
8572                         goto loop;
8573                 err = ret;
8574         }
8575
8576         ret = check_extent_refs(root, &extent_cache);
8577         if (ret < 0) {
8578                 if (ret == -EAGAIN)
8579                         goto loop;
8580                 goto out;
8581         }
8582
8583         ret = check_devices(&dev_cache, &dev_extent_cache);
8584         if (ret && err)
8585                 ret = err;
8586
8587 out:
8588         task_stop(ctx.info);
8589         if (repair) {
8590                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8591                 extent_io_tree_cleanup(&excluded_extents);
8592                 root->fs_info->fsck_extent_cache = NULL;
8593                 root->fs_info->free_extent_hook = NULL;
8594                 root->fs_info->corrupt_blocks = NULL;
8595                 root->fs_info->excluded_extents = NULL;
8596         }
8597         free(bits);
8598         free_chunk_cache_tree(&chunk_cache);
8599         free_device_cache_tree(&dev_cache);
8600         free_block_group_tree(&block_group_cache);
8601         free_device_extent_tree(&dev_extent_cache);
8602         free_extent_cache_tree(&seen);
8603         free_extent_cache_tree(&pending);
8604         free_extent_cache_tree(&reada);
8605         free_extent_cache_tree(&nodes);
8606         return ret;
8607 loop:
8608         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8609         free_extent_cache_tree(&seen);
8610         free_extent_cache_tree(&pending);
8611         free_extent_cache_tree(&reada);
8612         free_extent_cache_tree(&nodes);
8613         free_chunk_cache_tree(&chunk_cache);
8614         free_block_group_tree(&block_group_cache);
8615         free_device_cache_tree(&dev_cache);
8616         free_device_extent_tree(&dev_extent_cache);
8617         free_extent_record_cache(root->fs_info, &extent_cache);
8618         free_root_item_list(&normal_trees);
8619         free_root_item_list(&dropping_trees);
8620         extent_io_tree_cleanup(&excluded_extents);
8621         goto again;
8622 }
8623
8624 /*
8625  * Check backrefs of a tree block given by @bytenr or @eb.
8626  *
8627  * @root:       the root containing the @bytenr or @eb
8628  * @eb:         tree block extent buffer, can be NULL
8629  * @bytenr:     bytenr of the tree block to search
8630  * @level:      tree level of the tree block
8631  * @owner:      owner of the tree block
8632  *
8633  * Return >0 for any error found and output error message
8634  * Return 0 for no error found
8635  */
8636 static int check_tree_block_ref(struct btrfs_root *root,
8637                                 struct extent_buffer *eb, u64 bytenr,
8638                                 int level, u64 owner)
8639 {
8640         struct btrfs_key key;
8641         struct btrfs_root *extent_root = root->fs_info->extent_root;
8642         struct btrfs_path path;
8643         struct btrfs_extent_item *ei;
8644         struct btrfs_extent_inline_ref *iref;
8645         struct extent_buffer *leaf;
8646         unsigned long end;
8647         unsigned long ptr;
8648         int slot;
8649         int skinny_level;
8650         int type;
8651         u32 nodesize = root->nodesize;
8652         u32 item_size;
8653         u64 offset;
8654         int found_ref = 0;
8655         int err = 0;
8656         int ret;
8657
8658         btrfs_init_path(&path);
8659         key.objectid = bytenr;
8660         if (btrfs_fs_incompat(root->fs_info,
8661                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8662                 key.type = BTRFS_METADATA_ITEM_KEY;
8663         else
8664                 key.type = BTRFS_EXTENT_ITEM_KEY;
8665         key.offset = (u64)-1;
8666
8667         /* Search for the backref in extent tree */
8668         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8669         if (ret < 0) {
8670                 err |= BACKREF_MISSING;
8671                 goto out;
8672         }
8673         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8674         if (ret) {
8675                 err |= BACKREF_MISSING;
8676                 goto out;
8677         }
8678
8679         leaf = path.nodes[0];
8680         slot = path.slots[0];
8681         btrfs_item_key_to_cpu(leaf, &key, slot);
8682
8683         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8684
8685         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8686                 skinny_level = (int)key.offset;
8687                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8688         } else {
8689                 struct btrfs_tree_block_info *info;
8690
8691                 info = (struct btrfs_tree_block_info *)(ei + 1);
8692                 skinny_level = btrfs_tree_block_level(leaf, info);
8693                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8694         }
8695
8696         if (eb) {
8697                 u64 header_gen;
8698                 u64 extent_gen;
8699
8700                 if (!(btrfs_extent_flags(leaf, ei) &
8701                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8702                         error(
8703                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8704                                 key.objectid, nodesize,
8705                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8706                         err = BACKREF_MISMATCH;
8707                 }
8708                 header_gen = btrfs_header_generation(eb);
8709                 extent_gen = btrfs_extent_generation(leaf, ei);
8710                 if (header_gen != extent_gen) {
8711                         error(
8712         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8713                                 key.objectid, nodesize, header_gen,
8714                                 extent_gen);
8715                         err = BACKREF_MISMATCH;
8716                 }
8717                 if (level != skinny_level) {
8718                         error(
8719                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8720                                 key.objectid, nodesize, level, skinny_level);
8721                         err = BACKREF_MISMATCH;
8722                 }
8723                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8724                         error(
8725                         "extent[%llu %u] is referred by other roots than %llu",
8726                                 key.objectid, nodesize, root->objectid);
8727                         err = BACKREF_MISMATCH;
8728                 }
8729         }
8730
8731         /*
8732          * Iterate the extent/metadata item to find the exact backref
8733          */
8734         item_size = btrfs_item_size_nr(leaf, slot);
8735         ptr = (unsigned long)iref;
8736         end = (unsigned long)ei + item_size;
8737         while (ptr < end) {
8738                 iref = (struct btrfs_extent_inline_ref *)ptr;
8739                 type = btrfs_extent_inline_ref_type(leaf, iref);
8740                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8741
8742                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8743                         (offset == root->objectid || offset == owner)) {
8744                         found_ref = 1;
8745                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8746                         /* Check if the backref points to valid referencer */
8747                         found_ref = !check_tree_block_ref(root, NULL, offset,
8748                                                           level + 1, owner);
8749                 }
8750
8751                 if (found_ref)
8752                         break;
8753                 ptr += btrfs_extent_inline_ref_size(type);
8754         }
8755
8756         /*
8757          * Inlined extent item doesn't have what we need, check
8758          * TREE_BLOCK_REF_KEY
8759          */
8760         if (!found_ref) {
8761                 btrfs_release_path(&path);
8762                 key.objectid = bytenr;
8763                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8764                 key.offset = root->objectid;
8765
8766                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8767                 if (!ret)
8768                         found_ref = 1;
8769         }
8770         if (!found_ref)
8771                 err |= BACKREF_MISSING;
8772 out:
8773         btrfs_release_path(&path);
8774         if (eb && (err & BACKREF_MISSING))
8775                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8776                         bytenr, nodesize, owner, level);
8777         return err;
8778 }
8779
8780 /*
8781  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8782  *
8783  * Return >0 any error found and output error message
8784  * Return 0 for no error found
8785  */
8786 static int check_extent_data_item(struct btrfs_root *root,
8787                                   struct extent_buffer *eb, int slot)
8788 {
8789         struct btrfs_file_extent_item *fi;
8790         struct btrfs_path path;
8791         struct btrfs_root *extent_root = root->fs_info->extent_root;
8792         struct btrfs_key fi_key;
8793         struct btrfs_key dbref_key;
8794         struct extent_buffer *leaf;
8795         struct btrfs_extent_item *ei;
8796         struct btrfs_extent_inline_ref *iref;
8797         struct btrfs_extent_data_ref *dref;
8798         u64 owner;
8799         u64 file_extent_gen;
8800         u64 disk_bytenr;
8801         u64 disk_num_bytes;
8802         u64 extent_num_bytes;
8803         u64 extent_flags;
8804         u64 extent_gen;
8805         u32 item_size;
8806         unsigned long end;
8807         unsigned long ptr;
8808         int type;
8809         u64 ref_root;
8810         int found_dbackref = 0;
8811         int err = 0;
8812         int ret;
8813
8814         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8815         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8816         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8817
8818         /* Nothing to check for hole and inline data extents */
8819         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8820             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8821                 return 0;
8822
8823         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8824         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8825         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8826
8827         /* Check unaligned disk_num_bytes and num_bytes */
8828         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8829                 error(
8830 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8831                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8832                         root->sectorsize);
8833                 err |= BYTES_UNALIGNED;
8834         } else {
8835                 data_bytes_allocated += disk_num_bytes;
8836         }
8837         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8838                 error(
8839 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8840                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8841                         root->sectorsize);
8842                 err |= BYTES_UNALIGNED;
8843         } else {
8844                 data_bytes_referenced += extent_num_bytes;
8845         }
8846         owner = btrfs_header_owner(eb);
8847
8848         /* Check the extent item of the file extent in extent tree */
8849         btrfs_init_path(&path);
8850         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8851         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8852         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8853
8854         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8855         if (ret) {
8856                 err |= BACKREF_MISSING;
8857                 goto error;
8858         }
8859
8860         leaf = path.nodes[0];
8861         slot = path.slots[0];
8862         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8863
8864         extent_flags = btrfs_extent_flags(leaf, ei);
8865         extent_gen = btrfs_extent_generation(leaf, ei);
8866
8867         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8868                 error(
8869                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8870                     disk_bytenr, disk_num_bytes,
8871                     BTRFS_EXTENT_FLAG_DATA);
8872                 err |= BACKREF_MISMATCH;
8873         }
8874
8875         if (file_extent_gen < extent_gen) {
8876                 error(
8877 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8878                         disk_bytenr, disk_num_bytes, file_extent_gen,
8879                         extent_gen);
8880                 err |= BACKREF_MISMATCH;
8881         }
8882
8883         /* Check data backref inside that extent item */
8884         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8885         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8886         ptr = (unsigned long)iref;
8887         end = (unsigned long)ei + item_size;
8888         while (ptr < end) {
8889                 iref = (struct btrfs_extent_inline_ref *)ptr;
8890                 type = btrfs_extent_inline_ref_type(leaf, iref);
8891                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8892
8893                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8894                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8895                         if (ref_root == owner || ref_root == root->objectid)
8896                                 found_dbackref = 1;
8897                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8898                         found_dbackref = !check_tree_block_ref(root, NULL,
8899                                 btrfs_extent_inline_ref_offset(leaf, iref),
8900                                 0, owner);
8901                 }
8902
8903                 if (found_dbackref)
8904                         break;
8905                 ptr += btrfs_extent_inline_ref_size(type);
8906         }
8907
8908         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8909         if (!found_dbackref) {
8910                 btrfs_release_path(&path);
8911
8912                 btrfs_init_path(&path);
8913                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8914                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8915                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8916                                 fi_key.objectid, fi_key.offset);
8917
8918                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8919                                         &dbref_key, &path, 0, 0);
8920                 if (!ret)
8921                         found_dbackref = 1;
8922         }
8923
8924         if (!found_dbackref)
8925                 err |= BACKREF_MISSING;
8926 error:
8927         btrfs_release_path(&path);
8928         if (err & BACKREF_MISSING) {
8929                 error("data extent[%llu %llu] backref lost",
8930                       disk_bytenr, disk_num_bytes);
8931         }
8932         return err;
8933 }
8934
8935 /*
8936  * Get real tree block level for the case like shared block
8937  * Return >= 0 as tree level
8938  * Return <0 for error
8939  */
8940 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8941 {
8942         struct extent_buffer *eb;
8943         struct btrfs_path path;
8944         struct btrfs_key key;
8945         struct btrfs_extent_item *ei;
8946         u64 flags;
8947         u64 transid;
8948         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8949         u8 backref_level;
8950         u8 header_level;
8951         int ret;
8952
8953         /* Search extent tree for extent generation and level */
8954         key.objectid = bytenr;
8955         key.type = BTRFS_METADATA_ITEM_KEY;
8956         key.offset = (u64)-1;
8957
8958         btrfs_init_path(&path);
8959         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8960         if (ret < 0)
8961                 goto release_out;
8962         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8963         if (ret < 0)
8964                 goto release_out;
8965         if (ret > 0) {
8966                 ret = -ENOENT;
8967                 goto release_out;
8968         }
8969
8970         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8971         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8972                             struct btrfs_extent_item);
8973         flags = btrfs_extent_flags(path.nodes[0], ei);
8974         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8975                 ret = -ENOENT;
8976                 goto release_out;
8977         }
8978
8979         /* Get transid for later read_tree_block() check */
8980         transid = btrfs_extent_generation(path.nodes[0], ei);
8981
8982         /* Get backref level as one source */
8983         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8984                 backref_level = key.offset;
8985         } else {
8986                 struct btrfs_tree_block_info *info;
8987
8988                 info = (struct btrfs_tree_block_info *)(ei + 1);
8989                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8990         }
8991         btrfs_release_path(&path);
8992
8993         /* Get level from tree block as an alternative source */
8994         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8995         if (!extent_buffer_uptodate(eb)) {
8996                 free_extent_buffer(eb);
8997                 return -EIO;
8998         }
8999         header_level = btrfs_header_level(eb);
9000         free_extent_buffer(eb);
9001
9002         if (header_level != backref_level)
9003                 return -EIO;
9004         return header_level;
9005
9006 release_out:
9007         btrfs_release_path(&path);
9008         return ret;
9009 }
9010
9011 /*
9012  * Check if a tree block backref is valid (points to a valid tree block)
9013  * if level == -1, level will be resolved
9014  * Return >0 for any error found and print error message
9015  */
9016 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9017                                     u64 bytenr, int level)
9018 {
9019         struct btrfs_root *root;
9020         struct btrfs_key key;
9021         struct btrfs_path path;
9022         struct extent_buffer *eb;
9023         struct extent_buffer *node;
9024         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9025         int err = 0;
9026         int ret;
9027
9028         /* Query level for level == -1 special case */
9029         if (level == -1)
9030                 level = query_tree_block_level(fs_info, bytenr);
9031         if (level < 0) {
9032                 err |= REFERENCER_MISSING;
9033                 goto out;
9034         }
9035
9036         key.objectid = root_id;
9037         key.type = BTRFS_ROOT_ITEM_KEY;
9038         key.offset = (u64)-1;
9039
9040         root = btrfs_read_fs_root(fs_info, &key);
9041         if (IS_ERR(root)) {
9042                 err |= REFERENCER_MISSING;
9043                 goto out;
9044         }
9045
9046         /* Read out the tree block to get item/node key */
9047         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9048         if (!extent_buffer_uptodate(eb)) {
9049                 err |= REFERENCER_MISSING;
9050                 free_extent_buffer(eb);
9051                 goto out;
9052         }
9053
9054         /* Empty tree, no need to check key */
9055         if (!btrfs_header_nritems(eb) && !level) {
9056                 free_extent_buffer(eb);
9057                 goto out;
9058         }
9059
9060         if (level)
9061                 btrfs_node_key_to_cpu(eb, &key, 0);
9062         else
9063                 btrfs_item_key_to_cpu(eb, &key, 0);
9064
9065         free_extent_buffer(eb);
9066
9067         btrfs_init_path(&path);
9068         path.lowest_level = level;
9069         /* Search with the first key, to ensure we can reach it */
9070         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9071         if (ret < 0) {
9072                 err |= REFERENCER_MISSING;
9073                 goto release_out;
9074         }
9075
9076         node = path.nodes[level];
9077         if (btrfs_header_bytenr(node) != bytenr) {
9078                 error(
9079         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9080                         bytenr, nodesize, bytenr,
9081                         btrfs_header_bytenr(node));
9082                 err |= REFERENCER_MISMATCH;
9083         }
9084         if (btrfs_header_level(node) != level) {
9085                 error(
9086         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9087                         bytenr, nodesize, level,
9088                         btrfs_header_level(node));
9089                 err |= REFERENCER_MISMATCH;
9090         }
9091
9092 release_out:
9093         btrfs_release_path(&path);
9094 out:
9095         if (err & REFERENCER_MISSING) {
9096                 if (level < 0)
9097                         error("extent [%llu %d] lost referencer (owner: %llu)",
9098                                 bytenr, nodesize, root_id);
9099                 else
9100                         error(
9101                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9102                                 bytenr, nodesize, root_id, level);
9103         }
9104
9105         return err;
9106 }
9107
9108 /*
9109  * Check referencer for shared block backref
9110  * If level == -1, this function will resolve the level.
9111  */
9112 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9113                                      u64 parent, u64 bytenr, int level)
9114 {
9115         struct extent_buffer *eb;
9116         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9117         u32 nr;
9118         int found_parent = 0;
9119         int i;
9120
9121         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9122         if (!extent_buffer_uptodate(eb))
9123                 goto out;
9124
9125         if (level == -1)
9126                 level = query_tree_block_level(fs_info, bytenr);
9127         if (level < 0)
9128                 goto out;
9129
9130         if (level + 1 != btrfs_header_level(eb))
9131                 goto out;
9132
9133         nr = btrfs_header_nritems(eb);
9134         for (i = 0; i < nr; i++) {
9135                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9136                         found_parent = 1;
9137                         break;
9138                 }
9139         }
9140 out:
9141         free_extent_buffer(eb);
9142         if (!found_parent) {
9143                 error(
9144         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9145                         bytenr, nodesize, parent, level);
9146                 return REFERENCER_MISSING;
9147         }
9148         return 0;
9149 }
9150
9151 /*
9152  * Check referencer for normal (inlined) data ref
9153  * If len == 0, it will be resolved by searching in extent tree
9154  */
9155 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9156                                      u64 root_id, u64 objectid, u64 offset,
9157                                      u64 bytenr, u64 len, u32 count)
9158 {
9159         struct btrfs_root *root;
9160         struct btrfs_root *extent_root = fs_info->extent_root;
9161         struct btrfs_key key;
9162         struct btrfs_path path;
9163         struct extent_buffer *leaf;
9164         struct btrfs_file_extent_item *fi;
9165         u32 found_count = 0;
9166         int slot;
9167         int ret = 0;
9168
9169         if (!len) {
9170                 key.objectid = bytenr;
9171                 key.type = BTRFS_EXTENT_ITEM_KEY;
9172                 key.offset = (u64)-1;
9173
9174                 btrfs_init_path(&path);
9175                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9176                 if (ret < 0)
9177                         goto out;
9178                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9179                 if (ret)
9180                         goto out;
9181                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9182                 if (key.objectid != bytenr ||
9183                     key.type != BTRFS_EXTENT_ITEM_KEY)
9184                         goto out;
9185                 len = key.offset;
9186                 btrfs_release_path(&path);
9187         }
9188         key.objectid = root_id;
9189         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9190         key.offset = (u64)-1;
9191         btrfs_init_path(&path);
9192
9193         root = btrfs_read_fs_root(fs_info, &key);
9194         if (IS_ERR(root))
9195                 goto out;
9196
9197         key.objectid = objectid;
9198         key.type = BTRFS_EXTENT_DATA_KEY;
9199         /*
9200          * It can be nasty as data backref offset is
9201          * file offset - file extent offset, which is smaller or
9202          * equal to original backref offset.  The only special case is
9203          * overflow.  So we need to special check and do further search.
9204          */
9205         key.offset = offset & (1ULL << 63) ? 0 : offset;
9206
9207         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9208         if (ret < 0)
9209                 goto out;
9210
9211         /*
9212          * Search afterwards to get correct one
9213          * NOTE: As we must do a comprehensive check on the data backref to
9214          * make sure the dref count also matches, we must iterate all file
9215          * extents for that inode.
9216          */
9217         while (1) {
9218                 leaf = path.nodes[0];
9219                 slot = path.slots[0];
9220
9221                 btrfs_item_key_to_cpu(leaf, &key, slot);
9222                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9223                         break;
9224                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9225                 /*
9226                  * Except normal disk bytenr and disk num bytes, we still
9227                  * need to do extra check on dbackref offset as
9228                  * dbackref offset = file_offset - file_extent_offset
9229                  */
9230                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9231                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9232                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9233                     offset)
9234                         found_count++;
9235
9236                 ret = btrfs_next_item(root, &path);
9237                 if (ret)
9238                         break;
9239         }
9240 out:
9241         btrfs_release_path(&path);
9242         if (found_count != count) {
9243                 error(
9244 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9245                         bytenr, len, root_id, objectid, offset, count, found_count);
9246                 return REFERENCER_MISSING;
9247         }
9248         return 0;
9249 }
9250
9251 /*
9252  * Check if the referencer of a shared data backref exists
9253  */
9254 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9255                                      u64 parent, u64 bytenr)
9256 {
9257         struct extent_buffer *eb;
9258         struct btrfs_key key;
9259         struct btrfs_file_extent_item *fi;
9260         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9261         u32 nr;
9262         int found_parent = 0;
9263         int i;
9264
9265         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9266         if (!extent_buffer_uptodate(eb))
9267                 goto out;
9268
9269         nr = btrfs_header_nritems(eb);
9270         for (i = 0; i < nr; i++) {
9271                 btrfs_item_key_to_cpu(eb, &key, i);
9272                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9273                         continue;
9274
9275                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9276                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9277                         continue;
9278
9279                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9280                         found_parent = 1;
9281                         break;
9282                 }
9283         }
9284
9285 out:
9286         free_extent_buffer(eb);
9287         if (!found_parent) {
9288                 error("shared extent %llu referencer lost (parent: %llu)",
9289                         bytenr, parent);
9290                 return REFERENCER_MISSING;
9291         }
9292         return 0;
9293 }
9294
9295 /*
9296  * This function will check a given extent item, including its backref and
9297  * itself (like crossing stripe boundary and type)
9298  *
9299  * Since we don't use extent_record anymore, introduce new error bit
9300  */
9301 static int check_extent_item(struct btrfs_fs_info *fs_info,
9302                              struct extent_buffer *eb, int slot)
9303 {
9304         struct btrfs_extent_item *ei;
9305         struct btrfs_extent_inline_ref *iref;
9306         struct btrfs_extent_data_ref *dref;
9307         unsigned long end;
9308         unsigned long ptr;
9309         int type;
9310         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9311         u32 item_size = btrfs_item_size_nr(eb, slot);
9312         u64 flags;
9313         u64 offset;
9314         int metadata = 0;
9315         int level;
9316         struct btrfs_key key;
9317         int ret;
9318         int err = 0;
9319
9320         btrfs_item_key_to_cpu(eb, &key, slot);
9321         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9322                 bytes_used += key.offset;
9323         else
9324                 bytes_used += nodesize;
9325
9326         if (item_size < sizeof(*ei)) {
9327                 /*
9328                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9329                  * old thing when on disk format is still un-determined.
9330                  * No need to care about it anymore
9331                  */
9332                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9333                 return -ENOTTY;
9334         }
9335
9336         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9337         flags = btrfs_extent_flags(eb, ei);
9338
9339         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9340                 metadata = 1;
9341         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9342                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9343                       key.objectid, key.objectid + nodesize);
9344                 err |= CROSSING_STRIPE_BOUNDARY;
9345         }
9346
9347         ptr = (unsigned long)(ei + 1);
9348
9349         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9350                 /* Old EXTENT_ITEM metadata */
9351                 struct btrfs_tree_block_info *info;
9352
9353                 info = (struct btrfs_tree_block_info *)ptr;
9354                 level = btrfs_tree_block_level(eb, info);
9355                 ptr += sizeof(struct btrfs_tree_block_info);
9356         } else {
9357                 /* New METADATA_ITEM */
9358                 level = key.offset;
9359         }
9360         end = (unsigned long)ei + item_size;
9361
9362         if (ptr >= end) {
9363                 err |= ITEM_SIZE_MISMATCH;
9364                 goto out;
9365         }
9366
9367         /* Now check every backref in this extent item */
9368 next:
9369         iref = (struct btrfs_extent_inline_ref *)ptr;
9370         type = btrfs_extent_inline_ref_type(eb, iref);
9371         offset = btrfs_extent_inline_ref_offset(eb, iref);
9372         switch (type) {
9373         case BTRFS_TREE_BLOCK_REF_KEY:
9374                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9375                                                level);
9376                 err |= ret;
9377                 break;
9378         case BTRFS_SHARED_BLOCK_REF_KEY:
9379                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9380                                                  level);
9381                 err |= ret;
9382                 break;
9383         case BTRFS_EXTENT_DATA_REF_KEY:
9384                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9385                 ret = check_extent_data_backref(fs_info,
9386                                 btrfs_extent_data_ref_root(eb, dref),
9387                                 btrfs_extent_data_ref_objectid(eb, dref),
9388                                 btrfs_extent_data_ref_offset(eb, dref),
9389                                 key.objectid, key.offset,
9390                                 btrfs_extent_data_ref_count(eb, dref));
9391                 err |= ret;
9392                 break;
9393         case BTRFS_SHARED_DATA_REF_KEY:
9394                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9395                 err |= ret;
9396                 break;
9397         default:
9398                 error("extent[%llu %d %llu] has unknown ref type: %d",
9399                         key.objectid, key.type, key.offset, type);
9400                 err |= UNKNOWN_TYPE;
9401                 goto out;
9402         }
9403
9404         ptr += btrfs_extent_inline_ref_size(type);
9405         if (ptr < end)
9406                 goto next;
9407
9408 out:
9409         return err;
9410 }
9411
9412 /*
9413  * Check if a dev extent item is referred correctly by its chunk
9414  */
9415 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9416                                  struct extent_buffer *eb, int slot)
9417 {
9418         struct btrfs_root *chunk_root = fs_info->chunk_root;
9419         struct btrfs_dev_extent *ptr;
9420         struct btrfs_path path;
9421         struct btrfs_key chunk_key;
9422         struct btrfs_key devext_key;
9423         struct btrfs_chunk *chunk;
9424         struct extent_buffer *l;
9425         int num_stripes;
9426         u64 length;
9427         int i;
9428         int found_chunk = 0;
9429         int ret;
9430
9431         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9432         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9433         length = btrfs_dev_extent_length(eb, ptr);
9434
9435         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9436         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9437         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9438
9439         btrfs_init_path(&path);
9440         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9441         if (ret)
9442                 goto out;
9443
9444         l = path.nodes[0];
9445         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9446         if (btrfs_chunk_length(l, chunk) != length)
9447                 goto out;
9448
9449         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9450         for (i = 0; i < num_stripes; i++) {
9451                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9452                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9453
9454                 if (devid == devext_key.objectid &&
9455                     offset == devext_key.offset) {
9456                         found_chunk = 1;
9457                         break;
9458                 }
9459         }
9460 out:
9461         btrfs_release_path(&path);
9462         if (!found_chunk) {
9463                 error(
9464                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9465                         devext_key.objectid, devext_key.offset, length);
9466                 return REFERENCER_MISSING;
9467         }
9468         return 0;
9469 }
9470
9471 /*
9472  * Check if the used space is correct with the dev item
9473  */
9474 static int check_dev_item(struct btrfs_fs_info *fs_info,
9475                           struct extent_buffer *eb, int slot)
9476 {
9477         struct btrfs_root *dev_root = fs_info->dev_root;
9478         struct btrfs_dev_item *dev_item;
9479         struct btrfs_path path;
9480         struct btrfs_key key;
9481         struct btrfs_dev_extent *ptr;
9482         u64 dev_id;
9483         u64 used;
9484         u64 total = 0;
9485         int ret;
9486
9487         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9488         dev_id = btrfs_device_id(eb, dev_item);
9489         used = btrfs_device_bytes_used(eb, dev_item);
9490
9491         key.objectid = dev_id;
9492         key.type = BTRFS_DEV_EXTENT_KEY;
9493         key.offset = 0;
9494
9495         btrfs_init_path(&path);
9496         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9497         if (ret < 0) {
9498                 btrfs_item_key_to_cpu(eb, &key, slot);
9499                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9500                         key.objectid, key.type, key.offset);
9501                 btrfs_release_path(&path);
9502                 return REFERENCER_MISSING;
9503         }
9504
9505         /* Iterate dev_extents to calculate the used space of a device */
9506         while (1) {
9507                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9508
9509                 if (key.objectid > dev_id)
9510                         break;
9511                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9512                         goto next;
9513
9514                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9515                                      struct btrfs_dev_extent);
9516                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9517 next:
9518                 ret = btrfs_next_item(dev_root, &path);
9519                 if (ret)
9520                         break;
9521         }
9522         btrfs_release_path(&path);
9523
9524         if (used != total) {
9525                 btrfs_item_key_to_cpu(eb, &key, slot);
9526                 error(
9527 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9528                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9529                         BTRFS_DEV_EXTENT_KEY, dev_id);
9530                 return ACCOUNTING_MISMATCH;
9531         }
9532         return 0;
9533 }
9534
9535 /*
9536  * Check a block group item with its referener (chunk) and its used space
9537  * with extent/metadata item
9538  */
9539 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9540                                   struct extent_buffer *eb, int slot)
9541 {
9542         struct btrfs_root *extent_root = fs_info->extent_root;
9543         struct btrfs_root *chunk_root = fs_info->chunk_root;
9544         struct btrfs_block_group_item *bi;
9545         struct btrfs_block_group_item bg_item;
9546         struct btrfs_path path;
9547         struct btrfs_key bg_key;
9548         struct btrfs_key chunk_key;
9549         struct btrfs_key extent_key;
9550         struct btrfs_chunk *chunk;
9551         struct extent_buffer *leaf;
9552         struct btrfs_extent_item *ei;
9553         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9554         u64 flags;
9555         u64 bg_flags;
9556         u64 used;
9557         u64 total = 0;
9558         int ret;
9559         int err = 0;
9560
9561         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9562         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9563         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9564         used = btrfs_block_group_used(&bg_item);
9565         bg_flags = btrfs_block_group_flags(&bg_item);
9566
9567         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9568         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9569         chunk_key.offset = bg_key.objectid;
9570
9571         btrfs_init_path(&path);
9572         /* Search for the referencer chunk */
9573         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9574         if (ret) {
9575                 error(
9576                 "block group[%llu %llu] did not find the related chunk item",
9577                         bg_key.objectid, bg_key.offset);
9578                 err |= REFERENCER_MISSING;
9579         } else {
9580                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9581                                         struct btrfs_chunk);
9582                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9583                                                 bg_key.offset) {
9584                         error(
9585         "block group[%llu %llu] related chunk item length does not match",
9586                                 bg_key.objectid, bg_key.offset);
9587                         err |= REFERENCER_MISMATCH;
9588                 }
9589         }
9590         btrfs_release_path(&path);
9591
9592         /* Search from the block group bytenr */
9593         extent_key.objectid = bg_key.objectid;
9594         extent_key.type = 0;
9595         extent_key.offset = 0;
9596
9597         btrfs_init_path(&path);
9598         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9599         if (ret < 0)
9600                 goto out;
9601
9602         /* Iterate extent tree to account used space */
9603         while (1) {
9604                 leaf = path.nodes[0];
9605                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9606                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9607                         break;
9608
9609                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9610                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9611                         goto next;
9612                 if (extent_key.objectid < bg_key.objectid)
9613                         goto next;
9614
9615                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9616                         total += nodesize;
9617                 else
9618                         total += extent_key.offset;
9619
9620                 ei = btrfs_item_ptr(leaf, path.slots[0],
9621                                     struct btrfs_extent_item);
9622                 flags = btrfs_extent_flags(leaf, ei);
9623                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9624                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9625                                 error(
9626                         "bad extent[%llu, %llu) type mismatch with chunk",
9627                                         extent_key.objectid,
9628                                         extent_key.objectid + extent_key.offset);
9629                                 err |= CHUNK_TYPE_MISMATCH;
9630                         }
9631                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9632                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9633                                     BTRFS_BLOCK_GROUP_METADATA))) {
9634                                 error(
9635                         "bad extent[%llu, %llu) type mismatch with chunk",
9636                                         extent_key.objectid,
9637                                         extent_key.objectid + nodesize);
9638                                 err |= CHUNK_TYPE_MISMATCH;
9639                         }
9640                 }
9641 next:
9642                 ret = btrfs_next_item(extent_root, &path);
9643                 if (ret)
9644                         break;
9645         }
9646
9647 out:
9648         btrfs_release_path(&path);
9649
9650         if (total != used) {
9651                 error(
9652                 "block group[%llu %llu] used %llu but extent items used %llu",
9653                         bg_key.objectid, bg_key.offset, used, total);
9654                 err |= ACCOUNTING_MISMATCH;
9655         }
9656         return err;
9657 }
9658
9659 /*
9660  * Check a chunk item.
9661  * Including checking all referred dev_extents and block group
9662  */
9663 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9664                             struct extent_buffer *eb, int slot)
9665 {
9666         struct btrfs_root *extent_root = fs_info->extent_root;
9667         struct btrfs_root *dev_root = fs_info->dev_root;
9668         struct btrfs_path path;
9669         struct btrfs_key chunk_key;
9670         struct btrfs_key bg_key;
9671         struct btrfs_key devext_key;
9672         struct btrfs_chunk *chunk;
9673         struct extent_buffer *leaf;
9674         struct btrfs_block_group_item *bi;
9675         struct btrfs_block_group_item bg_item;
9676         struct btrfs_dev_extent *ptr;
9677         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9678         u64 length;
9679         u64 chunk_end;
9680         u64 type;
9681         u64 profile;
9682         int num_stripes;
9683         u64 offset;
9684         u64 objectid;
9685         int i;
9686         int ret;
9687         int err = 0;
9688
9689         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9690         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9691         length = btrfs_chunk_length(eb, chunk);
9692         chunk_end = chunk_key.offset + length;
9693         if (!IS_ALIGNED(length, sectorsize)) {
9694                 error("chunk[%llu %llu) not aligned to %u",
9695                         chunk_key.offset, chunk_end, sectorsize);
9696                 err |= BYTES_UNALIGNED;
9697                 goto out;
9698         }
9699
9700         type = btrfs_chunk_type(eb, chunk);
9701         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9702         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9703                 error("chunk[%llu %llu) has no chunk type",
9704                         chunk_key.offset, chunk_end);
9705                 err |= UNKNOWN_TYPE;
9706         }
9707         if (profile && (profile & (profile - 1))) {
9708                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9709                         chunk_key.offset, chunk_end, profile);
9710                 err |= UNKNOWN_TYPE;
9711         }
9712
9713         bg_key.objectid = chunk_key.offset;
9714         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9715         bg_key.offset = length;
9716
9717         btrfs_init_path(&path);
9718         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9719         if (ret) {
9720                 error(
9721                 "chunk[%llu %llu) did not find the related block group item",
9722                         chunk_key.offset, chunk_end);
9723                 err |= REFERENCER_MISSING;
9724         } else{
9725                 leaf = path.nodes[0];
9726                 bi = btrfs_item_ptr(leaf, path.slots[0],
9727                                     struct btrfs_block_group_item);
9728                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9729                                    sizeof(bg_item));
9730                 if (btrfs_block_group_flags(&bg_item) != type) {
9731                         error(
9732 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9733                                 chunk_key.offset, chunk_end, type,
9734                                 btrfs_block_group_flags(&bg_item));
9735                         err |= REFERENCER_MISSING;
9736                 }
9737         }
9738
9739         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9740         for (i = 0; i < num_stripes; i++) {
9741                 btrfs_release_path(&path);
9742                 btrfs_init_path(&path);
9743                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9744                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9745                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9746
9747                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9748                                         0, 0);
9749                 if (ret)
9750                         goto not_match_dev;
9751
9752                 leaf = path.nodes[0];
9753                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9754                                      struct btrfs_dev_extent);
9755                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9756                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9757                 if (objectid != chunk_key.objectid ||
9758                     offset != chunk_key.offset ||
9759                     btrfs_dev_extent_length(leaf, ptr) != length)
9760                         goto not_match_dev;
9761                 continue;
9762 not_match_dev:
9763                 err |= BACKREF_MISSING;
9764                 error(
9765                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9766                         chunk_key.objectid, chunk_end, i);
9767                 continue;
9768         }
9769         btrfs_release_path(&path);
9770 out:
9771         return err;
9772 }
9773
9774 /*
9775  * Main entry function to check known items and update related accounting info
9776  */
9777 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9778 {
9779         struct btrfs_fs_info *fs_info = root->fs_info;
9780         struct btrfs_key key;
9781         int slot = 0;
9782         int type;
9783         struct btrfs_extent_data_ref *dref;
9784         int ret;
9785         int err = 0;
9786
9787 next:
9788         btrfs_item_key_to_cpu(eb, &key, slot);
9789         type = btrfs_key_type(&key);
9790
9791         switch (type) {
9792         case BTRFS_EXTENT_DATA_KEY:
9793                 ret = check_extent_data_item(root, eb, slot);
9794                 err |= ret;
9795                 break;
9796         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9797                 ret = check_block_group_item(fs_info, eb, slot);
9798                 err |= ret;
9799                 break;
9800         case BTRFS_DEV_ITEM_KEY:
9801                 ret = check_dev_item(fs_info, eb, slot);
9802                 err |= ret;
9803                 break;
9804         case BTRFS_CHUNK_ITEM_KEY:
9805                 ret = check_chunk_item(fs_info, eb, slot);
9806                 err |= ret;
9807                 break;
9808         case BTRFS_DEV_EXTENT_KEY:
9809                 ret = check_dev_extent_item(fs_info, eb, slot);
9810                 err |= ret;
9811                 break;
9812         case BTRFS_EXTENT_ITEM_KEY:
9813         case BTRFS_METADATA_ITEM_KEY:
9814                 ret = check_extent_item(fs_info, eb, slot);
9815                 err |= ret;
9816                 break;
9817         case BTRFS_EXTENT_CSUM_KEY:
9818                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9819                 break;
9820         case BTRFS_TREE_BLOCK_REF_KEY:
9821                 ret = check_tree_block_backref(fs_info, key.offset,
9822                                                key.objectid, -1);
9823                 err |= ret;
9824                 break;
9825         case BTRFS_EXTENT_DATA_REF_KEY:
9826                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9827                 ret = check_extent_data_backref(fs_info,
9828                                 btrfs_extent_data_ref_root(eb, dref),
9829                                 btrfs_extent_data_ref_objectid(eb, dref),
9830                                 btrfs_extent_data_ref_offset(eb, dref),
9831                                 key.objectid, 0,
9832                                 btrfs_extent_data_ref_count(eb, dref));
9833                 err |= ret;
9834                 break;
9835         case BTRFS_SHARED_BLOCK_REF_KEY:
9836                 ret = check_shared_block_backref(fs_info, key.offset,
9837                                                  key.objectid, -1);
9838                 err |= ret;
9839                 break;
9840         case BTRFS_SHARED_DATA_REF_KEY:
9841                 ret = check_shared_data_backref(fs_info, key.offset,
9842                                                 key.objectid);
9843                 err |= ret;
9844                 break;
9845         default:
9846                 break;
9847         }
9848
9849         if (++slot < btrfs_header_nritems(eb))
9850                 goto next;
9851
9852         return err;
9853 }
9854
9855 /*
9856  * Helper function for later fs/subvol tree check.  To determine if a tree
9857  * block should be checked.
9858  * This function will ensure only the direct referencer with lowest rootid to
9859  * check a fs/subvolume tree block.
9860  *
9861  * Backref check at extent tree would detect errors like missing subvolume
9862  * tree, so we can do aggressive check to reduce duplicated checks.
9863  */
9864 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9865 {
9866         struct btrfs_root *extent_root = root->fs_info->extent_root;
9867         struct btrfs_key key;
9868         struct btrfs_path path;
9869         struct extent_buffer *leaf;
9870         int slot;
9871         struct btrfs_extent_item *ei;
9872         unsigned long ptr;
9873         unsigned long end;
9874         int type;
9875         u32 item_size;
9876         u64 offset;
9877         struct btrfs_extent_inline_ref *iref;
9878         int ret;
9879
9880         btrfs_init_path(&path);
9881         key.objectid = btrfs_header_bytenr(eb);
9882         key.type = BTRFS_METADATA_ITEM_KEY;
9883         key.offset = (u64)-1;
9884
9885         /*
9886          * Any failure in backref resolving means we can't determine
9887          * whom the tree block belongs to.
9888          * So in that case, we need to check that tree block
9889          */
9890         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9891         if (ret < 0)
9892                 goto need_check;
9893
9894         ret = btrfs_previous_extent_item(extent_root, &path,
9895                                          btrfs_header_bytenr(eb));
9896         if (ret)
9897                 goto need_check;
9898
9899         leaf = path.nodes[0];
9900         slot = path.slots[0];
9901         btrfs_item_key_to_cpu(leaf, &key, slot);
9902         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9903
9904         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9905                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9906         } else {
9907                 struct btrfs_tree_block_info *info;
9908
9909                 info = (struct btrfs_tree_block_info *)(ei + 1);
9910                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9911         }
9912
9913         item_size = btrfs_item_size_nr(leaf, slot);
9914         ptr = (unsigned long)iref;
9915         end = (unsigned long)ei + item_size;
9916         while (ptr < end) {
9917                 iref = (struct btrfs_extent_inline_ref *)ptr;
9918                 type = btrfs_extent_inline_ref_type(leaf, iref);
9919                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9920
9921                 /*
9922                  * We only check the tree block if current root is
9923                  * the lowest referencer of it.
9924                  */
9925                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9926                     offset < root->objectid) {
9927                         btrfs_release_path(&path);
9928                         return 0;
9929                 }
9930
9931                 ptr += btrfs_extent_inline_ref_size(type);
9932         }
9933         /*
9934          * Normally we should also check keyed tree block ref, but that may be
9935          * very time consuming.  Inlined ref should already make us skip a lot
9936          * of refs now.  So skip search keyed tree block ref.
9937          */
9938
9939 need_check:
9940         btrfs_release_path(&path);
9941         return 1;
9942 }
9943
9944 /*
9945  * Traversal function for tree block. We will do:
9946  * 1) Skip shared fs/subvolume tree blocks
9947  * 2) Update related bytes accounting
9948  * 3) Pre-order traversal
9949  */
9950 static int traverse_tree_block(struct btrfs_root *root,
9951                                 struct extent_buffer *node)
9952 {
9953         struct extent_buffer *eb;
9954         struct btrfs_key key;
9955         struct btrfs_key drop_key;
9956         int level;
9957         u64 nr;
9958         int i;
9959         int err = 0;
9960         int ret;
9961
9962         /*
9963          * Skip shared fs/subvolume tree block, in that case they will
9964          * be checked by referencer with lowest rootid
9965          */
9966         if (is_fstree(root->objectid) && !should_check(root, node))
9967                 return 0;
9968
9969         /* Update bytes accounting */
9970         total_btree_bytes += node->len;
9971         if (fs_root_objectid(btrfs_header_owner(node)))
9972                 total_fs_tree_bytes += node->len;
9973         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9974                 total_extent_tree_bytes += node->len;
9975         if (!found_old_backref &&
9976             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9977             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9978             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9979                 found_old_backref = 1;
9980
9981         /* pre-order tranversal, check itself first */
9982         level = btrfs_header_level(node);
9983         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9984                                    btrfs_header_level(node),
9985                                    btrfs_header_owner(node));
9986         err |= ret;
9987         if (err)
9988                 error(
9989         "check %s failed root %llu bytenr %llu level %d, force continue check",
9990                         level ? "node":"leaf", root->objectid,
9991                         btrfs_header_bytenr(node), btrfs_header_level(node));
9992
9993         if (!level) {
9994                 btree_space_waste += btrfs_leaf_free_space(root, node);
9995                 ret = check_leaf_items(root, node);
9996                 err |= ret;
9997                 return err;
9998         }
9999
10000         nr = btrfs_header_nritems(node);
10001         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10002         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10003                 sizeof(struct btrfs_key_ptr);
10004
10005         /* Then check all its children */
10006         for (i = 0; i < nr; i++) {
10007                 u64 blocknr = btrfs_node_blockptr(node, i);
10008
10009                 btrfs_node_key_to_cpu(node, &key, i);
10010                 if (level == root->root_item.drop_level &&
10011                     is_dropped_key(&key, &drop_key))
10012                         continue;
10013
10014                 /*
10015                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10016                  * to call the function itself.
10017                  */
10018                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10019                 if (extent_buffer_uptodate(eb)) {
10020                         ret = traverse_tree_block(root, eb);
10021                         err |= ret;
10022                 }
10023                 free_extent_buffer(eb);
10024         }
10025
10026         return err;
10027 }
10028
10029 /*
10030  * Low memory usage version check_chunks_and_extents.
10031  */
10032 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10033 {
10034         struct btrfs_path path;
10035         struct btrfs_key key;
10036         struct btrfs_root *root1;
10037         struct btrfs_root *cur_root;
10038         int err = 0;
10039         int ret;
10040
10041         root1 = root->fs_info->chunk_root;
10042         ret = traverse_tree_block(root1, root1->node);
10043         err |= ret;
10044
10045         root1 = root->fs_info->tree_root;
10046         ret = traverse_tree_block(root1, root1->node);
10047         err |= ret;
10048
10049         btrfs_init_path(&path);
10050         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10051         key.offset = 0;
10052         key.type = BTRFS_ROOT_ITEM_KEY;
10053
10054         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10055         if (ret) {
10056                 error("cannot find extent treet in tree_root");
10057                 goto out;
10058         }
10059
10060         while (1) {
10061                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10062                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10063                         goto next;
10064                 key.offset = (u64)-1;
10065
10066                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10067                 if (IS_ERR(cur_root) || !cur_root) {
10068                         error("failed to read tree: %lld", key.objectid);
10069                         goto next;
10070                 }
10071
10072                 ret = traverse_tree_block(cur_root, cur_root->node);
10073                 err |= ret;
10074
10075 next:
10076                 ret = btrfs_next_item(root1, &path);
10077                 if (ret)
10078                         goto out;
10079         }
10080
10081 out:
10082         btrfs_release_path(&path);
10083         return err;
10084 }
10085
10086 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10087                            struct btrfs_root *root, int overwrite)
10088 {
10089         struct extent_buffer *c;
10090         struct extent_buffer *old = root->node;
10091         int level;
10092         int ret;
10093         struct btrfs_disk_key disk_key = {0,0,0};
10094
10095         level = 0;
10096
10097         if (overwrite) {
10098                 c = old;
10099                 extent_buffer_get(c);
10100                 goto init;
10101         }
10102         c = btrfs_alloc_free_block(trans, root,
10103                                    root->nodesize,
10104                                    root->root_key.objectid,
10105                                    &disk_key, level, 0, 0);
10106         if (IS_ERR(c)) {
10107                 c = old;
10108                 extent_buffer_get(c);
10109                 overwrite = 1;
10110         }
10111 init:
10112         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10113         btrfs_set_header_level(c, level);
10114         btrfs_set_header_bytenr(c, c->start);
10115         btrfs_set_header_generation(c, trans->transid);
10116         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10117         btrfs_set_header_owner(c, root->root_key.objectid);
10118
10119         write_extent_buffer(c, root->fs_info->fsid,
10120                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10121
10122         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10123                             btrfs_header_chunk_tree_uuid(c),
10124                             BTRFS_UUID_SIZE);
10125
10126         btrfs_mark_buffer_dirty(c);
10127         /*
10128          * this case can happen in the following case:
10129          *
10130          * 1.overwrite previous root.
10131          *
10132          * 2.reinit reloc data root, this is because we skip pin
10133          * down reloc data tree before which means we can allocate
10134          * same block bytenr here.
10135          */
10136         if (old->start == c->start) {
10137                 btrfs_set_root_generation(&root->root_item,
10138                                           trans->transid);
10139                 root->root_item.level = btrfs_header_level(root->node);
10140                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10141                                         &root->root_key, &root->root_item);
10142                 if (ret) {
10143                         free_extent_buffer(c);
10144                         return ret;
10145                 }
10146         }
10147         free_extent_buffer(old);
10148         root->node = c;
10149         add_root_to_dirty_list(root);
10150         return 0;
10151 }
10152
10153 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10154                                 struct extent_buffer *eb, int tree_root)
10155 {
10156         struct extent_buffer *tmp;
10157         struct btrfs_root_item *ri;
10158         struct btrfs_key key;
10159         u64 bytenr;
10160         u32 nodesize;
10161         int level = btrfs_header_level(eb);
10162         int nritems;
10163         int ret;
10164         int i;
10165
10166         /*
10167          * If we have pinned this block before, don't pin it again.
10168          * This can not only avoid forever loop with broken filesystem
10169          * but also give us some speedups.
10170          */
10171         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10172                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10173                 return 0;
10174
10175         btrfs_pin_extent(fs_info, eb->start, eb->len);
10176
10177         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10178         nritems = btrfs_header_nritems(eb);
10179         for (i = 0; i < nritems; i++) {
10180                 if (level == 0) {
10181                         btrfs_item_key_to_cpu(eb, &key, i);
10182                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10183                                 continue;
10184                         /* Skip the extent root and reloc roots */
10185                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10186                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10187                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10188                                 continue;
10189                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10190                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10191
10192                         /*
10193                          * If at any point we start needing the real root we
10194                          * will have to build a stump root for the root we are
10195                          * in, but for now this doesn't actually use the root so
10196                          * just pass in extent_root.
10197                          */
10198                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10199                                               nodesize, 0);
10200                         if (!extent_buffer_uptodate(tmp)) {
10201                                 fprintf(stderr, "Error reading root block\n");
10202                                 return -EIO;
10203                         }
10204                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10205                         free_extent_buffer(tmp);
10206                         if (ret)
10207                                 return ret;
10208                 } else {
10209                         bytenr = btrfs_node_blockptr(eb, i);
10210
10211                         /* If we aren't the tree root don't read the block */
10212                         if (level == 1 && !tree_root) {
10213                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10214                                 continue;
10215                         }
10216
10217                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10218                                               nodesize, 0);
10219                         if (!extent_buffer_uptodate(tmp)) {
10220                                 fprintf(stderr, "Error reading tree block\n");
10221                                 return -EIO;
10222                         }
10223                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10224                         free_extent_buffer(tmp);
10225                         if (ret)
10226                                 return ret;
10227                 }
10228         }
10229
10230         return 0;
10231 }
10232
10233 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10234 {
10235         int ret;
10236
10237         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10238         if (ret)
10239                 return ret;
10240
10241         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10242 }
10243
10244 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10245 {
10246         struct btrfs_block_group_cache *cache;
10247         struct btrfs_path *path;
10248         struct extent_buffer *leaf;
10249         struct btrfs_chunk *chunk;
10250         struct btrfs_key key;
10251         int ret;
10252         u64 start;
10253
10254         path = btrfs_alloc_path();
10255         if (!path)
10256                 return -ENOMEM;
10257
10258         key.objectid = 0;
10259         key.type = BTRFS_CHUNK_ITEM_KEY;
10260         key.offset = 0;
10261
10262         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10263         if (ret < 0) {
10264                 btrfs_free_path(path);
10265                 return ret;
10266         }
10267
10268         /*
10269          * We do this in case the block groups were screwed up and had alloc
10270          * bits that aren't actually set on the chunks.  This happens with
10271          * restored images every time and could happen in real life I guess.
10272          */
10273         fs_info->avail_data_alloc_bits = 0;
10274         fs_info->avail_metadata_alloc_bits = 0;
10275         fs_info->avail_system_alloc_bits = 0;
10276
10277         /* First we need to create the in-memory block groups */
10278         while (1) {
10279                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10280                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10281                         if (ret < 0) {
10282                                 btrfs_free_path(path);
10283                                 return ret;
10284                         }
10285                         if (ret) {
10286                                 ret = 0;
10287                                 break;
10288                         }
10289                 }
10290                 leaf = path->nodes[0];
10291                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10292                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10293                         path->slots[0]++;
10294                         continue;
10295                 }
10296
10297                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10298                                        struct btrfs_chunk);
10299                 btrfs_add_block_group(fs_info, 0,
10300                                       btrfs_chunk_type(leaf, chunk),
10301                                       key.objectid, key.offset,
10302                                       btrfs_chunk_length(leaf, chunk));
10303                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10304                                  key.offset + btrfs_chunk_length(leaf, chunk),
10305                                  GFP_NOFS);
10306                 path->slots[0]++;
10307         }
10308         start = 0;
10309         while (1) {
10310                 cache = btrfs_lookup_first_block_group(fs_info, start);
10311                 if (!cache)
10312                         break;
10313                 cache->cached = 1;
10314                 start = cache->key.objectid + cache->key.offset;
10315         }
10316
10317         btrfs_free_path(path);
10318         return 0;
10319 }
10320
10321 static int reset_balance(struct btrfs_trans_handle *trans,
10322                          struct btrfs_fs_info *fs_info)
10323 {
10324         struct btrfs_root *root = fs_info->tree_root;
10325         struct btrfs_path *path;
10326         struct extent_buffer *leaf;
10327         struct btrfs_key key;
10328         int del_slot, del_nr = 0;
10329         int ret;
10330         int found = 0;
10331
10332         path = btrfs_alloc_path();
10333         if (!path)
10334                 return -ENOMEM;
10335
10336         key.objectid = BTRFS_BALANCE_OBJECTID;
10337         key.type = BTRFS_BALANCE_ITEM_KEY;
10338         key.offset = 0;
10339
10340         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10341         if (ret) {
10342                 if (ret > 0)
10343                         ret = 0;
10344                 if (!ret)
10345                         goto reinit_data_reloc;
10346                 else
10347                         goto out;
10348         }
10349
10350         ret = btrfs_del_item(trans, root, path);
10351         if (ret)
10352                 goto out;
10353         btrfs_release_path(path);
10354
10355         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10356         key.type = BTRFS_ROOT_ITEM_KEY;
10357         key.offset = 0;
10358
10359         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10360         if (ret < 0)
10361                 goto out;
10362         while (1) {
10363                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10364                         if (!found)
10365                                 break;
10366
10367                         if (del_nr) {
10368                                 ret = btrfs_del_items(trans, root, path,
10369                                                       del_slot, del_nr);
10370                                 del_nr = 0;
10371                                 if (ret)
10372                                         goto out;
10373                         }
10374                         key.offset++;
10375                         btrfs_release_path(path);
10376
10377                         found = 0;
10378                         ret = btrfs_search_slot(trans, root, &key, path,
10379                                                 -1, 1);
10380                         if (ret < 0)
10381                                 goto out;
10382                         continue;
10383                 }
10384                 found = 1;
10385                 leaf = path->nodes[0];
10386                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10387                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10388                         break;
10389                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10390                         path->slots[0]++;
10391                         continue;
10392                 }
10393                 if (!del_nr) {
10394                         del_slot = path->slots[0];
10395                         del_nr = 1;
10396                 } else {
10397                         del_nr++;
10398                 }
10399                 path->slots[0]++;
10400         }
10401
10402         if (del_nr) {
10403                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10404                 if (ret)
10405                         goto out;
10406         }
10407         btrfs_release_path(path);
10408
10409 reinit_data_reloc:
10410         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10411         key.type = BTRFS_ROOT_ITEM_KEY;
10412         key.offset = (u64)-1;
10413         root = btrfs_read_fs_root(fs_info, &key);
10414         if (IS_ERR(root)) {
10415                 fprintf(stderr, "Error reading data reloc tree\n");
10416                 ret = PTR_ERR(root);
10417                 goto out;
10418         }
10419         record_root_in_trans(trans, root);
10420         ret = btrfs_fsck_reinit_root(trans, root, 0);
10421         if (ret)
10422                 goto out;
10423         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10424 out:
10425         btrfs_free_path(path);
10426         return ret;
10427 }
10428
10429 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10430                               struct btrfs_fs_info *fs_info)
10431 {
10432         u64 start = 0;
10433         int ret;
10434
10435         /*
10436          * The only reason we don't do this is because right now we're just
10437          * walking the trees we find and pinning down their bytes, we don't look
10438          * at any of the leaves.  In order to do mixed groups we'd have to check
10439          * the leaves of any fs roots and pin down the bytes for any file
10440          * extents we find.  Not hard but why do it if we don't have to?
10441          */
10442         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10443                 fprintf(stderr, "We don't support re-initing the extent tree "
10444                         "for mixed block groups yet, please notify a btrfs "
10445                         "developer you want to do this so they can add this "
10446                         "functionality.\n");
10447                 return -EINVAL;
10448         }
10449
10450         /*
10451          * first we need to walk all of the trees except the extent tree and pin
10452          * down the bytes that are in use so we don't overwrite any existing
10453          * metadata.
10454          */
10455         ret = pin_metadata_blocks(fs_info);
10456         if (ret) {
10457                 fprintf(stderr, "error pinning down used bytes\n");
10458                 return ret;
10459         }
10460
10461         /*
10462          * Need to drop all the block groups since we're going to recreate all
10463          * of them again.
10464          */
10465         btrfs_free_block_groups(fs_info);
10466         ret = reset_block_groups(fs_info);
10467         if (ret) {
10468                 fprintf(stderr, "error resetting the block groups\n");
10469                 return ret;
10470         }
10471
10472         /* Ok we can allocate now, reinit the extent root */
10473         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10474         if (ret) {
10475                 fprintf(stderr, "extent root initialization failed\n");
10476                 /*
10477                  * When the transaction code is updated we should end the
10478                  * transaction, but for now progs only knows about commit so
10479                  * just return an error.
10480                  */
10481                 return ret;
10482         }
10483
10484         /*
10485          * Now we have all the in-memory block groups setup so we can make
10486          * allocations properly, and the metadata we care about is safe since we
10487          * pinned all of it above.
10488          */
10489         while (1) {
10490                 struct btrfs_block_group_cache *cache;
10491
10492                 cache = btrfs_lookup_first_block_group(fs_info, start);
10493                 if (!cache)
10494                         break;
10495                 start = cache->key.objectid + cache->key.offset;
10496                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10497                                         &cache->key, &cache->item,
10498                                         sizeof(cache->item));
10499                 if (ret) {
10500                         fprintf(stderr, "Error adding block group\n");
10501                         return ret;
10502                 }
10503                 btrfs_extent_post_op(trans, fs_info->extent_root);
10504         }
10505
10506         ret = reset_balance(trans, fs_info);
10507         if (ret)
10508                 fprintf(stderr, "error resetting the pending balance\n");
10509
10510         return ret;
10511 }
10512
10513 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10514 {
10515         struct btrfs_path *path;
10516         struct btrfs_trans_handle *trans;
10517         struct btrfs_key key;
10518         int ret;
10519
10520         printf("Recowing metadata block %llu\n", eb->start);
10521         key.objectid = btrfs_header_owner(eb);
10522         key.type = BTRFS_ROOT_ITEM_KEY;
10523         key.offset = (u64)-1;
10524
10525         root = btrfs_read_fs_root(root->fs_info, &key);
10526         if (IS_ERR(root)) {
10527                 fprintf(stderr, "Couldn't find owner root %llu\n",
10528                         key.objectid);
10529                 return PTR_ERR(root);
10530         }
10531
10532         path = btrfs_alloc_path();
10533         if (!path)
10534                 return -ENOMEM;
10535
10536         trans = btrfs_start_transaction(root, 1);
10537         if (IS_ERR(trans)) {
10538                 btrfs_free_path(path);
10539                 return PTR_ERR(trans);
10540         }
10541
10542         path->lowest_level = btrfs_header_level(eb);
10543         if (path->lowest_level)
10544                 btrfs_node_key_to_cpu(eb, &key, 0);
10545         else
10546                 btrfs_item_key_to_cpu(eb, &key, 0);
10547
10548         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10549         btrfs_commit_transaction(trans, root);
10550         btrfs_free_path(path);
10551         return ret;
10552 }
10553
10554 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10555 {
10556         struct btrfs_path *path;
10557         struct btrfs_trans_handle *trans;
10558         struct btrfs_key key;
10559         int ret;
10560
10561         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10562                bad->key.type, bad->key.offset);
10563         key.objectid = bad->root_id;
10564         key.type = BTRFS_ROOT_ITEM_KEY;
10565         key.offset = (u64)-1;
10566
10567         root = btrfs_read_fs_root(root->fs_info, &key);
10568         if (IS_ERR(root)) {
10569                 fprintf(stderr, "Couldn't find owner root %llu\n",
10570                         key.objectid);
10571                 return PTR_ERR(root);
10572         }
10573
10574         path = btrfs_alloc_path();
10575         if (!path)
10576                 return -ENOMEM;
10577
10578         trans = btrfs_start_transaction(root, 1);
10579         if (IS_ERR(trans)) {
10580                 btrfs_free_path(path);
10581                 return PTR_ERR(trans);
10582         }
10583
10584         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10585         if (ret) {
10586                 if (ret > 0)
10587                         ret = 0;
10588                 goto out;
10589         }
10590         ret = btrfs_del_item(trans, root, path);
10591 out:
10592         btrfs_commit_transaction(trans, root);
10593         btrfs_free_path(path);
10594         return ret;
10595 }
10596
10597 static int zero_log_tree(struct btrfs_root *root)
10598 {
10599         struct btrfs_trans_handle *trans;
10600         int ret;
10601
10602         trans = btrfs_start_transaction(root, 1);
10603         if (IS_ERR(trans)) {
10604                 ret = PTR_ERR(trans);
10605                 return ret;
10606         }
10607         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10608         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10609         ret = btrfs_commit_transaction(trans, root);
10610         return ret;
10611 }
10612
10613 static int populate_csum(struct btrfs_trans_handle *trans,
10614                          struct btrfs_root *csum_root, char *buf, u64 start,
10615                          u64 len)
10616 {
10617         u64 offset = 0;
10618         u64 sectorsize;
10619         int ret = 0;
10620
10621         while (offset < len) {
10622                 sectorsize = csum_root->sectorsize;
10623                 ret = read_extent_data(csum_root, buf, start + offset,
10624                                        &sectorsize, 0);
10625                 if (ret)
10626                         break;
10627                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10628                                             start + offset, buf, sectorsize);
10629                 if (ret)
10630                         break;
10631                 offset += sectorsize;
10632         }
10633         return ret;
10634 }
10635
10636 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10637                                       struct btrfs_root *csum_root,
10638                                       struct btrfs_root *cur_root)
10639 {
10640         struct btrfs_path *path;
10641         struct btrfs_key key;
10642         struct extent_buffer *node;
10643         struct btrfs_file_extent_item *fi;
10644         char *buf = NULL;
10645         u64 start = 0;
10646         u64 len = 0;
10647         int slot = 0;
10648         int ret = 0;
10649
10650         path = btrfs_alloc_path();
10651         if (!path)
10652                 return -ENOMEM;
10653         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10654         if (!buf) {
10655                 ret = -ENOMEM;
10656                 goto out;
10657         }
10658
10659         key.objectid = 0;
10660         key.offset = 0;
10661         key.type = 0;
10662
10663         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10664         if (ret < 0)
10665                 goto out;
10666         /* Iterate all regular file extents and fill its csum */
10667         while (1) {
10668                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10669
10670                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10671                         goto next;
10672                 node = path->nodes[0];
10673                 slot = path->slots[0];
10674                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10675                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10676                         goto next;
10677                 start = btrfs_file_extent_disk_bytenr(node, fi);
10678                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10679
10680                 ret = populate_csum(trans, csum_root, buf, start, len);
10681                 if (ret == -EEXIST)
10682                         ret = 0;
10683                 if (ret < 0)
10684                         goto out;
10685 next:
10686                 /*
10687                  * TODO: if next leaf is corrupted, jump to nearest next valid
10688                  * leaf.
10689                  */
10690                 ret = btrfs_next_item(cur_root, path);
10691                 if (ret < 0)
10692                         goto out;
10693                 if (ret > 0) {
10694                         ret = 0;
10695                         goto out;
10696                 }
10697         }
10698
10699 out:
10700         btrfs_free_path(path);
10701         free(buf);
10702         return ret;
10703 }
10704
10705 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10706                                   struct btrfs_root *csum_root)
10707 {
10708         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10709         struct btrfs_path *path;
10710         struct btrfs_root *tree_root = fs_info->tree_root;
10711         struct btrfs_root *cur_root;
10712         struct extent_buffer *node;
10713         struct btrfs_key key;
10714         int slot = 0;
10715         int ret = 0;
10716
10717         path = btrfs_alloc_path();
10718         if (!path)
10719                 return -ENOMEM;
10720
10721         key.objectid = BTRFS_FS_TREE_OBJECTID;
10722         key.offset = 0;
10723         key.type = BTRFS_ROOT_ITEM_KEY;
10724
10725         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10726         if (ret < 0)
10727                 goto out;
10728         if (ret > 0) {
10729                 ret = -ENOENT;
10730                 goto out;
10731         }
10732
10733         while (1) {
10734                 node = path->nodes[0];
10735                 slot = path->slots[0];
10736                 btrfs_item_key_to_cpu(node, &key, slot);
10737                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10738                         goto out;
10739                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10740                         goto next;
10741                 if (!is_fstree(key.objectid))
10742                         goto next;
10743                 key.offset = (u64)-1;
10744
10745                 cur_root = btrfs_read_fs_root(fs_info, &key);
10746                 if (IS_ERR(cur_root) || !cur_root) {
10747                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10748                                 key.objectid);
10749                         goto out;
10750                 }
10751                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10752                                 cur_root);
10753                 if (ret < 0)
10754                         goto out;
10755 next:
10756                 ret = btrfs_next_item(tree_root, path);
10757                 if (ret > 0) {
10758                         ret = 0;
10759                         goto out;
10760                 }
10761                 if (ret < 0)
10762                         goto out;
10763         }
10764
10765 out:
10766         btrfs_free_path(path);
10767         return ret;
10768 }
10769
10770 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10771                                       struct btrfs_root *csum_root)
10772 {
10773         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10774         struct btrfs_path *path;
10775         struct btrfs_extent_item *ei;
10776         struct extent_buffer *leaf;
10777         char *buf;
10778         struct btrfs_key key;
10779         int ret;
10780
10781         path = btrfs_alloc_path();
10782         if (!path)
10783                 return -ENOMEM;
10784
10785         key.objectid = 0;
10786         key.type = BTRFS_EXTENT_ITEM_KEY;
10787         key.offset = 0;
10788
10789         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10790         if (ret < 0) {
10791                 btrfs_free_path(path);
10792                 return ret;
10793         }
10794
10795         buf = malloc(csum_root->sectorsize);
10796         if (!buf) {
10797                 btrfs_free_path(path);
10798                 return -ENOMEM;
10799         }
10800
10801         while (1) {
10802                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10803                         ret = btrfs_next_leaf(extent_root, path);
10804                         if (ret < 0)
10805                                 break;
10806                         if (ret) {
10807                                 ret = 0;
10808                                 break;
10809                         }
10810                 }
10811                 leaf = path->nodes[0];
10812
10813                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10814                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10815                         path->slots[0]++;
10816                         continue;
10817                 }
10818
10819                 ei = btrfs_item_ptr(leaf, path->slots[0],
10820                                     struct btrfs_extent_item);
10821                 if (!(btrfs_extent_flags(leaf, ei) &
10822                       BTRFS_EXTENT_FLAG_DATA)) {
10823                         path->slots[0]++;
10824                         continue;
10825                 }
10826
10827                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10828                                     key.offset);
10829                 if (ret)
10830                         break;
10831                 path->slots[0]++;
10832         }
10833
10834         btrfs_free_path(path);
10835         free(buf);
10836         return ret;
10837 }
10838
/*
 * Recalculate all data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes out all extent info, so in that case
 * the extent tree cannot be used as the source.  When @search_fs_tree is
 * non-zero the fs/subvolume trees are walked instead; otherwise the extent
 * tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
10855
10856 static void free_roots_info_cache(void)
10857 {
10858         if (!roots_info_cache)
10859                 return;
10860
10861         while (!cache_tree_empty(roots_info_cache)) {
10862                 struct cache_extent *entry;
10863                 struct root_item_info *rii;
10864
10865                 entry = first_cache_extent(roots_info_cache);
10866                 if (!entry)
10867                         break;
10868                 remove_cache_extent(roots_info_cache, entry);
10869                 rii = container_of(entry, struct root_item_info, cache_extent);
10870                 free(rii);
10871         }
10872
10873         free(roots_info_cache);
10874         roots_info_cache = NULL;
10875 }
10876
10877 static int build_roots_info_cache(struct btrfs_fs_info *info)
10878 {
10879         int ret = 0;
10880         struct btrfs_key key;
10881         struct extent_buffer *leaf;
10882         struct btrfs_path *path;
10883
10884         if (!roots_info_cache) {
10885                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10886                 if (!roots_info_cache)
10887                         return -ENOMEM;
10888                 cache_tree_init(roots_info_cache);
10889         }
10890
10891         path = btrfs_alloc_path();
10892         if (!path)
10893                 return -ENOMEM;
10894
10895         key.objectid = 0;
10896         key.type = BTRFS_EXTENT_ITEM_KEY;
10897         key.offset = 0;
10898
10899         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10900         if (ret < 0)
10901                 goto out;
10902         leaf = path->nodes[0];
10903
10904         while (1) {
10905                 struct btrfs_key found_key;
10906                 struct btrfs_extent_item *ei;
10907                 struct btrfs_extent_inline_ref *iref;
10908                 int slot = path->slots[0];
10909                 int type;
10910                 u64 flags;
10911                 u64 root_id;
10912                 u8 level;
10913                 struct cache_extent *entry;
10914                 struct root_item_info *rii;
10915
10916                 if (slot >= btrfs_header_nritems(leaf)) {
10917                         ret = btrfs_next_leaf(info->extent_root, path);
10918                         if (ret < 0) {
10919                                 break;
10920                         } else if (ret) {
10921                                 ret = 0;
10922                                 break;
10923                         }
10924                         leaf = path->nodes[0];
10925                         slot = path->slots[0];
10926                 }
10927
10928                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10929
10930                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10931                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10932                         goto next;
10933
10934                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10935                 flags = btrfs_extent_flags(leaf, ei);
10936
10937                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10938                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10939                         goto next;
10940
10941                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10942                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10943                         level = found_key.offset;
10944                 } else {
10945                         struct btrfs_tree_block_info *binfo;
10946
10947                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10948                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10949                         level = btrfs_tree_block_level(leaf, binfo);
10950                 }
10951
10952                 /*
10953                  * For a root extent, it must be of the following type and the
10954                  * first (and only one) iref in the item.
10955                  */
10956                 type = btrfs_extent_inline_ref_type(leaf, iref);
10957                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10958                         goto next;
10959
10960                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10961                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10962                 if (!entry) {
10963                         rii = malloc(sizeof(struct root_item_info));
10964                         if (!rii) {
10965                                 ret = -ENOMEM;
10966                                 goto out;
10967                         }
10968                         rii->cache_extent.start = root_id;
10969                         rii->cache_extent.size = 1;
10970                         rii->level = (u8)-1;
10971                         entry = &rii->cache_extent;
10972                         ret = insert_cache_extent(roots_info_cache, entry);
10973                         ASSERT(ret == 0);
10974                 } else {
10975                         rii = container_of(entry, struct root_item_info,
10976                                            cache_extent);
10977                 }
10978
10979                 ASSERT(rii->cache_extent.start == root_id);
10980                 ASSERT(rii->cache_extent.size == 1);
10981
10982                 if (level > rii->level || rii->level == (u8)-1) {
10983                         rii->level = level;
10984                         rii->bytenr = found_key.objectid;
10985                         rii->gen = btrfs_extent_generation(leaf, ei);
10986                         rii->node_count = 1;
10987                 } else if (level == rii->level) {
10988                         rii->node_count++;
10989                 }
10990 next:
10991                 path->slots[0]++;
10992         }
10993
10994 out:
10995         btrfs_free_path(path);
10996
10997         return ret;
10998 }
10999
11000 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11001                                   struct btrfs_path *path,
11002                                   const struct btrfs_key *root_key,
11003                                   const int read_only_mode)
11004 {
11005         const u64 root_id = root_key->objectid;
11006         struct cache_extent *entry;
11007         struct root_item_info *rii;
11008         struct btrfs_root_item ri;
11009         unsigned long offset;
11010
11011         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11012         if (!entry) {
11013                 fprintf(stderr,
11014                         "Error: could not find extent items for root %llu\n",
11015                         root_key->objectid);
11016                 return -ENOENT;
11017         }
11018
11019         rii = container_of(entry, struct root_item_info, cache_extent);
11020         ASSERT(rii->cache_extent.start == root_id);
11021         ASSERT(rii->cache_extent.size == 1);
11022
11023         if (rii->node_count != 1) {
11024                 fprintf(stderr,
11025                         "Error: could not find btree root extent for root %llu\n",
11026                         root_id);
11027                 return -ENOENT;
11028         }
11029
11030         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11031         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11032
11033         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11034             btrfs_root_level(&ri) != rii->level ||
11035             btrfs_root_generation(&ri) != rii->gen) {
11036
11037                 /*
11038                  * If we're in repair mode but our caller told us to not update
11039                  * the root item, i.e. just check if it needs to be updated, don't
11040                  * print this message, since the caller will call us again shortly
11041                  * for the same root item without read only mode (the caller will
11042                  * open a transaction first).
11043                  */
11044                 if (!(read_only_mode && repair))
11045                         fprintf(stderr,
11046                                 "%sroot item for root %llu,"
11047                                 " current bytenr %llu, current gen %llu, current level %u,"
11048                                 " new bytenr %llu, new gen %llu, new level %u\n",
11049                                 (read_only_mode ? "" : "fixing "),
11050                                 root_id,
11051                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11052                                 btrfs_root_level(&ri),
11053                                 rii->bytenr, rii->gen, rii->level);
11054
11055                 if (btrfs_root_generation(&ri) > rii->gen) {
11056                         fprintf(stderr,
11057                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11058                                 root_id, btrfs_root_generation(&ri), rii->gen);
11059                         return -EINVAL;
11060                 }
11061
11062                 if (!read_only_mode) {
11063                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11064                         btrfs_set_root_level(&ri, rii->level);
11065                         btrfs_set_root_generation(&ri, rii->gen);
11066                         write_extent_buffer(path->nodes[0], &ri,
11067                                             offset, sizeof(ri));
11068                 }
11069
11070                 return 1;
11071         }
11072
11073         return 0;
11074 }
11075
11076 /*
11077  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11078  * caused read-only snapshots to be corrupted if they were created at a moment
11079  * when the source subvolume/snapshot had orphan items. The issue was that the
11080  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11081  * node instead of the post orphan cleanup root node.
11082  * So this function, and its callees, just detects and fixes those cases. Even
11083  * though the regression was for read-only snapshots, this function applies to
11084  * any snapshot/subvolume root.
11085  * This must be run before any other repair code - not doing it so, makes other
11086  * repair code delete or modify backrefs in the extent tree for example, which
11087  * will result in an inconsistent fs after repairing the root items.
11088  */
11089 static int repair_root_items(struct btrfs_fs_info *info)
11090 {
11091         struct btrfs_path *path = NULL;
11092         struct btrfs_key key;
11093         struct extent_buffer *leaf;
11094         struct btrfs_trans_handle *trans = NULL;
11095         int ret = 0;
11096         int bad_roots = 0;
11097         int need_trans = 0;
11098
11099         ret = build_roots_info_cache(info);
11100         if (ret)
11101                 goto out;
11102
11103         path = btrfs_alloc_path();
11104         if (!path) {
11105                 ret = -ENOMEM;
11106                 goto out;
11107         }
11108
11109         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11110         key.type = BTRFS_ROOT_ITEM_KEY;
11111         key.offset = 0;
11112
11113 again:
11114         /*
11115          * Avoid opening and committing transactions if a leaf doesn't have
11116          * any root items that need to be fixed, so that we avoid rotating
11117          * backup roots unnecessarily.
11118          */
11119         if (need_trans) {
11120                 trans = btrfs_start_transaction(info->tree_root, 1);
11121                 if (IS_ERR(trans)) {
11122                         ret = PTR_ERR(trans);
11123                         goto out;
11124                 }
11125         }
11126
11127         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11128                                 0, trans ? 1 : 0);
11129         if (ret < 0)
11130                 goto out;
11131         leaf = path->nodes[0];
11132
11133         while (1) {
11134                 struct btrfs_key found_key;
11135
11136                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11137                         int no_more_keys = find_next_key(path, &key);
11138
11139                         btrfs_release_path(path);
11140                         if (trans) {
11141                                 ret = btrfs_commit_transaction(trans,
11142                                                                info->tree_root);
11143                                 trans = NULL;
11144                                 if (ret < 0)
11145                                         goto out;
11146                         }
11147                         need_trans = 0;
11148                         if (no_more_keys)
11149                                 break;
11150                         goto again;
11151                 }
11152
11153                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11154
11155                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11156                         goto next;
11157                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11158                         goto next;
11159
11160                 ret = maybe_repair_root_item(info, path, &found_key,
11161                                              trans ? 0 : 1);
11162                 if (ret < 0)
11163                         goto out;
11164                 if (ret) {
11165                         if (!trans && repair) {
11166                                 need_trans = 1;
11167                                 key = found_key;
11168                                 btrfs_release_path(path);
11169                                 goto again;
11170                         }
11171                         bad_roots++;
11172                 }
11173 next:
11174                 path->slots[0]++;
11175         }
11176         ret = 0;
11177 out:
11178         free_roots_info_cache();
11179         btrfs_free_path(path);
11180         if (trans)
11181                 btrfs_commit_transaction(trans, info->tree_root);
11182         if (ret < 0)
11183                 return ret;
11184
11185         return bad_roots;
11186 }
11187
/*
 * Usage text for "btrfs check", as a NULL-terminated array of lines.
 * The first entry is the synopsis; the option descriptions follow the
 * empty-string entry.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               select mode, allows to make some memory/IO",
	"                            trade-offs, where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report           print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	NULL
};
11217
11218 int cmd_check(int argc, char **argv)
11219 {
11220         struct cache_tree root_cache;
11221         struct btrfs_root *root;
11222         struct btrfs_fs_info *info;
11223         u64 bytenr = 0;
11224         u64 subvolid = 0;
11225         u64 tree_root_bytenr = 0;
11226         u64 chunk_root_bytenr = 0;
11227         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11228         int ret;
11229         u64 num;
11230         int init_csum_tree = 0;
11231         int readonly = 0;
11232         int qgroup_report = 0;
11233         int qgroups_repaired = 0;
11234         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11235
11236         while(1) {
11237                 int c;
11238                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11239                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11240                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11241                         GETOPT_VAL_MODE };
11242                 static const struct option long_options[] = {
11243                         { "super", required_argument, NULL, 's' },
11244                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11245                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11246                         { "init-csum-tree", no_argument, NULL,
11247                                 GETOPT_VAL_INIT_CSUM },
11248                         { "init-extent-tree", no_argument, NULL,
11249                                 GETOPT_VAL_INIT_EXTENT },
11250                         { "check-data-csum", no_argument, NULL,
11251                                 GETOPT_VAL_CHECK_CSUM },
11252                         { "backup", no_argument, NULL, 'b' },
11253                         { "subvol-extents", required_argument, NULL, 'E' },
11254                         { "qgroup-report", no_argument, NULL, 'Q' },
11255                         { "tree-root", required_argument, NULL, 'r' },
11256                         { "chunk-root", required_argument, NULL,
11257                                 GETOPT_VAL_CHUNK_TREE },
11258                         { "progress", no_argument, NULL, 'p' },
11259                         { "mode", required_argument, NULL,
11260                                 GETOPT_VAL_MODE },
11261                         { NULL, 0, NULL, 0}
11262                 };
11263
11264                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11265                 if (c < 0)
11266                         break;
11267                 switch(c) {
11268                         case 'a': /* ignored */ break;
11269                         case 'b':
11270                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11271                                 break;
11272                         case 's':
11273                                 num = arg_strtou64(optarg);
11274                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11275                                         fprintf(stderr,
11276                                                 "ERROR: super mirror should be less than: %d\n",
11277                                                 BTRFS_SUPER_MIRROR_MAX);
11278                                         exit(1);
11279                                 }
11280                                 bytenr = btrfs_sb_offset(((int)num));
11281                                 printf("using SB copy %llu, bytenr %llu\n", num,
11282                                        (unsigned long long)bytenr);
11283                                 break;
11284                         case 'Q':
11285                                 qgroup_report = 1;
11286                                 break;
11287                         case 'E':
11288                                 subvolid = arg_strtou64(optarg);
11289                                 break;
11290                         case 'r':
11291                                 tree_root_bytenr = arg_strtou64(optarg);
11292                                 break;
11293                         case GETOPT_VAL_CHUNK_TREE:
11294                                 chunk_root_bytenr = arg_strtou64(optarg);
11295                                 break;
11296                         case 'p':
11297                                 ctx.progress_enabled = true;
11298                                 break;
11299                         case '?':
11300                         case 'h':
11301                                 usage(cmd_check_usage);
11302                         case GETOPT_VAL_REPAIR:
11303                                 printf("enabling repair mode\n");
11304                                 repair = 1;
11305                                 ctree_flags |= OPEN_CTREE_WRITES;
11306                                 break;
11307                         case GETOPT_VAL_READONLY:
11308                                 readonly = 1;
11309                                 break;
11310                         case GETOPT_VAL_INIT_CSUM:
11311                                 printf("Creating a new CRC tree\n");
11312                                 init_csum_tree = 1;
11313                                 repair = 1;
11314                                 ctree_flags |= OPEN_CTREE_WRITES;
11315                                 break;
11316                         case GETOPT_VAL_INIT_EXTENT:
11317                                 init_extent_tree = 1;
11318                                 ctree_flags |= (OPEN_CTREE_WRITES |
11319                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11320                                 repair = 1;
11321                                 break;
11322                         case GETOPT_VAL_CHECK_CSUM:
11323                                 check_data_csum = 1;
11324                                 break;
11325                         case GETOPT_VAL_MODE:
11326                                 check_mode = parse_check_mode(optarg);
11327                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11328                                         error("unknown mode: %s", optarg);
11329                                         exit(1);
11330                                 }
11331                                 break;
11332                 }
11333         }
11334
11335         if (check_argc_exact(argc - optind, 1))
11336                 usage(cmd_check_usage);
11337
11338         if (ctx.progress_enabled) {
11339                 ctx.tp = TASK_NOTHING;
11340                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11341         }
11342
11343         /* This check is the only reason for --readonly to exist */
11344         if (readonly && repair) {
11345                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11346                 exit(1);
11347         }
11348
11349         /*
11350          * Repair is not supported in low memory mode yet
11351          */
11352         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11353                 error("Low memory mode doesn't support repair yet");
11354                 exit(1);
11355         }
11356
11357         radix_tree_init();
11358         cache_tree_init(&root_cache);
11359
11360         if((ret = check_mounted(argv[optind])) < 0) {
11361                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11362                 goto err_out;
11363         } else if(ret) {
11364                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11365                 ret = -EBUSY;
11366                 goto err_out;
11367         }
11368
11369         /* only allow partial opening under repair mode */
11370         if (repair)
11371                 ctree_flags |= OPEN_CTREE_PARTIAL;
11372
11373         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11374                                   chunk_root_bytenr, ctree_flags);
11375         if (!info) {
11376                 fprintf(stderr, "Couldn't open file system\n");
11377                 ret = -EIO;
11378                 goto err_out;
11379         }
11380
11381         global_info = info;
11382         root = info->fs_root;
11383
11384         /*
11385          * Repair mode forces a transaction commit, which would make the
11386          * log tree fail to load on the next mount, so clear it first.
11387          */
11388         if (repair && btrfs_super_log_root(info->super_copy)) {
11389                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11390                 if (!ret) {
11391                         ret = 1;
11392                         goto close_out;
11393                 }
11394                 ret = zero_log_tree(root);
11395                 if (ret) {
11396                         fprintf(stderr, "fail to zero log tree\n");
11397                         goto close_out;
11398                 }
11399         }
11400
11401         uuid_unparse(info->super_copy->fsid, uuidbuf);
11402         if (qgroup_report) {
11403                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11404                        uuidbuf);
11405                 ret = qgroup_verify_all(info);
11406                 if (ret == 0)
11407                         report_qgroups(1);
11408                 goto close_out;
11409         }
11410         if (subvolid) {
11411                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11412                        subvolid, argv[optind], uuidbuf);
11413                 ret = print_extent_state(info, subvolid);
11414                 goto close_out;
11415         }
11416         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11417
11418         if (!extent_buffer_uptodate(info->tree_root->node) ||
11419             !extent_buffer_uptodate(info->dev_root->node) ||
11420             !extent_buffer_uptodate(info->chunk_root->node)) {
11421                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11422                 ret = -EIO;
11423                 goto close_out;
11424         }
11425
11426         if (init_extent_tree || init_csum_tree) {
11427                 struct btrfs_trans_handle *trans;
11428
11429                 trans = btrfs_start_transaction(info->extent_root, 0);
11430                 if (IS_ERR(trans)) {
11431                         fprintf(stderr, "Error starting transaction\n");
11432                         ret = PTR_ERR(trans);
11433                         goto close_out;
11434                 }
11435
11436                 if (init_extent_tree) {
11437                         printf("Creating a new extent tree\n");
11438                         ret = reinit_extent_tree(trans, info);
11439                         if (ret)
11440                                 goto close_out;
11441                 }
11442
11443                 if (init_csum_tree) {
11444                         fprintf(stderr, "Reinit crc root\n");
11445                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11446                         if (ret) {
11447                                 fprintf(stderr, "crc root initialization failed\n");
11448                                 ret = -EIO;
11449                                 goto close_out;
11450                         }
11451
11452                         ret = fill_csum_tree(trans, info->csum_root,
11453                                              init_extent_tree);
11454                         if (ret) {
11455                                 fprintf(stderr, "crc refilling failed\n");
11456                                 return -EIO;
11457                         }
11458                 }
11459                 /*
11460                  * Ok now we commit and run the normal fsck, which will add
11461                  * extent entries for all of the items it finds.
11462                  */
11463                 ret = btrfs_commit_transaction(trans, info->extent_root);
11464                 if (ret)
11465                         goto close_out;
11466         }
11467         if (!extent_buffer_uptodate(info->extent_root->node)) {
11468                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11469                 ret = -EIO;
11470                 goto close_out;
11471         }
11472         if (!extent_buffer_uptodate(info->csum_root->node)) {
11473                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11474                 ret = -EIO;
11475                 goto close_out;
11476         }
11477
11478         if (!ctx.progress_enabled)
11479                 fprintf(stderr, "checking extents\n");
11480         if (check_mode == CHECK_MODE_LOWMEM)
11481                 ret = check_chunks_and_extents_v2(root);
11482         else
11483                 ret = check_chunks_and_extents(root);
11484         if (ret)
11485                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11486
11487         ret = repair_root_items(info);
11488         if (ret < 0)
11489                 goto close_out;
11490         if (repair) {
11491                 fprintf(stderr, "Fixed %d roots.\n", ret);
11492                 ret = 0;
11493         } else if (ret > 0) {
11494                 fprintf(stderr,
11495                        "Found %d roots with an outdated root item.\n",
11496                        ret);
11497                 fprintf(stderr,
11498                         "Please run a filesystem check with the option --repair to fix them.\n");
11499                 ret = 1;
11500                 goto close_out;
11501         }
11502
11503         if (!ctx.progress_enabled) {
11504                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11505                         fprintf(stderr, "checking free space tree\n");
11506                 else
11507                         fprintf(stderr, "checking free space cache\n");
11508         }
11509         ret = check_space_cache(root);
11510         if (ret)
11511                 goto out;
11512
11513         /*
11514          * We used to have to have these hole extents in between our real
11515          * extents so if we don't have this flag set we need to make sure there
11516          * are no gaps in the file extents for inodes, otherwise we can just
11517          * ignore it when this happens.
11518          */
11519         no_holes = btrfs_fs_incompat(root->fs_info,
11520                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11521         if (!ctx.progress_enabled)
11522                 fprintf(stderr, "checking fs roots\n");
11523         ret = check_fs_roots(root, &root_cache);
11524         if (ret)
11525                 goto out;
11526
11527         fprintf(stderr, "checking csums\n");
11528         ret = check_csums(root);
11529         if (ret)
11530                 goto out;
11531
11532         fprintf(stderr, "checking root refs\n");
11533         ret = check_root_refs(root, &root_cache);
11534         if (ret)
11535                 goto out;
11536
11537         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11538                 struct extent_buffer *eb;
11539
11540                 eb = list_first_entry(&root->fs_info->recow_ebs,
11541                                       struct extent_buffer, recow);
11542                 list_del_init(&eb->recow);
11543                 ret = recow_extent_buffer(root, eb);
11544                 if (ret)
11545                         break;
11546         }
11547
11548         while (!list_empty(&delete_items)) {
11549                 struct bad_item *bad;
11550
11551                 bad = list_first_entry(&delete_items, struct bad_item, list);
11552                 list_del_init(&bad->list);
11553                 if (repair)
11554                         ret = delete_bad_item(root, bad);
11555                 free(bad);
11556         }
11557
11558         if (info->quota_enabled) {
11559                 int err;
11560                 fprintf(stderr, "checking quota groups\n");
11561                 err = qgroup_verify_all(info);
11562                 if (err)
11563                         goto out;
11564                 report_qgroups(0);
11565                 err = repair_qgroups(info, &qgroups_repaired);
11566                 if (err)
11567                         goto out;
11568         }
11569
11570         if (!list_empty(&root->fs_info->recow_ebs)) {
11571                 fprintf(stderr, "Transid errors in file system\n");
11572                 ret = 1;
11573         }
11574 out:
11575         /* Don't override original ret */
11576         if (!ret && qgroups_repaired)
11577                 ret = qgroups_repaired;
11578
11579         if (found_old_backref) { /*
11580                  * there was a disk format change when mixed
11581                  * backref was in testing tree. The old format
11582                  * existed about one week.
11583                  */
11584                 printf("\n * Found old mixed backref format. "
11585                        "The old format is not supported! *"
11586                        "\n * Please mount the FS in readonly mode, "
11587                        "backup data and re-format the FS. *\n\n");
11588                 ret = 1;
11589         }
11590         printf("found %llu bytes used err is %d\n",
11591                (unsigned long long)bytes_used, ret);
11592         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11593         printf("total tree bytes: %llu\n",
11594                (unsigned long long)total_btree_bytes);
11595         printf("total fs tree bytes: %llu\n",
11596                (unsigned long long)total_fs_tree_bytes);
11597         printf("total extent tree bytes: %llu\n",
11598                (unsigned long long)total_extent_tree_bytes);
11599         printf("btree space waste bytes: %llu\n",
11600                (unsigned long long)btree_space_waste);
11601         printf("file data blocks allocated: %llu\n referenced %llu\n",
11602                 (unsigned long long)data_bytes_allocated,
11603                 (unsigned long long)data_bytes_referenced);
11604
11605         free_qgroup_counts();
11606         free_root_recs_tree(&root_cache);
11607 close_out:
11608         close_ctree(root);
11609 err_out:
11610         if (ctx.progress_enabled)
11611                 task_deinit(ctx.info);
11612
11613         return ret;
11614 }