btrfs-progs: check: make low memory mode support partially dropped snapshots
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phases reported by the progress indicator.  The first three values are
 * used as indexes into task_position_string[] in print_status_check();
 * TASK_NOTHING makes that function return without printing anything.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
51
/* State handed to print_status_check() through its void * argument. */
struct task_ctx {
        int progress_enabled;
        /* Selects the message print_status_check() prints */
        enum task_position tp;

        /* Passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Checker implementations selectable by name through parse_check_mode().
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for a name it does
 * not recognize; CHECK_MODE_DEFAULT is an alias of the original mode.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode currently in effect; starts as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
/*
 * Common header of a back reference.  It is embedded as the `node` member
 * of struct data_backref and struct tree_backref and is linked through
 * `list`.  All state is kept in one-bit flags.
 */
struct extent_backref {
        struct list_head list;
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
/*
 * Back reference record with data-extent details.  The common
 * extent_backref header is embedded as `node`; to_data_backref() recovers
 * this structure from a pointer to that header.
 */
struct data_backref {
        struct extent_backref node;
        /* parent and root share the same storage (anonymous union) */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a plain list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to the orphan_extents list of the
 * owning inode_record.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
134
/*
 * Back reference record that carries only the common extent_backref header
 * (as `node`) plus a parent/root value; to_tree_backref() recovers this
 * structure from a pointer to that header.
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share the same storage (anonymous union) */
        union {
                u64 parent;
                u64 root;
        };
};
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * The value 2 fits that two-bit field and differs from both 0 and 1.
 */
enum { FLAG_UNSET = 2 };

/*
 * Record kept per extent.  It owns a list of back references (`backrefs`),
 * can be linked through `list` (see to_extent_record()) and embeds a
 * cache_extent as `cache`.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* Two bits wide so that it can also hold FLAG_UNSET */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
/*
 * One name-based reference to an inode, linked into
 * inode_record::backrefs.  The name is stored inline after the fixed part:
 * clone_inode_rec() copies sizeof(struct inode_backref) + namelen + 1 bytes
 * per entry.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        unsigned int filetype:8;
        /* Bitmask of REF_ERR_* values */
        int errors;
        unsigned int ref_type;
        u64 dir;
        u64 index;
        u16 namelen;
        /* Trailing variable-length name, namelen bytes plus one extra byte */
        char name[0];
};
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
/*
 * Summary of one root item, linkable through `list`.
 * NOTE(review): the field names suggest these values are copied from an
 * on-disk root item (bytenr, level, drop progress) -- confirm at the code
 * that fills this structure.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
212
/*
 * Error bits for back references; print_ref_error() prints a description
 * for every bit that is set.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
226
/*
 * One hole in a file, covering [start, start + len).  Holes are kept in an
 * rb-tree (inode_record::holes) ordered by compare_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
232
/*
 * Everything collected about one inode.  Records are reference counted
 * through `refs`: get_inode_rec() clones a record that has more than one
 * reference before handing it out for modification, and free_inode_rec()
 * releases the record once the count drops to zero.
 */
struct inode_record {
        /* List of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* Bitmask of I_ERR_* values */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* List of struct orphan_data_extent */
        struct list_head orphan_extents;

        u32 refs;
};
260
/*
 * Error bits stored in inode_record::errors; print_inode_error() prints a
 * description for every bit that is set.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
276
/*
 * One reference to a root, linked into root_record::backrefs.  Like
 * struct inode_backref it ends in a variable-length inline name.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        /* Trailing variable-length name */
        char name[0];
};
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
/*
 * Record kept per root: a list of back references plus an embedded
 * cache_extent (`cache`).
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
304
/*
 * Cache entry that carries an opaque pointer.  get_inode_rec() uses it to
 * store a struct inode_record in `data`, with cache.start set to the inode
 * number and cache.size set to 1.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};

/*
 * Cache entry holding its own root and inode caches, the inode record
 * currently being processed and a reference count.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};

/* Start and size of a block. */
struct block_info {
        u64 start;
        u32 size;
};

/*
 * Tree walk state: a cache of shared nodes plus one shared_node slot per
 * tree level (BTRFS_MAX_LEVEL entries).
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};

/* A key together with the id of the root it belongs to; list-linked. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};

/* A byte range with a counter and a `broken` marker; list-linked. */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};

/* Information about a root item, stored via the embedded cache_extent. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
353
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit so that OR-ed error masks stay
 * unambiguous.  CROSSING_STRIPE_BOUNDARY used to share bit 4 with
 * REFERENCER_MISMATCH; it now has a bit of its own.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Terminate the progress line: write a newline to stdout and flush it.
 * @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);
        return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         size_t size;
663         int ret;
664
665         rec = malloc(sizeof(*rec));
666         if (!rec)
667                 return ERR_PTR(-ENOMEM);
668         memcpy(rec, orig_rec, sizeof(*rec));
669         rec->refs = 1;
670         INIT_LIST_HEAD(&rec->backrefs);
671         INIT_LIST_HEAD(&rec->orphan_extents);
672         rec->holes = RB_ROOT;
673
674         list_for_each_entry(orig, &orig_rec->backrefs, list) {
675                 size = sizeof(*orig) + orig->namelen + 1;
676                 backref = malloc(size);
677                 if (!backref) {
678                         ret = -ENOMEM;
679                         goto cleanup;
680                 }
681                 memcpy(backref, orig, size);
682                 list_add_tail(&backref->list, &rec->backrefs);
683         }
684         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
685                 dst_orphan = malloc(sizeof(*dst_orphan));
686                 if (!dst_orphan) {
687                         ret = -ENOMEM;
688                         goto cleanup;
689                 }
690                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
691                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
692         }
693         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
694         BUG_ON(ret < 0);
695
696         return rec;
697
698 cleanup:
699         if (!list_empty(&rec->backrefs))
700                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
701                         list_del(&orig->list);
702                         free(orig);
703                 }
704
705         if (!list_empty(&rec->orphan_extents))
706                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
707                         list_del(&orig->list);
708                         free(orig);
709                 }
710
711         free(rec);
712
713         return ERR_PTR(ret);
714 }
715
716 static void print_orphan_data_extents(struct list_head *orphan_extents,
717                                       u64 objectid)
718 {
719         struct orphan_data_extent *orphan;
720
721         if (list_empty(orphan_extents))
722                 return;
723         printf("The following data extent is lost in tree %llu:\n",
724                objectid);
725         list_for_each_entry(orphan, orphan_extents, list) {
726                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
727                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
728                        orphan->disk_len);
729         }
730 }
731
732 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
733 {
734         u64 root_objectid = root->root_key.objectid;
735         int errors = rec->errors;
736
737         if (!errors)
738                 return;
739         /* reloc root errors, we print its corresponding fs root objectid*/
740         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
741                 root_objectid = root->root_key.offset;
742                 fprintf(stderr, "reloc");
743         }
744         fprintf(stderr, "root %llu inode %llu errors %x",
745                 (unsigned long long) root_objectid,
746                 (unsigned long long) rec->ino, rec->errors);
747
748         if (errors & I_ERR_NO_INODE_ITEM)
749                 fprintf(stderr, ", no inode item");
750         if (errors & I_ERR_NO_ORPHAN_ITEM)
751                 fprintf(stderr, ", no orphan item");
752         if (errors & I_ERR_DUP_INODE_ITEM)
753                 fprintf(stderr, ", dup inode item");
754         if (errors & I_ERR_DUP_DIR_INDEX)
755                 fprintf(stderr, ", dup dir index");
756         if (errors & I_ERR_ODD_DIR_ITEM)
757                 fprintf(stderr, ", odd dir item");
758         if (errors & I_ERR_ODD_FILE_EXTENT)
759                 fprintf(stderr, ", odd file extent");
760         if (errors & I_ERR_BAD_FILE_EXTENT)
761                 fprintf(stderr, ", bad file extent");
762         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
763                 fprintf(stderr, ", file extent overlap");
764         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
765                 fprintf(stderr, ", file extent discount");
766         if (errors & I_ERR_DIR_ISIZE_WRONG)
767                 fprintf(stderr, ", dir isize wrong");
768         if (errors & I_ERR_FILE_NBYTES_WRONG)
769                 fprintf(stderr, ", nbytes wrong");
770         if (errors & I_ERR_ODD_CSUM_ITEM)
771                 fprintf(stderr, ", odd csum item");
772         if (errors & I_ERR_SOME_CSUM_MISSING)
773                 fprintf(stderr, ", some csum missing");
774         if (errors & I_ERR_LINK_COUNT_WRONG)
775                 fprintf(stderr, ", link count wrong");
776         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
777                 fprintf(stderr, ", orphan file extent");
778         fprintf(stderr, "\n");
779         /* Print the orphan extents if needed */
780         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
781                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
782
783         /* Print the holes if needed */
784         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
785                 struct file_extent_hole *hole;
786                 struct rb_node *node;
787                 int found = 0;
788
789                 node = rb_first(&rec->holes);
790                 fprintf(stderr, "Found file extent holes:\n");
791                 while (node) {
792                         found = 1;
793                         hole = rb_entry(node, struct file_extent_hole, node);
794                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
795                                 hole->start, hole->len);
796                         node = rb_next(node);
797                 }
798                 if (!found)
799                         fprintf(stderr, "\tstart: 0, len: %llu\n",
800                                 round_up(rec->isize, root->sectorsize));
801         }
802 }
803
804 static void print_ref_error(int errors)
805 {
806         if (errors & REF_ERR_NO_DIR_ITEM)
807                 fprintf(stderr, ", no dir item");
808         if (errors & REF_ERR_NO_DIR_INDEX)
809                 fprintf(stderr, ", no dir index");
810         if (errors & REF_ERR_NO_INODE_REF)
811                 fprintf(stderr, ", no inode ref");
812         if (errors & REF_ERR_DUP_DIR_ITEM)
813                 fprintf(stderr, ", dup dir item");
814         if (errors & REF_ERR_DUP_DIR_INDEX)
815                 fprintf(stderr, ", dup dir index");
816         if (errors & REF_ERR_DUP_INODE_REF)
817                 fprintf(stderr, ", dup inode ref");
818         if (errors & REF_ERR_INDEX_UNMATCH)
819                 fprintf(stderr, ", index mismatch");
820         if (errors & REF_ERR_FILETYPE_UNMATCH)
821                 fprintf(stderr, ", filetype mismatch");
822         if (errors & REF_ERR_NAME_TOO_LONG)
823                 fprintf(stderr, ", name too long");
824         if (errors & REF_ERR_NO_ROOT_REF)
825                 fprintf(stderr, ", no root ref");
826         if (errors & REF_ERR_NO_ROOT_BACKREF)
827                 fprintf(stderr, ", no root backref");
828         if (errors & REF_ERR_DUP_ROOT_REF)
829                 fprintf(stderr, ", dup root ref");
830         if (errors & REF_ERR_DUP_ROOT_BACKREF)
831                 fprintf(stderr, ", dup root backref");
832         fprintf(stderr, "\n");
833 }
834
835 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
836                                           u64 ino, int mod)
837 {
838         struct ptr_node *node;
839         struct cache_extent *cache;
840         struct inode_record *rec = NULL;
841         int ret;
842
843         cache = lookup_cache_extent(inode_cache, ino, 1);
844         if (cache) {
845                 node = container_of(cache, struct ptr_node, cache);
846                 rec = node->data;
847                 if (mod && rec->refs > 1) {
848                         node->data = clone_inode_rec(rec);
849                         if (IS_ERR(node->data))
850                                 return node->data;
851                         rec->refs--;
852                         rec = node->data;
853                 }
854         } else if (mod) {
855                 rec = calloc(1, sizeof(*rec));
856                 if (!rec)
857                         return ERR_PTR(-ENOMEM);
858                 rec->ino = ino;
859                 rec->extent_start = (u64)-1;
860                 rec->refs = 1;
861                 INIT_LIST_HEAD(&rec->backrefs);
862                 INIT_LIST_HEAD(&rec->orphan_extents);
863                 rec->holes = RB_ROOT;
864
865                 node = malloc(sizeof(*node));
866                 if (!node) {
867                         free(rec);
868                         return ERR_PTR(-ENOMEM);
869                 }
870                 node->cache.start = ino;
871                 node->cache.size = 1;
872                 node->data = rec;
873
874                 if (ino == BTRFS_FREE_INO_OBJECTID)
875                         rec->found_link = 1;
876
877                 ret = insert_cache_extent(inode_cache, &node->cache);
878                 if (ret)
879                         return ERR_PTR(-EEXIST);
880         }
881         return rec;
882 }
883
884 static void free_orphan_data_extents(struct list_head *orphan_extents)
885 {
886         struct orphan_data_extent *orphan;
887
888         while (!list_empty(orphan_extents)) {
889                 orphan = list_entry(orphan_extents->next,
890                                     struct orphan_data_extent, list);
891                 list_del(&orphan->list);
892                 free(orphan);
893         }
894 }
895
896 static void free_inode_rec(struct inode_record *rec)
897 {
898         struct inode_backref *backref;
899
900         if (--rec->refs > 0)
901                 return;
902
903         while (!list_empty(&rec->backrefs)) {
904                 backref = to_inode_backref(rec->backrefs.next);
905                 list_del(&backref->list);
906                 free(backref);
907         }
908         free_orphan_data_extents(&rec->orphan_extents);
909         free_file_extent_holes(&rec->holes);
910         free(rec);
911 }
912
913 static int can_free_inode_rec(struct inode_record *rec)
914 {
915         if (!rec->errors && rec->checked && rec->found_inode_item &&
916             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
917                 return 1;
918         return 0;
919 }
920
921 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
922                                  struct inode_record *rec)
923 {
924         struct cache_extent *cache;
925         struct inode_backref *tmp, *backref;
926         struct ptr_node *node;
927         unsigned char filetype;
928
929         if (!rec->found_inode_item)
930                 return;
931
932         filetype = imode_to_type(rec->imode);
933         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
934                 if (backref->found_dir_item && backref->found_dir_index) {
935                         if (backref->filetype != filetype)
936                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
937                         if (!backref->errors && backref->found_inode_ref &&
938                             rec->nlink == rec->found_link) {
939                                 list_del(&backref->list);
940                                 free(backref);
941                         }
942                 }
943         }
944
945         if (!rec->checked || rec->merging)
946                 return;
947
948         if (S_ISDIR(rec->imode)) {
949                 if (rec->found_size != rec->isize)
950                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
951                 if (rec->found_file_extent)
952                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
953         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
954                 if (rec->found_dir_item)
955                         rec->errors |= I_ERR_ODD_DIR_ITEM;
956                 if (rec->found_size != rec->nbytes)
957                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
958                 if (rec->nlink > 0 && !no_holes &&
959                     (rec->extent_end < rec->isize ||
960                      first_extent_gap(&rec->holes) < rec->isize))
961                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
962         }
963
964         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
965                 if (rec->found_csum_item && rec->nodatasum)
966                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
967                 if (rec->some_csum_missing && !rec->nodatasum)
968                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
969         }
970
971         BUG_ON(rec->refs != 1);
972         if (can_free_inode_rec(rec)) {
973                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
974                 node = container_of(cache, struct ptr_node, cache);
975                 BUG_ON(node->data != rec);
976                 remove_cache_extent(inode_cache, &node->cache);
977                 free(node);
978                 free_inode_rec(rec);
979         }
980 }
981
982 static int check_orphan_item(struct btrfs_root *root, u64 ino)
983 {
984         struct btrfs_path path;
985         struct btrfs_key key;
986         int ret;
987
988         key.objectid = BTRFS_ORPHAN_OBJECTID;
989         key.type = BTRFS_ORPHAN_ITEM_KEY;
990         key.offset = ino;
991
992         btrfs_init_path(&path);
993         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
994         btrfs_release_path(&path);
995         if (ret > 0)
996                 ret = -ENOENT;
997         return ret;
998 }
999
1000 static int process_inode_item(struct extent_buffer *eb,
1001                               int slot, struct btrfs_key *key,
1002                               struct shared_node *active_node)
1003 {
1004         struct inode_record *rec;
1005         struct btrfs_inode_item *item;
1006
1007         rec = active_node->current;
1008         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1009         if (rec->found_inode_item) {
1010                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1011                 return 1;
1012         }
1013         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1014         rec->nlink = btrfs_inode_nlink(eb, item);
1015         rec->isize = btrfs_inode_size(eb, item);
1016         rec->nbytes = btrfs_inode_nbytes(eb, item);
1017         rec->imode = btrfs_inode_mode(eb, item);
1018         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1019                 rec->nodatasum = 1;
1020         rec->found_inode_item = 1;
1021         if (rec->nlink == 0)
1022                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1023         maybe_free_inode_rec(&active_node->inode_cache, rec);
1024         return 0;
1025 }
1026
1027 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1028                                                 const char *name,
1029                                                 int namelen, u64 dir)
1030 {
1031         struct inode_backref *backref;
1032
1033         list_for_each_entry(backref, &rec->backrefs, list) {
1034                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1035                         break;
1036                 if (backref->dir != dir || backref->namelen != namelen)
1037                         continue;
1038                 if (memcmp(name, backref->name, namelen))
1039                         continue;
1040                 return backref;
1041         }
1042
1043         backref = malloc(sizeof(*backref) + namelen + 1);
1044         if (!backref)
1045                 return NULL;
1046         memset(backref, 0, sizeof(*backref));
1047         backref->dir = dir;
1048         backref->namelen = namelen;
1049         memcpy(backref->name, name, namelen);
1050         backref->name[namelen] = '\0';
1051         list_add_tail(&backref->list, &rec->backrefs);
1052         return backref;
1053 }
1054
1055 static int add_inode_backref(struct cache_tree *inode_cache,
1056                              u64 ino, u64 dir, u64 index,
1057                              const char *name, int namelen,
1058                              int filetype, int itemtype, int errors)
1059 {
1060         struct inode_record *rec;
1061         struct inode_backref *backref;
1062
1063         rec = get_inode_rec(inode_cache, ino, 1);
1064         BUG_ON(IS_ERR(rec));
1065         backref = get_inode_backref(rec, name, namelen, dir);
1066         BUG_ON(!backref);
1067         if (errors)
1068                 backref->errors |= errors;
1069         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1070                 if (backref->found_dir_index)
1071                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1072                 if (backref->found_inode_ref && backref->index != index)
1073                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1074                 if (backref->found_dir_item && backref->filetype != filetype)
1075                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1076
1077                 backref->index = index;
1078                 backref->filetype = filetype;
1079                 backref->found_dir_index = 1;
1080         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1081                 rec->found_link++;
1082                 if (backref->found_dir_item)
1083                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1084                 if (backref->found_dir_index && backref->filetype != filetype)
1085                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1086
1087                 backref->filetype = filetype;
1088                 backref->found_dir_item = 1;
1089         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1090                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1091                 if (backref->found_inode_ref)
1092                         backref->errors |= REF_ERR_DUP_INODE_REF;
1093                 if (backref->found_dir_index && backref->index != index)
1094                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1095                 else
1096                         backref->index = index;
1097
1098                 backref->ref_type = itemtype;
1099                 backref->found_inode_ref = 1;
1100         } else {
1101                 BUG_ON(1);
1102         }
1103
1104         maybe_free_inode_rec(inode_cache, rec);
1105         return 0;
1106 }
1107
1108 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1109                             struct cache_tree *dst_cache)
1110 {
1111         struct inode_backref *backref;
1112         u32 dir_count = 0;
1113         int ret = 0;
1114
1115         dst->merging = 1;
1116         list_for_each_entry(backref, &src->backrefs, list) {
1117                 if (backref->found_dir_index) {
1118                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1119                                         backref->index, backref->name,
1120                                         backref->namelen, backref->filetype,
1121                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1122                 }
1123                 if (backref->found_dir_item) {
1124                         dir_count++;
1125                         add_inode_backref(dst_cache, dst->ino,
1126                                         backref->dir, 0, backref->name,
1127                                         backref->namelen, backref->filetype,
1128                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1129                 }
1130                 if (backref->found_inode_ref) {
1131                         add_inode_backref(dst_cache, dst->ino,
1132                                         backref->dir, backref->index,
1133                                         backref->name, backref->namelen, 0,
1134                                         backref->ref_type, backref->errors);
1135                 }
1136         }
1137
1138         if (src->found_dir_item)
1139                 dst->found_dir_item = 1;
1140         if (src->found_file_extent)
1141                 dst->found_file_extent = 1;
1142         if (src->found_csum_item)
1143                 dst->found_csum_item = 1;
1144         if (src->some_csum_missing)
1145                 dst->some_csum_missing = 1;
1146         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1147                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1148                 if (ret < 0)
1149                         return ret;
1150         }
1151
1152         BUG_ON(src->found_link < dir_count);
1153         dst->found_link += src->found_link - dir_count;
1154         dst->found_size += src->found_size;
1155         if (src->extent_start != (u64)-1) {
1156                 if (dst->extent_start == (u64)-1) {
1157                         dst->extent_start = src->extent_start;
1158                         dst->extent_end = src->extent_end;
1159                 } else {
1160                         if (dst->extent_end > src->extent_start)
1161                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1162                         else if (dst->extent_end < src->extent_start) {
1163                                 ret = add_file_extent_hole(&dst->holes,
1164                                         dst->extent_end,
1165                                         src->extent_start - dst->extent_end);
1166                         }
1167                         if (dst->extent_end < src->extent_end)
1168                                 dst->extent_end = src->extent_end;
1169                 }
1170         }
1171
1172         dst->errors |= src->errors;
1173         if (src->found_inode_item) {
1174                 if (!dst->found_inode_item) {
1175                         dst->nlink = src->nlink;
1176                         dst->isize = src->isize;
1177                         dst->nbytes = src->nbytes;
1178                         dst->imode = src->imode;
1179                         dst->nodatasum = src->nodatasum;
1180                         dst->found_inode_item = 1;
1181                 } else {
1182                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1183                 }
1184         }
1185         dst->merging = 0;
1186
1187         return 0;
1188 }
1189
1190 static int splice_shared_node(struct shared_node *src_node,
1191                               struct shared_node *dst_node)
1192 {
1193         struct cache_extent *cache;
1194         struct ptr_node *node, *ins;
1195         struct cache_tree *src, *dst;
1196         struct inode_record *rec, *conflict;
1197         u64 current_ino = 0;
1198         int splice = 0;
1199         int ret;
1200
1201         if (--src_node->refs == 0)
1202                 splice = 1;
1203         if (src_node->current)
1204                 current_ino = src_node->current->ino;
1205
1206         src = &src_node->root_cache;
1207         dst = &dst_node->root_cache;
1208 again:
1209         cache = search_cache_extent(src, 0);
1210         while (cache) {
1211                 node = container_of(cache, struct ptr_node, cache);
1212                 rec = node->data;
1213                 cache = next_cache_extent(cache);
1214
1215                 if (splice) {
1216                         remove_cache_extent(src, &node->cache);
1217                         ins = node;
1218                 } else {
1219                         ins = malloc(sizeof(*ins));
1220                         BUG_ON(!ins);
1221                         ins->cache.start = node->cache.start;
1222                         ins->cache.size = node->cache.size;
1223                         ins->data = rec;
1224                         rec->refs++;
1225                 }
1226                 ret = insert_cache_extent(dst, &ins->cache);
1227                 if (ret == -EEXIST) {
1228                         conflict = get_inode_rec(dst, rec->ino, 1);
1229                         BUG_ON(IS_ERR(conflict));
1230                         merge_inode_recs(rec, conflict, dst);
1231                         if (rec->checked) {
1232                                 conflict->checked = 1;
1233                                 if (dst_node->current == conflict)
1234                                         dst_node->current = NULL;
1235                         }
1236                         maybe_free_inode_rec(dst, conflict);
1237                         free_inode_rec(rec);
1238                         free(ins);
1239                 } else {
1240                         BUG_ON(ret);
1241                 }
1242         }
1243
1244         if (src == &src_node->root_cache) {
1245                 src = &src_node->inode_cache;
1246                 dst = &dst_node->inode_cache;
1247                 goto again;
1248         }
1249
1250         if (current_ino > 0 && (!dst_node->current ||
1251             current_ino > dst_node->current->ino)) {
1252                 if (dst_node->current) {
1253                         dst_node->current->checked = 1;
1254                         maybe_free_inode_rec(dst, dst_node->current);
1255                 }
1256                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1257                 BUG_ON(IS_ERR(dst_node->current));
1258         }
1259         return 0;
1260 }
1261
1262 static void free_inode_ptr(struct cache_extent *cache)
1263 {
1264         struct ptr_node *node;
1265         struct inode_record *rec;
1266
1267         node = container_of(cache, struct ptr_node, cache);
1268         rec = node->data;
1269         free_inode_rec(rec);
1270         free(node);
1271 }
1272
1273 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1274
1275 static struct shared_node *find_shared_node(struct cache_tree *shared,
1276                                             u64 bytenr)
1277 {
1278         struct cache_extent *cache;
1279         struct shared_node *node;
1280
1281         cache = lookup_cache_extent(shared, bytenr, 1);
1282         if (cache) {
1283                 node = container_of(cache, struct shared_node, cache);
1284                 return node;
1285         }
1286         return NULL;
1287 }
1288
1289 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1290 {
1291         int ret;
1292         struct shared_node *node;
1293
1294         node = calloc(1, sizeof(*node));
1295         if (!node)
1296                 return -ENOMEM;
1297         node->cache.start = bytenr;
1298         node->cache.size = 1;
1299         cache_tree_init(&node->root_cache);
1300         cache_tree_init(&node->inode_cache);
1301         node->refs = refs;
1302
1303         ret = insert_cache_extent(shared, &node->cache);
1304
1305         return ret;
1306 }
1307
1308 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1309                              struct walk_control *wc, int level)
1310 {
1311         struct shared_node *node;
1312         struct shared_node *dest;
1313         int ret;
1314
1315         if (level == wc->active_node)
1316                 return 0;
1317
1318         BUG_ON(wc->active_node <= level);
1319         node = find_shared_node(&wc->shared, bytenr);
1320         if (!node) {
1321                 ret = add_shared_node(&wc->shared, bytenr, refs);
1322                 BUG_ON(ret);
1323                 node = find_shared_node(&wc->shared, bytenr);
1324                 wc->nodes[level] = node;
1325                 wc->active_node = level;
1326                 return 0;
1327         }
1328
1329         if (wc->root_level == wc->active_node &&
1330             btrfs_root_refs(&root->root_item) == 0) {
1331                 if (--node->refs == 0) {
1332                         free_inode_recs_tree(&node->root_cache);
1333                         free_inode_recs_tree(&node->inode_cache);
1334                         remove_cache_extent(&wc->shared, &node->cache);
1335                         free(node);
1336                 }
1337                 return 1;
1338         }
1339
1340         dest = wc->nodes[wc->active_node];
1341         splice_shared_node(node, dest);
1342         if (node->refs == 0) {
1343                 remove_cache_extent(&wc->shared, &node->cache);
1344                 free(node);
1345         }
1346         return 1;
1347 }
1348
1349 static int leave_shared_node(struct btrfs_root *root,
1350                              struct walk_control *wc, int level)
1351 {
1352         struct shared_node *node;
1353         struct shared_node *dest;
1354         int i;
1355
1356         if (level == wc->root_level)
1357                 return 0;
1358
1359         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1360                 if (wc->nodes[i])
1361                         break;
1362         }
1363         BUG_ON(i >= BTRFS_MAX_LEVEL);
1364
1365         node = wc->nodes[wc->active_node];
1366         wc->nodes[wc->active_node] = NULL;
1367         wc->active_node = i;
1368
1369         dest = wc->nodes[wc->active_node];
1370         if (wc->active_node < wc->root_level ||
1371             btrfs_root_refs(&root->root_item) > 0) {
1372                 BUG_ON(node->refs <= 1);
1373                 splice_shared_node(node, dest);
1374         } else {
1375                 BUG_ON(node->refs < 2);
1376                 node->refs--;
1377         }
1378         return 0;
1379 }
1380
1381 /*
1382  * Returns:
1383  * < 0 - on error
1384  * 1   - if the root with id child_root_id is a child of root parent_root_id
1385  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1386  *       has other root(s) as parent(s)
1387  * 2   - if the root child_root_id doesn't have any parent roots
1388  */
1389 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1390                          u64 child_root_id)
1391 {
1392         struct btrfs_path path;
1393         struct btrfs_key key;
1394         struct extent_buffer *leaf;
1395         int has_parent = 0;
1396         int ret;
1397
1398         btrfs_init_path(&path);
1399
1400         key.objectid = parent_root_id;
1401         key.type = BTRFS_ROOT_REF_KEY;
1402         key.offset = child_root_id;
1403         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1404                                 0, 0);
1405         if (ret < 0)
1406                 return ret;
1407         btrfs_release_path(&path);
1408         if (!ret)
1409                 return 1;
1410
1411         key.objectid = child_root_id;
1412         key.type = BTRFS_ROOT_BACKREF_KEY;
1413         key.offset = 0;
1414         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1415                                 0, 0);
1416         if (ret < 0)
1417                 goto out;
1418
1419         while (1) {
1420                 leaf = path.nodes[0];
1421                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1422                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1423                         if (ret)
1424                                 break;
1425                         leaf = path.nodes[0];
1426                 }
1427
1428                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1429                 if (key.objectid != child_root_id ||
1430                     key.type != BTRFS_ROOT_BACKREF_KEY)
1431                         break;
1432
1433                 has_parent = 1;
1434
1435                 if (key.offset == parent_root_id) {
1436                         btrfs_release_path(&path);
1437                         return 1;
1438                 }
1439
1440                 path.slots[0]++;
1441         }
1442 out:
1443         btrfs_release_path(&path);
1444         if (ret < 0)
1445                 return ret;
1446         return has_parent ? 0 : 2;
1447 }
1448
1449 static int process_dir_item(struct btrfs_root *root,
1450                             struct extent_buffer *eb,
1451                             int slot, struct btrfs_key *key,
1452                             struct shared_node *active_node)
1453 {
1454         u32 total;
1455         u32 cur = 0;
1456         u32 len;
1457         u32 name_len;
1458         u32 data_len;
1459         int error;
1460         int nritems = 0;
1461         int filetype;
1462         struct btrfs_dir_item *di;
1463         struct inode_record *rec;
1464         struct cache_tree *root_cache;
1465         struct cache_tree *inode_cache;
1466         struct btrfs_key location;
1467         char namebuf[BTRFS_NAME_LEN];
1468
1469         root_cache = &active_node->root_cache;
1470         inode_cache = &active_node->inode_cache;
1471         rec = active_node->current;
1472         rec->found_dir_item = 1;
1473
1474         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1475         total = btrfs_item_size_nr(eb, slot);
1476         while (cur < total) {
1477                 nritems++;
1478                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1479                 name_len = btrfs_dir_name_len(eb, di);
1480                 data_len = btrfs_dir_data_len(eb, di);
1481                 filetype = btrfs_dir_type(eb, di);
1482
1483                 rec->found_size += name_len;
1484                 if (name_len <= BTRFS_NAME_LEN) {
1485                         len = name_len;
1486                         error = 0;
1487                 } else {
1488                         len = BTRFS_NAME_LEN;
1489                         error = REF_ERR_NAME_TOO_LONG;
1490                 }
1491                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1492
1493                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1494                         add_inode_backref(inode_cache, location.objectid,
1495                                           key->objectid, key->offset, namebuf,
1496                                           len, filetype, key->type, error);
1497                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1498                         add_inode_backref(root_cache, location.objectid,
1499                                           key->objectid, key->offset,
1500                                           namebuf, len, filetype,
1501                                           key->type, error);
1502                 } else {
1503                         fprintf(stderr, "invalid location in dir item %u\n",
1504                                 location.type);
1505                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1506                                           key->objectid, key->offset, namebuf,
1507                                           len, filetype, key->type, error);
1508                 }
1509
1510                 len = sizeof(*di) + name_len + data_len;
1511                 di = (struct btrfs_dir_item *)((char *)di + len);
1512                 cur += len;
1513         }
1514         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1515                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1516
1517         return 0;
1518 }
1519
1520 static int process_inode_ref(struct extent_buffer *eb,
1521                              int slot, struct btrfs_key *key,
1522                              struct shared_node *active_node)
1523 {
1524         u32 total;
1525         u32 cur = 0;
1526         u32 len;
1527         u32 name_len;
1528         u64 index;
1529         int error;
1530         struct cache_tree *inode_cache;
1531         struct btrfs_inode_ref *ref;
1532         char namebuf[BTRFS_NAME_LEN];
1533
1534         inode_cache = &active_node->inode_cache;
1535
1536         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1537         total = btrfs_item_size_nr(eb, slot);
1538         while (cur < total) {
1539                 name_len = btrfs_inode_ref_name_len(eb, ref);
1540                 index = btrfs_inode_ref_index(eb, ref);
1541                 if (name_len <= BTRFS_NAME_LEN) {
1542                         len = name_len;
1543                         error = 0;
1544                 } else {
1545                         len = BTRFS_NAME_LEN;
1546                         error = REF_ERR_NAME_TOO_LONG;
1547                 }
1548                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1549                 add_inode_backref(inode_cache, key->objectid, key->offset,
1550                                   index, namebuf, len, 0, key->type, error);
1551
1552                 len = sizeof(*ref) + name_len;
1553                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1554                 cur += len;
1555         }
1556         return 0;
1557 }
1558
1559 static int process_inode_extref(struct extent_buffer *eb,
1560                                 int slot, struct btrfs_key *key,
1561                                 struct shared_node *active_node)
1562 {
1563         u32 total;
1564         u32 cur = 0;
1565         u32 len;
1566         u32 name_len;
1567         u64 index;
1568         u64 parent;
1569         int error;
1570         struct cache_tree *inode_cache;
1571         struct btrfs_inode_extref *extref;
1572         char namebuf[BTRFS_NAME_LEN];
1573
1574         inode_cache = &active_node->inode_cache;
1575
1576         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1577         total = btrfs_item_size_nr(eb, slot);
1578         while (cur < total) {
1579                 name_len = btrfs_inode_extref_name_len(eb, extref);
1580                 index = btrfs_inode_extref_index(eb, extref);
1581                 parent = btrfs_inode_extref_parent(eb, extref);
1582                 if (name_len <= BTRFS_NAME_LEN) {
1583                         len = name_len;
1584                         error = 0;
1585                 } else {
1586                         len = BTRFS_NAME_LEN;
1587                         error = REF_ERR_NAME_TOO_LONG;
1588                 }
1589                 read_extent_buffer(eb, namebuf,
1590                                    (unsigned long)(extref + 1), len);
1591                 add_inode_backref(inode_cache, key->objectid, parent,
1592                                   index, namebuf, len, 0, key->type, error);
1593
1594                 len = sizeof(*extref) + name_len;
1595                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1596                 cur += len;
1597         }
1598         return 0;
1599
1600 }
1601
1602 static int count_csum_range(struct btrfs_root *root, u64 start,
1603                             u64 len, u64 *found)
1604 {
1605         struct btrfs_key key;
1606         struct btrfs_path path;
1607         struct extent_buffer *leaf;
1608         int ret;
1609         size_t size;
1610         *found = 0;
1611         u64 csum_end;
1612         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1613
1614         btrfs_init_path(&path);
1615
1616         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1617         key.offset = start;
1618         key.type = BTRFS_EXTENT_CSUM_KEY;
1619
1620         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1621                                 &key, &path, 0, 0);
1622         if (ret < 0)
1623                 goto out;
1624         if (ret > 0 && path.slots[0] > 0) {
1625                 leaf = path.nodes[0];
1626                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1627                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1628                     key.type == BTRFS_EXTENT_CSUM_KEY)
1629                         path.slots[0]--;
1630         }
1631
1632         while (len > 0) {
1633                 leaf = path.nodes[0];
1634                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1635                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1636                         if (ret > 0)
1637                                 break;
1638                         else if (ret < 0)
1639                                 goto out;
1640                         leaf = path.nodes[0];
1641                 }
1642
1643                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1644                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1645                     key.type != BTRFS_EXTENT_CSUM_KEY)
1646                         break;
1647
1648                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1649                 if (key.offset >= start + len)
1650                         break;
1651
1652                 if (key.offset > start)
1653                         start = key.offset;
1654
1655                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1656                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1657                 if (csum_end > start) {
1658                         size = min(csum_end - start, len);
1659                         len -= size;
1660                         start += size;
1661                         *found += size;
1662                 }
1663
1664                 path.slots[0]++;
1665         }
1666 out:
1667         btrfs_release_path(&path);
1668         if (ret < 0)
1669                 return ret;
1670         return 0;
1671 }
1672
1673 static int process_file_extent(struct btrfs_root *root,
1674                                 struct extent_buffer *eb,
1675                                 int slot, struct btrfs_key *key,
1676                                 struct shared_node *active_node)
1677 {
1678         struct inode_record *rec;
1679         struct btrfs_file_extent_item *fi;
1680         u64 num_bytes = 0;
1681         u64 disk_bytenr = 0;
1682         u64 extent_offset = 0;
1683         u64 mask = root->sectorsize - 1;
1684         int extent_type;
1685         int ret;
1686
1687         rec = active_node->current;
1688         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1689         rec->found_file_extent = 1;
1690
1691         if (rec->extent_start == (u64)-1) {
1692                 rec->extent_start = key->offset;
1693                 rec->extent_end = key->offset;
1694         }
1695
1696         if (rec->extent_end > key->offset)
1697                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1698         else if (rec->extent_end < key->offset) {
1699                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1700                                            key->offset - rec->extent_end);
1701                 if (ret < 0)
1702                         return ret;
1703         }
1704
1705         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1706         extent_type = btrfs_file_extent_type(eb, fi);
1707
1708         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1709                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1710                 if (num_bytes == 0)
1711                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1712                 rec->found_size += num_bytes;
1713                 num_bytes = (num_bytes + mask) & ~mask;
1714         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1715                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1716                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1717                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1718                 extent_offset = btrfs_file_extent_offset(eb, fi);
1719                 if (num_bytes == 0 || (num_bytes & mask))
1720                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1721                 if (num_bytes + extent_offset >
1722                     btrfs_file_extent_ram_bytes(eb, fi))
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1725                     (btrfs_file_extent_compression(eb, fi) ||
1726                      btrfs_file_extent_encryption(eb, fi) ||
1727                      btrfs_file_extent_other_encoding(eb, fi)))
1728                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1729                 if (disk_bytenr > 0)
1730                         rec->found_size += num_bytes;
1731         } else {
1732                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733         }
1734         rec->extent_end = key->offset + num_bytes;
1735
1736         /*
1737          * The data reloc tree will copy full extents into its inode and then
1738          * copy the corresponding csums.  Because the extent it copied could be
1739          * a preallocated extent that hasn't been written to yet there may be no
1740          * csums to copy, ergo we won't have csums for our file extent.  This is
1741          * ok so just don't bother checking csums if the inode belongs to the
1742          * data reloc tree.
1743          */
1744         if (disk_bytenr > 0 &&
1745             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1746                 u64 found;
1747                 if (btrfs_file_extent_compression(eb, fi))
1748                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1749                 else
1750                         disk_bytenr += extent_offset;
1751
1752                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1753                 if (ret < 0)
1754                         return ret;
1755                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1756                         if (found > 0)
1757                                 rec->found_csum_item = 1;
1758                         if (found < num_bytes)
1759                                 rec->some_csum_missing = 1;
1760                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1761                         if (found > 0)
1762                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1763                 }
1764         }
1765         return 0;
1766 }
1767
1768 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1769                             struct walk_control *wc)
1770 {
1771         struct btrfs_key key;
1772         u32 nritems;
1773         int i;
1774         int ret = 0;
1775         struct cache_tree *inode_cache;
1776         struct shared_node *active_node;
1777
1778         if (wc->root_level == wc->active_node &&
1779             btrfs_root_refs(&root->root_item) == 0)
1780                 return 0;
1781
1782         active_node = wc->nodes[wc->active_node];
1783         inode_cache = &active_node->inode_cache;
1784         nritems = btrfs_header_nritems(eb);
1785         for (i = 0; i < nritems; i++) {
1786                 btrfs_item_key_to_cpu(eb, &key, i);
1787
1788                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1789                         continue;
1790                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1791                         continue;
1792
1793                 if (active_node->current == NULL ||
1794                     active_node->current->ino < key.objectid) {
1795                         if (active_node->current) {
1796                                 active_node->current->checked = 1;
1797                                 maybe_free_inode_rec(inode_cache,
1798                                                      active_node->current);
1799                         }
1800                         active_node->current = get_inode_rec(inode_cache,
1801                                                              key.objectid, 1);
1802                         BUG_ON(IS_ERR(active_node->current));
1803                 }
1804                 switch (key.type) {
1805                 case BTRFS_DIR_ITEM_KEY:
1806                 case BTRFS_DIR_INDEX_KEY:
1807                         ret = process_dir_item(root, eb, i, &key, active_node);
1808                         break;
1809                 case BTRFS_INODE_REF_KEY:
1810                         ret = process_inode_ref(eb, i, &key, active_node);
1811                         break;
1812                 case BTRFS_INODE_EXTREF_KEY:
1813                         ret = process_inode_extref(eb, i, &key, active_node);
1814                         break;
1815                 case BTRFS_INODE_ITEM_KEY:
1816                         ret = process_inode_item(eb, i, &key, active_node);
1817                         break;
1818                 case BTRFS_EXTENT_DATA_KEY:
1819                         ret = process_file_extent(root, eb, i, &key,
1820                                                   active_node);
1821                         break;
1822                 default:
1823                         break;
1824                 };
1825         }
1826         return ret;
1827 }
1828
1829 static void reada_walk_down(struct btrfs_root *root,
1830                             struct extent_buffer *node, int slot)
1831 {
1832         u64 bytenr;
1833         u64 ptr_gen;
1834         u32 nritems;
1835         u32 blocksize;
1836         int i;
1837         int level;
1838
1839         level = btrfs_header_level(node);
1840         if (level != 1)
1841                 return;
1842
1843         nritems = btrfs_header_nritems(node);
1844         blocksize = root->nodesize;
1845         for (i = slot; i < nritems; i++) {
1846                 bytenr = btrfs_node_blockptr(node, i);
1847                 ptr_gen = btrfs_node_ptr_generation(node, i);
1848                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1849         }
1850 }
1851
1852 /*
1853  * Check the child node/leaf by the following condition:
1854  * 1. the first item key of the node/leaf should be the same with the one
1855  *    in parent.
1856  * 2. block in parent node should match the child node/leaf.
1857  * 3. generation of parent node and child's header should be consistent.
1858  *
1859  * Or the child node/leaf pointed by the key in parent is not valid.
1860  *
1861  * We hope to check leaf owner too, but since subvol may share leaves,
1862  * which makes leaf owner check not so strong, key check should be
1863  * sufficient enough for that case.
1864  */
1865 static int check_child_node(struct btrfs_root *root,
1866                             struct extent_buffer *parent, int slot,
1867                             struct extent_buffer *child)
1868 {
1869         struct btrfs_key parent_key;
1870         struct btrfs_key child_key;
1871         int ret = 0;
1872
1873         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1874         if (btrfs_header_level(child) == 0)
1875                 btrfs_item_key_to_cpu(child, &child_key, 0);
1876         else
1877                 btrfs_node_key_to_cpu(child, &child_key, 0);
1878
1879         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1880                 ret = -EINVAL;
1881                 fprintf(stderr,
1882                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1883                         parent_key.objectid, parent_key.type, parent_key.offset,
1884                         child_key.objectid, child_key.type, child_key.offset);
1885         }
1886         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1887                 ret = -EINVAL;
1888                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1889                         btrfs_node_blockptr(parent, slot),
1890                         btrfs_header_bytenr(child));
1891         }
1892         if (btrfs_node_ptr_generation(parent, slot) !=
1893             btrfs_header_generation(child)) {
1894                 ret = -EINVAL;
1895                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1896                         btrfs_header_generation(child),
1897                         btrfs_node_ptr_generation(parent, slot));
1898         }
1899         return ret;
1900 }
1901
/*
 * Per-level cache of the most recent extent reference count lookup, used by
 * walk_down_tree() to avoid repeating btrfs_lookup_extent_info() for a block
 * it has already queried.  Both arrays are indexed by tree level.
 */
struct node_refs {
	/* bytenr of the tree block the cached count at this level belongs to */
	u64 bytenr[BTRFS_MAX_LEVEL];
	/* reference count that was looked up for that block */
	u64 refs[BTRFS_MAX_LEVEL];
};
1906
1907 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1908                           struct walk_control *wc, int *level,
1909                           struct node_refs *nrefs)
1910 {
1911         enum btrfs_tree_block_status status;
1912         u64 bytenr;
1913         u64 ptr_gen;
1914         struct extent_buffer *next;
1915         struct extent_buffer *cur;
1916         u32 blocksize;
1917         int ret, err = 0;
1918         u64 refs;
1919
1920         WARN_ON(*level < 0);
1921         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1922
1923         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1924                 refs = nrefs->refs[*level];
1925                 ret = 0;
1926         } else {
1927                 ret = btrfs_lookup_extent_info(NULL, root,
1928                                        path->nodes[*level]->start,
1929                                        *level, 1, &refs, NULL);
1930                 if (ret < 0) {
1931                         err = ret;
1932                         goto out;
1933                 }
1934                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1935                 nrefs->refs[*level] = refs;
1936         }
1937
1938         if (refs > 1) {
1939                 ret = enter_shared_node(root, path->nodes[*level]->start,
1940                                         refs, wc, *level);
1941                 if (ret > 0) {
1942                         err = ret;
1943                         goto out;
1944                 }
1945         }
1946
1947         while (*level >= 0) {
1948                 WARN_ON(*level < 0);
1949                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1950                 cur = path->nodes[*level];
1951
1952                 if (btrfs_header_level(cur) != *level)
1953                         WARN_ON(1);
1954
1955                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1956                         break;
1957                 if (*level == 0) {
1958                         ret = process_one_leaf(root, cur, wc);
1959                         if (ret < 0)
1960                                 err = ret;
1961                         break;
1962                 }
1963                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1964                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1965                 blocksize = root->nodesize;
1966
1967                 if (bytenr == nrefs->bytenr[*level - 1]) {
1968                         refs = nrefs->refs[*level - 1];
1969                 } else {
1970                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1971                                         *level - 1, 1, &refs, NULL);
1972                         if (ret < 0) {
1973                                 refs = 0;
1974                         } else {
1975                                 nrefs->bytenr[*level - 1] = bytenr;
1976                                 nrefs->refs[*level - 1] = refs;
1977                         }
1978                 }
1979
1980                 if (refs > 1) {
1981                         ret = enter_shared_node(root, bytenr, refs,
1982                                                 wc, *level - 1);
1983                         if (ret > 0) {
1984                                 path->slots[*level]++;
1985                                 continue;
1986                         }
1987                 }
1988
1989                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1990                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1991                         free_extent_buffer(next);
1992                         reada_walk_down(root, cur, path->slots[*level]);
1993                         next = read_tree_block(root, bytenr, blocksize,
1994                                                ptr_gen);
1995                         if (!extent_buffer_uptodate(next)) {
1996                                 struct btrfs_key node_key;
1997
1998                                 btrfs_node_key_to_cpu(path->nodes[*level],
1999                                                       &node_key,
2000                                                       path->slots[*level]);
2001                                 btrfs_add_corrupt_extent_record(root->fs_info,
2002                                                 &node_key,
2003                                                 path->nodes[*level]->start,
2004                                                 root->nodesize, *level);
2005                                 err = -EIO;
2006                                 goto out;
2007                         }
2008                 }
2009
2010                 ret = check_child_node(root, cur, path->slots[*level], next);
2011                 if (ret) {
2012                         err = ret;
2013                         goto out;
2014                 }
2015
2016                 if (btrfs_is_leaf(next))
2017                         status = btrfs_check_leaf(root, NULL, next);
2018                 else
2019                         status = btrfs_check_node(root, NULL, next);
2020                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2021                         free_extent_buffer(next);
2022                         err = -EIO;
2023                         goto out;
2024                 }
2025
2026                 *level = *level - 1;
2027                 free_extent_buffer(path->nodes[*level]);
2028                 path->nodes[*level] = next;
2029                 path->slots[*level] = 0;
2030         }
2031 out:
2032         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2033         return err;
2034 }
2035
2036 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2037                         struct walk_control *wc, int *level)
2038 {
2039         int i;
2040         struct extent_buffer *leaf;
2041
2042         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2043                 leaf = path->nodes[i];
2044                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2045                         path->slots[i]++;
2046                         *level = i;
2047                         return 0;
2048                 } else {
2049                         free_extent_buffer(path->nodes[*level]);
2050                         path->nodes[*level] = NULL;
2051                         BUG_ON(*level > wc->active_node);
2052                         if (*level == wc->active_node)
2053                                 leave_shared_node(root, wc, *level);
2054                         *level = i + 1;
2055                 }
2056         }
2057         return 1;
2058 }
2059
2060 static int check_root_dir(struct inode_record *rec)
2061 {
2062         struct inode_backref *backref;
2063         int ret = -1;
2064
2065         if (!rec->found_inode_item || rec->errors)
2066                 goto out;
2067         if (rec->nlink != 1 || rec->found_link != 0)
2068                 goto out;
2069         if (list_empty(&rec->backrefs))
2070                 goto out;
2071         backref = to_inode_backref(rec->backrefs.next);
2072         if (!backref->found_inode_ref)
2073                 goto out;
2074         if (backref->index != 0 || backref->namelen != 2 ||
2075             memcmp(backref->name, "..", 2))
2076                 goto out;
2077         if (backref->found_dir_index || backref->found_dir_item)
2078                 goto out;
2079         ret = 0;
2080 out:
2081         return ret;
2082 }
2083
2084 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2085                               struct btrfs_root *root, struct btrfs_path *path,
2086                               struct inode_record *rec)
2087 {
2088         struct btrfs_inode_item *ei;
2089         struct btrfs_key key;
2090         int ret;
2091
2092         key.objectid = rec->ino;
2093         key.type = BTRFS_INODE_ITEM_KEY;
2094         key.offset = (u64)-1;
2095
2096         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2097         if (ret < 0)
2098                 goto out;
2099         if (ret) {
2100                 if (!path->slots[0]) {
2101                         ret = -ENOENT;
2102                         goto out;
2103                 }
2104                 path->slots[0]--;
2105                 ret = 0;
2106         }
2107         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2108         if (key.objectid != rec->ino) {
2109                 ret = -ENOENT;
2110                 goto out;
2111         }
2112
2113         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2114                             struct btrfs_inode_item);
2115         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2116         btrfs_mark_buffer_dirty(path->nodes[0]);
2117         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2118         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2119                root->root_key.objectid);
2120 out:
2121         btrfs_release_path(path);
2122         return ret;
2123 }
2124
2125 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2126                                     struct btrfs_root *root,
2127                                     struct btrfs_path *path,
2128                                     struct inode_record *rec)
2129 {
2130         int ret;
2131
2132         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2133         btrfs_release_path(path);
2134         if (!ret)
2135                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2136         return ret;
2137 }
2138
2139 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2140                                struct btrfs_root *root,
2141                                struct btrfs_path *path,
2142                                struct inode_record *rec)
2143 {
2144         struct btrfs_inode_item *ei;
2145         struct btrfs_key key;
2146         int ret = 0;
2147
2148         key.objectid = rec->ino;
2149         key.type = BTRFS_INODE_ITEM_KEY;
2150         key.offset = 0;
2151
2152         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2153         if (ret) {
2154                 if (ret > 0)
2155                         ret = -ENOENT;
2156                 goto out;
2157         }
2158
2159         /* Since ret == 0, no need to check anything */
2160         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2161                             struct btrfs_inode_item);
2162         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2163         btrfs_mark_buffer_dirty(path->nodes[0]);
2164         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2165         printf("reset nbytes for ino %llu root %llu\n",
2166                rec->ino, root->root_key.objectid);
2167 out:
2168         btrfs_release_path(path);
2169         return ret;
2170 }
2171
2172 static int add_missing_dir_index(struct btrfs_root *root,
2173                                  struct cache_tree *inode_cache,
2174                                  struct inode_record *rec,
2175                                  struct inode_backref *backref)
2176 {
2177         struct btrfs_path *path;
2178         struct btrfs_trans_handle *trans;
2179         struct btrfs_dir_item *dir_item;
2180         struct extent_buffer *leaf;
2181         struct btrfs_key key;
2182         struct btrfs_disk_key disk_key;
2183         struct inode_record *dir_rec;
2184         unsigned long name_ptr;
2185         u32 data_size = sizeof(*dir_item) + backref->namelen;
2186         int ret;
2187
2188         path = btrfs_alloc_path();
2189         if (!path)
2190                 return -ENOMEM;
2191
2192         trans = btrfs_start_transaction(root, 1);
2193         if (IS_ERR(trans)) {
2194                 btrfs_free_path(path);
2195                 return PTR_ERR(trans);
2196         }
2197
2198         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2199                 (unsigned long long)rec->ino);
2200         key.objectid = backref->dir;
2201         key.type = BTRFS_DIR_INDEX_KEY;
2202         key.offset = backref->index;
2203
2204         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2205         BUG_ON(ret);
2206
2207         leaf = path->nodes[0];
2208         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2209
2210         disk_key.objectid = cpu_to_le64(rec->ino);
2211         disk_key.type = BTRFS_INODE_ITEM_KEY;
2212         disk_key.offset = 0;
2213
2214         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2215         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2216         btrfs_set_dir_data_len(leaf, dir_item, 0);
2217         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2218         name_ptr = (unsigned long)(dir_item + 1);
2219         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2220         btrfs_mark_buffer_dirty(leaf);
2221         btrfs_free_path(path);
2222         btrfs_commit_transaction(trans, root);
2223
2224         backref->found_dir_index = 1;
2225         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2226         BUG_ON(IS_ERR(dir_rec));
2227         if (!dir_rec)
2228                 return 0;
2229         dir_rec->found_size += backref->namelen;
2230         if (dir_rec->found_size == dir_rec->isize &&
2231             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2232                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2233         if (dir_rec->found_size != dir_rec->isize)
2234                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2235
2236         return 0;
2237 }
2238
2239 static int delete_dir_index(struct btrfs_root *root,
2240                             struct cache_tree *inode_cache,
2241                             struct inode_record *rec,
2242                             struct inode_backref *backref)
2243 {
2244         struct btrfs_trans_handle *trans;
2245         struct btrfs_dir_item *di;
2246         struct btrfs_path *path;
2247         int ret = 0;
2248
2249         path = btrfs_alloc_path();
2250         if (!path)
2251                 return -ENOMEM;
2252
2253         trans = btrfs_start_transaction(root, 1);
2254         if (IS_ERR(trans)) {
2255                 btrfs_free_path(path);
2256                 return PTR_ERR(trans);
2257         }
2258
2259
2260         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261                 (unsigned long long)backref->dir,
2262                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263                 (unsigned long long)root->objectid);
2264
2265         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2266                                     backref->name, backref->namelen,
2267                                     backref->index, -1);
2268         if (IS_ERR(di)) {
2269                 ret = PTR_ERR(di);
2270                 btrfs_free_path(path);
2271                 btrfs_commit_transaction(trans, root);
2272                 if (ret == -ENOENT)
2273                         return 0;
2274                 return ret;
2275         }
2276
2277         if (!di)
2278                 ret = btrfs_del_item(trans, root, path);
2279         else
2280                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2281         BUG_ON(ret);
2282         btrfs_free_path(path);
2283         btrfs_commit_transaction(trans, root);
2284         return ret;
2285 }
2286
2287 static int create_inode_item(struct btrfs_root *root,
2288                              struct inode_record *rec,
2289                              struct inode_backref *backref, int root_dir)
2290 {
2291         struct btrfs_trans_handle *trans;
2292         struct btrfs_inode_item inode_item;
2293         time_t now = time(NULL);
2294         int ret;
2295
2296         trans = btrfs_start_transaction(root, 1);
2297         if (IS_ERR(trans)) {
2298                 ret = PTR_ERR(trans);
2299                 return ret;
2300         }
2301
2302         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2303                 "be incomplete, please check permissions and content after "
2304                 "the fsck completes.\n", (unsigned long long)root->objectid,
2305                 (unsigned long long)rec->ino);
2306
2307         memset(&inode_item, 0, sizeof(inode_item));
2308         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2309         if (root_dir)
2310                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2311         else
2312                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2313         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2314         if (rec->found_dir_item) {
2315                 if (rec->found_file_extent)
2316                         fprintf(stderr, "root %llu inode %llu has both a dir "
2317                                 "item and extents, unsure if it is a dir or a "
2318                                 "regular file so setting it as a directory\n",
2319                                 (unsigned long long)root->objectid,
2320                                 (unsigned long long)rec->ino);
2321                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2322                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2323         } else if (!rec->found_dir_item) {
2324                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2325                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2326         }
2327         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2328         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2329         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2334         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2335
2336         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2337         BUG_ON(ret);
2338         btrfs_commit_transaction(trans, root);
2339         return 0;
2340 }
2341
2342 static int repair_inode_backrefs(struct btrfs_root *root,
2343                                  struct inode_record *rec,
2344                                  struct cache_tree *inode_cache,
2345                                  int delete)
2346 {
2347         struct inode_backref *tmp, *backref;
2348         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2349         int ret = 0;
2350         int repaired = 0;
2351
2352         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2353                 if (!delete && rec->ino == root_dirid) {
2354                         if (!rec->found_inode_item) {
2355                                 ret = create_inode_item(root, rec, backref, 1);
2356                                 if (ret)
2357                                         break;
2358                                 repaired++;
2359                         }
2360                 }
2361
2362                 /* Index 0 for root dir's are special, don't mess with it */
2363                 if (rec->ino == root_dirid && backref->index == 0)
2364                         continue;
2365
2366                 if (delete &&
2367                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2368                      (backref->found_dir_index && backref->found_inode_ref &&
2369                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2370                         ret = delete_dir_index(root, inode_cache, rec, backref);
2371                         if (ret)
2372                                 break;
2373                         repaired++;
2374                         list_del(&backref->list);
2375                         free(backref);
2376                 }
2377
2378                 if (!delete && !backref->found_dir_index &&
2379                     backref->found_dir_item && backref->found_inode_ref) {
2380                         ret = add_missing_dir_index(root, inode_cache, rec,
2381                                                     backref);
2382                         if (ret)
2383                                 break;
2384                         repaired++;
2385                         if (backref->found_dir_item &&
2386                             backref->found_dir_index &&
2387                             backref->found_dir_index) {
2388                                 if (!backref->errors &&
2389                                     backref->found_inode_ref) {
2390                                         list_del(&backref->list);
2391                                         free(backref);
2392                                 }
2393                         }
2394                 }
2395
2396                 if (!delete && (!backref->found_dir_index &&
2397                                 !backref->found_dir_item &&
2398                                 backref->found_inode_ref)) {
2399                         struct btrfs_trans_handle *trans;
2400                         struct btrfs_key location;
2401
2402                         ret = check_dir_conflict(root, backref->name,
2403                                                  backref->namelen,
2404                                                  backref->dir,
2405                                                  backref->index);
2406                         if (ret) {
2407                                 /*
2408                                  * let nlink fixing routine to handle it,
2409                                  * which can do it better.
2410                                  */
2411                                 ret = 0;
2412                                 break;
2413                         }
2414                         location.objectid = rec->ino;
2415                         location.type = BTRFS_INODE_ITEM_KEY;
2416                         location.offset = 0;
2417
2418                         trans = btrfs_start_transaction(root, 1);
2419                         if (IS_ERR(trans)) {
2420                                 ret = PTR_ERR(trans);
2421                                 break;
2422                         }
2423                         fprintf(stderr, "adding missing dir index/item pair "
2424                                 "for inode %llu\n",
2425                                 (unsigned long long)rec->ino);
2426                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2427                                                     backref->namelen,
2428                                                     backref->dir, &location,
2429                                                     imode_to_type(rec->imode),
2430                                                     backref->index);
2431                         BUG_ON(ret);
2432                         btrfs_commit_transaction(trans, root);
2433                         repaired++;
2434                 }
2435
2436                 if (!delete && (backref->found_inode_ref &&
2437                                 backref->found_dir_index &&
2438                                 backref->found_dir_item &&
2439                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2440                                 !rec->found_inode_item)) {
2441                         ret = create_inode_item(root, rec, backref, 0);
2442                         if (ret)
2443                                 break;
2444                         repaired++;
2445                 }
2446
2447         }
2448         return ret ? ret : repaired;
2449 }
2450
2451 /*
2452  * To determine the file type for nlink/inode_item repair
2453  *
2454  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2455  * Return -ENOENT if file type is not found.
2456  */
2457 static int find_file_type(struct inode_record *rec, u8 *type)
2458 {
2459         struct inode_backref *backref;
2460
2461         /* For inode item recovered case */
2462         if (rec->found_inode_item) {
2463                 *type = imode_to_type(rec->imode);
2464                 return 0;
2465         }
2466
2467         list_for_each_entry(backref, &rec->backrefs, list) {
2468                 if (backref->found_dir_index || backref->found_dir_item) {
2469                         *type = backref->filetype;
2470                         return 0;
2471                 }
2472         }
2473         return -ENOENT;
2474 }
2475
2476 /*
2477  * To determine the file name for nlink repair
2478  *
2479  * Return 0 if file name is found, set name and namelen.
2480  * Return -ENOENT if file name is not found.
2481  */
2482 static int find_file_name(struct inode_record *rec,
2483                           char *name, int *namelen)
2484 {
2485         struct inode_backref *backref;
2486
2487         list_for_each_entry(backref, &rec->backrefs, list) {
2488                 if (backref->found_dir_index || backref->found_dir_item ||
2489                     backref->found_inode_ref) {
2490                         memcpy(name, backref->name, backref->namelen);
2491                         *namelen = backref->namelen;
2492                         return 0;
2493                 }
2494         }
2495         return -ENOENT;
2496 }
2497
2498 /* Reset the nlink of the inode to the correct one */
2499 static int reset_nlink(struct btrfs_trans_handle *trans,
2500                        struct btrfs_root *root,
2501                        struct btrfs_path *path,
2502                        struct inode_record *rec)
2503 {
2504         struct inode_backref *backref;
2505         struct inode_backref *tmp;
2506         struct btrfs_key key;
2507         struct btrfs_inode_item *inode_item;
2508         int ret = 0;
2509
2510         /* We don't believe this either, reset it and iterate backref */
2511         rec->found_link = 0;
2512
2513         /* Remove all backref including the valid ones */
2514         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2515                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2516                                    backref->index, backref->name,
2517                                    backref->namelen, 0);
2518                 if (ret < 0)
2519                         goto out;
2520
2521                 /* remove invalid backref, so it won't be added back */
2522                 if (!(backref->found_dir_index &&
2523                       backref->found_dir_item &&
2524                       backref->found_inode_ref)) {
2525                         list_del(&backref->list);
2526                         free(backref);
2527                 } else {
2528                         rec->found_link++;
2529                 }
2530         }
2531
2532         /* Set nlink to 0 */
2533         key.objectid = rec->ino;
2534         key.type = BTRFS_INODE_ITEM_KEY;
2535         key.offset = 0;
2536         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2537         if (ret < 0)
2538                 goto out;
2539         if (ret > 0) {
2540                 ret = -ENOENT;
2541                 goto out;
2542         }
2543         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2544                                     struct btrfs_inode_item);
2545         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2546         btrfs_mark_buffer_dirty(path->nodes[0]);
2547         btrfs_release_path(path);
2548
2549         /*
2550          * Add back valid inode_ref/dir_item/dir_index,
2551          * add_link() will handle the nlink inc, so new nlink must be correct
2552          */
2553         list_for_each_entry(backref, &rec->backrefs, list) {
2554                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2555                                      backref->name, backref->namelen,
2556                                      backref->filetype, &backref->index, 1);
2557                 if (ret < 0)
2558                         goto out;
2559         }
2560 out:
2561         btrfs_release_path(path);
2562         return ret;
2563 }
2564
2565 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2566                                struct btrfs_root *root,
2567                                struct btrfs_path *path,
2568                                struct inode_record *rec)
2569 {
2570         char *dir_name = "lost+found";
2571         char namebuf[BTRFS_NAME_LEN] = {0};
2572         u64 lost_found_ino;
2573         u32 mode = 0700;
2574         u8 type = 0;
2575         int namelen = 0;
2576         int name_recovered = 0;
2577         int type_recovered = 0;
2578         int ret = 0;
2579
2580         /*
2581          * Get file name and type first before these invalid inode ref
2582          * are deleted by remove_all_invalid_backref()
2583          */
2584         name_recovered = !find_file_name(rec, namebuf, &namelen);
2585         type_recovered = !find_file_type(rec, &type);
2586
2587         if (!name_recovered) {
2588                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2589                        rec->ino, rec->ino);
2590                 namelen = count_digits(rec->ino);
2591                 sprintf(namebuf, "%llu", rec->ino);
2592                 name_recovered = 1;
2593         }
2594         if (!type_recovered) {
2595                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2596                        rec->ino);
2597                 type = BTRFS_FT_REG_FILE;
2598                 type_recovered = 1;
2599         }
2600
2601         ret = reset_nlink(trans, root, path, rec);
2602         if (ret < 0) {
2603                 fprintf(stderr,
2604                         "Failed to reset nlink for inode %llu: %s\n",
2605                         rec->ino, strerror(-ret));
2606                 goto out;
2607         }
2608
2609         if (rec->found_link == 0) {
2610                 lost_found_ino = root->highest_inode;
2611                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2612                         ret = -EOVERFLOW;
2613                         goto out;
2614                 }
2615                 lost_found_ino++;
2616                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2617                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2618                                   mode);
2619                 if (ret < 0) {
2620                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2621                                 dir_name, strerror(-ret));
2622                         goto out;
2623                 }
2624                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2625                                      namebuf, namelen, type, NULL, 1);
2626                 /*
2627                  * Add ".INO" suffix several times to handle case where
2628                  * "FILENAME.INO" is already taken by another file.
2629                  */
2630                 while (ret == -EEXIST) {
2631                         /*
2632                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2633                          */
2634                         if (namelen + count_digits(rec->ino) + 1 >
2635                             BTRFS_NAME_LEN) {
2636                                 ret = -EFBIG;
2637                                 goto out;
2638                         }
2639                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2640                                  ".%llu", rec->ino);
2641                         namelen += count_digits(rec->ino) + 1;
2642                         ret = btrfs_add_link(trans, root, rec->ino,
2643                                              lost_found_ino, namebuf,
2644                                              namelen, type, NULL, 1);
2645                 }
2646                 if (ret < 0) {
2647                         fprintf(stderr,
2648                                 "Failed to link the inode %llu to %s dir: %s\n",
2649                                 rec->ino, dir_name, strerror(-ret));
2650                         goto out;
2651                 }
2652                 /*
2653                  * Just increase the found_link, don't actually add the
2654                  * backref. This will make things easier and this inode
2655                  * record will be freed after the repair is done.
2656                  * So fsck will not report problem about this inode.
2657                  */
2658                 rec->found_link++;
2659                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2660                        namelen, namebuf, dir_name);
2661         }
2662         printf("Fixed the nlink of inode %llu\n", rec->ino);
2663 out:
2664         /*
2665          * Clear the flag anyway, or we will loop forever for the same inode
2666          * as it will not be removed from the bad inode list and the dead loop
2667          * happens.
2668          */
2669         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2670         btrfs_release_path(path);
2671         return ret;
2672 }
2673
2674 /*
2675  * Check if there is any normal(reg or prealloc) file extent for given
2676  * ino.
2677  * This is used to determine the file type when neither its dir_index/item or
2678  * inode_item exists.
2679  *
2680  * This will *NOT* report error, if any error happens, just consider it does
2681  * not have any normal file extent.
2682  */
2683 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2684 {
2685         struct btrfs_path *path;
2686         struct btrfs_key key;
2687         struct btrfs_key found_key;
2688         struct btrfs_file_extent_item *fi;
2689         u8 type;
2690         int ret = 0;
2691
2692         path = btrfs_alloc_path();
2693         if (!path)
2694                 goto out;
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2705                 ret = btrfs_next_leaf(root, path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2713                                       path->slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path->nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_free_path(path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path *path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         path = btrfs_alloc_path();
2930         if (!path)
2931                 return -ENOMEM;
2932
2933         /*
2934          * For nlink repair, it may create a dir and add link, so
2935          * 2 for parent(256)'s dir_index and dir_item
2936          * 2 for lost+found dir's inode_item and inode_ref
2937          * 1 for the new inode_ref of the file
2938          * 2 for lost+found dir's dir_index and dir_item for the file
2939          */
2940         trans = btrfs_start_transaction(root, 7);
2941         if (IS_ERR(trans)) {
2942                 btrfs_free_path(path);
2943                 return PTR_ERR(trans);
2944         }
2945
2946         if (rec->errors & I_ERR_NO_INODE_ITEM)
2947                 ret = repair_inode_no_item(trans, root, path, rec);
2948         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2949                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2950         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2951                 ret = repair_inode_discount_extent(trans, root, path, rec);
2952         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2953                 ret = repair_inode_isize(trans, root, path, rec);
2954         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2955                 ret = repair_inode_orphan_item(trans, root, path, rec);
2956         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2957                 ret = repair_inode_nlinks(trans, root, path, rec);
2958         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2959                 ret = repair_inode_nbytes(trans, root, path, rec);
2960         btrfs_commit_transaction(trans, root);
2961         btrfs_free_path(path);
2962         return ret;
2963 }
2964
/*
 * Verify (and, when the global 'repair' flag is set, try to fix) every inode
 * record collected in @inode_cache for @root.
 *
 * All records are removed from @inode_cache and freed by the final loop; the
 * stage-3 rescan path frees them as well.
 *
 * Return 0 if no error was counted, -1 if at least one record was counted as
 * an error, -EAGAIN if backrefs were repaired or the root dir was recreated,
 * or another negative value passed up from repair_inode_backrefs() or
 * btrfs_start_transaction().
 */
2965 static int check_inode_recs(struct btrfs_root *root,
2966                             struct cache_tree *inode_cache)
2967 {
2968         struct cache_extent *cache;
2969         struct ptr_node *node;
2970         struct inode_record *rec;
2971         struct inode_backref *backref;
2972         int stage = 0;
2973         int ret = 0;
2974         int err = 0;
2975         u64 error = 0;
2976         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2977
             /*
              * A root with zero refs is not checked; leftover records only
              * produce a warning.
              */
2978         if (btrfs_root_refs(&root->root_item) == 0) {
2979                 if (!cache_tree_empty(inode_cache))
2980                         fprintf(stderr, "warning line %d\n", __LINE__);
2981                 return 0;
2982         }
2983
2984         /*
2985          * We need to record the highest inode number for later 'lost+found'
2986          * dir creation.
2987          * We must select an ino not used/referred by any existing inode, or
2988          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2989          * this may cause 'lost+found' dir has wrong nlinks.
2990          */
2991         cache = last_cache_extent(inode_cache);
2992         if (cache) {
2993                 node = container_of(cache, struct ptr_node, cache);
2994                 rec = node->data;
2995                 if (rec->ino > root->highest_inode)
2996                         root->highest_inode = rec->ino;
2997         }
2998
2999         /*
3000          * We need to repair backrefs first because we could change some of the
3001          * errors in the inode recs.
3002          *
3003          * We also need to go through and delete invalid backrefs first and then
3004          * add the correct ones second.  We do this because we may get EEXIST
3005          * when adding back the correct index because we hadn't yet deleted the
3006          * invalid index.
3007          *
3008          * For example, if we were missing a dir index then the directories
3009          * isize would be wrong, so if we fixed the isize to what we thought it
3010          * would be and then fixed the backref we'd still have a invalid fs, so
3011          * we need to add back the dir index and then check to see if the isize
3012          * is still wrong.
3013          */
             /*
              * Stage 1 calls repair_inode_backrefs() with its last argument
              * set (presumably the delete pass), stage 2 with it clear.
              * Stage 3 runs only when err is set and frees every record so
              * the caller can rescan. The inner loop body is skipped
              * entirely when 'repair' is off.
              */
3014         while (stage < 3) {
3015                 stage++;
3016                 if (stage == 3 && !err)
3017                         break;
3018
3019                 cache = search_cache_extent(inode_cache, 0);
3020                 while (repair && cache) {
3021                         node = container_of(cache, struct ptr_node, cache);
3022                         rec = node->data;
                             /* Advance first: stage 3 removes the current node */
3023                         cache = next_cache_extent(cache);
3024
3025                         /* Need to free everything up and rescan */
3026                         if (stage == 3) {
3027                                 remove_cache_extent(inode_cache, &node->cache);
3028                                 free(node);
3029                                 free_inode_rec(rec);
3030                                 continue;
3031                         }
3032
3033                         if (list_empty(&rec->backrefs))
3034                                 continue;
3035
3036                         ret = repair_inode_backrefs(root, rec, inode_cache,
3037                                                     stage == 1);
                             /*
                              * A hard failure jumps straight to the stage-3
                              * cleanup (stage is bumped to 3 at the loop
                              * top); a positive return means something was
                              * repaired, so a rescan is requested with
                              * -EAGAIN.
                              */
3038                         if (ret < 0) {
3039                                 err = ret;
3040                                 stage = 2;
3041                                 break;
3042                         } if (ret > 0) {
3043                                 err = -EAGAIN;
3044                         }
3045                 }
3046         }
3047         if (err)
3048                 return err;
3049
             /* Check the root directory record, or recreate it in repair mode */
3050         rec = get_inode_rec(inode_cache, root_dirid, 0);
3051         BUG_ON(IS_ERR(rec));
3052         if (rec) {
3053                 ret = check_root_dir(rec);
3054                 if (ret) {
3055                         fprintf(stderr, "root %llu root dir %llu error\n",
3056                                 (unsigned long long)root->root_key.objectid,
3057                                 (unsigned long long)root_dirid);
3058                         print_inode_error(root, rec);
3059                         error++;
3060                 }
3061         } else {
3062                 if (repair) {
3063                         struct btrfs_trans_handle *trans;
3064
3065                         trans = btrfs_start_transaction(root, 1);
3066                         if (IS_ERR(trans)) {
3067                                 err = PTR_ERR(trans);
3068                                 return err;
3069                         }
3070
3071                         fprintf(stderr,
3072                                 "root %llu missing its root dir, recreating\n",
3073                                 (unsigned long long)root->objectid);
3074
3075                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3076                         BUG_ON(ret);
3077
3078                         btrfs_commit_transaction(trans, root);
                             /* Ask the caller to rescan with the new root dir */
3079                         return -EAGAIN;
3080                 }
3081
3082                 fprintf(stderr, "root %llu root dir %llu not found\n",
3083                         (unsigned long long)root->root_key.objectid,
3084                         (unsigned long long)root_dirid);
3085         }
3086
             /*
              * Drain the cache: each pass removes the first record, tries to
              * repair it if enabled, reports what is left and frees the
              * record.
              */
3087         while (1) {
3088                 cache = search_cache_extent(inode_cache, 0);
3089                 if (!cache)
3090                         break;
3091                 node = container_of(cache, struct ptr_node, cache);
3092                 rec = node->data;
3093                 remove_cache_extent(inode_cache, &node->cache);
3094                 free(node);
                     /* The root dir and the orphan objectid are not reported here */
3095                 if (rec->ino == root_dirid ||
3096                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3097                         free_inode_rec(rec);
3098                         continue;
3099                 }
3100
3101                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3102                         ret = check_orphan_item(root, rec->ino);
3103                         if (ret == 0)
3104                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3105                         if (can_free_inode_rec(rec)) {
3106                                 free_inode_rec(rec);
3107                                 continue;
3108                         }
3109                 }
3110
3111                 if (!rec->found_inode_item)
3112                         rec->errors |= I_ERR_NO_INODE_ITEM;
3113                 if (rec->found_link != rec->nlink)
3114                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3115                 if (repair) {
3116                         ret = try_repair_inode(root, rec);
3117                         if (ret == 0 && can_free_inode_rec(rec)) {
3118                                 free_inode_rec(rec);
3119                                 continue;
3120                         }
                             /*
                              * ret is reset, so in repair mode the check
                              * below never counts this record as an error.
                              */
3121                         ret = 0;
3122                 }
3123
3124                 if (!(repair && ret == 0))
3125                         error++;
3126                 print_inode_error(root, rec);
                     /* Flag what each remaining backref is missing and print it */
3127                 list_for_each_entry(backref, &rec->backrefs, list) {
3128                         if (!backref->found_dir_item)
3129                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3130                         if (!backref->found_dir_index)
3131                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3132                         if (!backref->found_inode_ref)
3133                                 backref->errors |= REF_ERR_NO_INODE_REF;
3134                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3135                                 " namelen %u name %s filetype %d errors %x",
3136                                 (unsigned long long)backref->dir,
3137                                 (unsigned long long)backref->index,
3138                                 backref->namelen, backref->name,
3139                                 backref->filetype, backref->errors);
3140                         print_ref_error(backref->errors);
3141                 }
3142                 free_inode_rec(rec);
3143         }
3144         return (error > 0) ? -1 : 0;
3145 }
3146
3147 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3148                                         u64 objectid)
3149 {
3150         struct cache_extent *cache;
3151         struct root_record *rec = NULL;
3152         int ret;
3153
3154         cache = lookup_cache_extent(root_cache, objectid, 1);
3155         if (cache) {
3156                 rec = container_of(cache, struct root_record, cache);
3157         } else {
3158                 rec = calloc(1, sizeof(*rec));
3159                 if (!rec)
3160                         return ERR_PTR(-ENOMEM);
3161                 rec->objectid = objectid;
3162                 INIT_LIST_HEAD(&rec->backrefs);
3163                 rec->cache.start = objectid;
3164                 rec->cache.size = 1;
3165
3166                 ret = insert_cache_extent(root_cache, &rec->cache);
3167                 if (ret)
3168                         return ERR_PTR(-EEXIST);
3169         }
3170         return rec;
3171 }
3172
3173 static struct root_backref *get_root_backref(struct root_record *rec,
3174                                              u64 ref_root, u64 dir, u64 index,
3175                                              const char *name, int namelen)
3176 {
3177         struct root_backref *backref;
3178
3179         list_for_each_entry(backref, &rec->backrefs, list) {
3180                 if (backref->ref_root != ref_root || backref->dir != dir ||
3181                     backref->namelen != namelen)
3182                         continue;
3183                 if (memcmp(name, backref->name, namelen))
3184                         continue;
3185                 return backref;
3186         }
3187
3188         backref = calloc(1, sizeof(*backref) + namelen + 1);
3189         if (!backref)
3190                 return NULL;
3191         backref->ref_root = ref_root;
3192         backref->dir = dir;
3193         backref->index = index;
3194         backref->namelen = namelen;
3195         memcpy(backref->name, name, namelen);
3196         backref->name[namelen] = '\0';
3197         list_add_tail(&backref->list, &rec->backrefs);
3198         return backref;
3199 }
3200
3201 static void free_root_record(struct cache_extent *cache)
3202 {
3203         struct root_record *rec;
3204         struct root_backref *backref;
3205
3206         rec = container_of(cache, struct root_record, cache);
3207         while (!list_empty(&rec->backrefs)) {
3208                 backref = to_root_backref(rec->backrefs.next);
3209                 list_del(&backref->list);
3210                 free(backref);
3211         }
3212
3213         kfree(rec);
3214 }
3215
3216 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3217
3218 static int add_root_backref(struct cache_tree *root_cache,
3219                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3220                             const char *name, int namelen,
3221                             int item_type, int errors)
3222 {
3223         struct root_record *rec;
3224         struct root_backref *backref;
3225
3226         rec = get_root_rec(root_cache, root_id);
3227         BUG_ON(IS_ERR(rec));
3228         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3229         BUG_ON(!backref);
3230
3231         backref->errors |= errors;
3232
3233         if (item_type != BTRFS_DIR_ITEM_KEY) {
3234                 if (backref->found_dir_index || backref->found_back_ref ||
3235                     backref->found_forward_ref) {
3236                         if (backref->index != index)
3237                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3238                 } else {
3239                         backref->index = index;
3240                 }
3241         }
3242
3243         if (item_type == BTRFS_DIR_ITEM_KEY) {
3244                 if (backref->found_forward_ref)
3245                         rec->found_ref++;
3246                 backref->found_dir_item = 1;
3247         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3248                 backref->found_dir_index = 1;
3249         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3250                 if (backref->found_forward_ref)
3251                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3252                 else if (backref->found_dir_item)
3253                         rec->found_ref++;
3254                 backref->found_forward_ref = 1;
3255         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3256                 if (backref->found_back_ref)
3257                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3258                 backref->found_back_ref = 1;
3259         } else {
3260                 BUG_ON(1);
3261         }
3262
3263         if (backref->found_forward_ref && backref->found_dir_item)
3264                 backref->reachable = 1;
3265         return 0;
3266 }
3267
3268 static int merge_root_recs(struct btrfs_root *root,
3269                            struct cache_tree *src_cache,
3270                            struct cache_tree *dst_cache)
3271 {
3272         struct cache_extent *cache;
3273         struct ptr_node *node;
3274         struct inode_record *rec;
3275         struct inode_backref *backref;
3276         int ret = 0;
3277
3278         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3279                 free_inode_recs_tree(src_cache);
3280                 return 0;
3281         }
3282
3283         while (1) {
3284                 cache = search_cache_extent(src_cache, 0);
3285                 if (!cache)
3286                         break;
3287                 node = container_of(cache, struct ptr_node, cache);
3288                 rec = node->data;
3289                 remove_cache_extent(src_cache, &node->cache);
3290                 free(node);
3291
3292                 ret = is_child_root(root, root->objectid, rec->ino);
3293                 if (ret < 0)
3294                         break;
3295                 else if (ret == 0)
3296                         goto skip;
3297
3298                 list_for_each_entry(backref, &rec->backrefs, list) {
3299                         BUG_ON(backref->found_inode_ref);
3300                         if (backref->found_dir_item)
3301                                 add_root_backref(dst_cache, rec->ino,
3302                                         root->root_key.objectid, backref->dir,
3303                                         backref->index, backref->name,
3304                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3305                                         backref->errors);
3306                         if (backref->found_dir_index)
3307                                 add_root_backref(dst_cache, rec->ino,
3308                                         root->root_key.objectid, backref->dir,
3309                                         backref->index, backref->name,
3310                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3311                                         backref->errors);
3312                 }
3313 skip:
3314                 free_inode_rec(rec);
3315         }
3316         if (ret < 0)
3317                 return ret;
3318         return 0;
3319 }
3320
3321 static int check_root_refs(struct btrfs_root *root,
3322                            struct cache_tree *root_cache)
3323 {
3324         struct root_record *rec;
3325         struct root_record *ref_root;
3326         struct root_backref *backref;
3327         struct cache_extent *cache;
3328         int loop = 1;
3329         int ret;
3330         int error;
3331         int errors = 0;
3332
3333         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3334         BUG_ON(IS_ERR(rec));
3335         rec->found_ref = 1;
3336
3337         /* fixme: this can not detect circular references */
3338         while (loop) {
3339                 loop = 0;
3340                 cache = search_cache_extent(root_cache, 0);
3341                 while (1) {
3342                         if (!cache)
3343                                 break;
3344                         rec = container_of(cache, struct root_record, cache);
3345                         cache = next_cache_extent(cache);
3346
3347                         if (rec->found_ref == 0)
3348                                 continue;
3349
3350                         list_for_each_entry(backref, &rec->backrefs, list) {
3351                                 if (!backref->reachable)
3352                                         continue;
3353
3354                                 ref_root = get_root_rec(root_cache,
3355                                                         backref->ref_root);
3356                                 BUG_ON(IS_ERR(ref_root));
3357                                 if (ref_root->found_ref > 0)
3358                                         continue;
3359
3360                                 backref->reachable = 0;
3361                                 rec->found_ref--;
3362                                 if (rec->found_ref == 0)
3363                                         loop = 1;
3364                         }
3365                 }
3366         }
3367
3368         cache = search_cache_extent(root_cache, 0);
3369         while (1) {
3370                 if (!cache)
3371                         break;
3372                 rec = container_of(cache, struct root_record, cache);
3373                 cache = next_cache_extent(cache);
3374
3375                 if (rec->found_ref == 0 &&
3376                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3377                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3378                         ret = check_orphan_item(root->fs_info->tree_root,
3379                                                 rec->objectid);
3380                         if (ret == 0)
3381                                 continue;
3382
3383                         /*
3384                          * If we don't have a root item then we likely just have
3385                          * a dir item in a snapshot for this root but no actual
3386                          * ref key or anything so it's meaningless.
3387                          */
3388                         if (!rec->found_root_item)
3389                                 continue;
3390                         errors++;
3391                         fprintf(stderr, "fs tree %llu not referenced\n",
3392                                 (unsigned long long)rec->objectid);
3393                 }
3394
3395                 error = 0;
3396                 if (rec->found_ref > 0 && !rec->found_root_item)
3397                         error = 1;
3398                 list_for_each_entry(backref, &rec->backrefs, list) {
3399                         if (!backref->found_dir_item)
3400                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3401                         if (!backref->found_dir_index)
3402                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3403                         if (!backref->found_back_ref)
3404                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3405                         if (!backref->found_forward_ref)
3406                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3407                         if (backref->reachable && backref->errors)
3408                                 error = 1;
3409                 }
3410                 if (!error)
3411                         continue;
3412
3413                 errors++;
3414                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3415                         (unsigned long long)rec->objectid, rec->found_ref,
3416                          rec->found_root_item ? "" : "not found");
3417
3418                 list_for_each_entry(backref, &rec->backrefs, list) {
3419                         if (!backref->reachable)
3420                                 continue;
3421                         if (!backref->errors && rec->found_root_item)
3422                                 continue;
3423                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3424                                 " index %llu namelen %u name %s errors %x\n",
3425                                 (unsigned long long)backref->ref_root,
3426                                 (unsigned long long)backref->dir,
3427                                 (unsigned long long)backref->index,
3428                                 backref->namelen, backref->name,
3429                                 backref->errors);
3430                         print_ref_error(backref->errors);
3431                 }
3432         }
3433         return errors > 0 ? 1 : 0;
3434 }
3435
3436 static int process_root_ref(struct extent_buffer *eb, int slot,
3437                             struct btrfs_key *key,
3438                             struct cache_tree *root_cache)
3439 {
3440         u64 dirid;
3441         u64 index;
3442         u32 len;
3443         u32 name_len;
3444         struct btrfs_root_ref *ref;
3445         char namebuf[BTRFS_NAME_LEN];
3446         int error;
3447
3448         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3449
3450         dirid = btrfs_root_ref_dirid(eb, ref);
3451         index = btrfs_root_ref_sequence(eb, ref);
3452         name_len = btrfs_root_ref_name_len(eb, ref);
3453
3454         if (name_len <= BTRFS_NAME_LEN) {
3455                 len = name_len;
3456                 error = 0;
3457         } else {
3458                 len = BTRFS_NAME_LEN;
3459                 error = REF_ERR_NAME_TOO_LONG;
3460         }
3461         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3462
3463         if (key->type == BTRFS_ROOT_REF_KEY) {
3464                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3465                                  index, namebuf, len, key->type, error);
3466         } else {
3467                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3468                                  index, namebuf, len, key->type, error);
3469         }
3470         return 0;
3471 }
3472
3473 static void free_corrupt_block(struct cache_extent *cache)
3474 {
3475         struct btrfs_corrupt_block *corrupt;
3476
3477         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3478         free(corrupt);
3479 }
3480
3481 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3482
3483 /*
3484  * Repair the btree of the given root.
3485  *
3486  * The fix is to remove the node key in corrupt_blocks cache_tree.
3487  * and rebalance the tree.
3488  * After the fix, the btree should be writeable.
3489  */
3490 static int repair_btree(struct btrfs_root *root,
3491                         struct cache_tree *corrupt_blocks)
3492 {
3493         struct btrfs_trans_handle *trans;
3494         struct btrfs_path *path;
3495         struct btrfs_corrupt_block *corrupt;
3496         struct cache_extent *cache;
3497         struct btrfs_key key;
3498         u64 offset;
3499         int level;
3500         int ret = 0;
3501
3502         if (cache_tree_empty(corrupt_blocks))
3503                 return 0;
3504
3505         path = btrfs_alloc_path();
3506         if (!path)
3507                 return -ENOMEM;
3508
3509         trans = btrfs_start_transaction(root, 1);
3510         if (IS_ERR(trans)) {
3511                 ret = PTR_ERR(trans);
3512                 fprintf(stderr, "Error starting transaction: %s\n",
3513                         strerror(-ret));
3514                 goto out_free_path;
3515         }
3516         cache = first_cache_extent(corrupt_blocks);
3517         while (cache) {
3518                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3519                                        cache);
3520                 level = corrupt->level;
3521                 path->lowest_level = level;
3522                 key.objectid = corrupt->key.objectid;
3523                 key.type = corrupt->key.type;
3524                 key.offset = corrupt->key.offset;
3525
3526                 /*
3527                  * Here we don't want to do any tree balance, since it may
3528                  * cause a balance with corrupted brother leaf/node,
3529                  * so ins_len set to 0 here.
3530                  * Balance will be done after all corrupt node/leaf is deleted.
3531                  */
3532                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3533                 if (ret < 0)
3534                         goto out;
3535                 offset = btrfs_node_blockptr(path->nodes[level],
3536                                              path->slots[level]);
3537
3538                 /* Remove the ptr */
3539                 ret = btrfs_del_ptr(trans, root, path, level,
3540                                     path->slots[level]);
3541                 if (ret < 0)
3542                         goto out;
3543                 /*
3544                  * Remove the corresponding extent
3545                  * return value is not concerned.
3546                  */
3547                 btrfs_release_path(path);
3548                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3549                                         0, root->root_key.objectid,
3550                                         level - 1, 0);
3551                 cache = next_cache_extent(cache);
3552         }
3553
3554         /* Balance the btree using btrfs_search_slot() */
3555         cache = first_cache_extent(corrupt_blocks);
3556         while (cache) {
3557                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3558                                        cache);
3559                 memcpy(&key, &corrupt->key, sizeof(key));
3560                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3561                 if (ret < 0)
3562                         goto out;
3563                 /* return will always >0 since it won't find the item */
3564                 ret = 0;
3565                 btrfs_release_path(path);
3566                 cache = next_cache_extent(cache);
3567         }
3568 out:
3569         btrfs_commit_transaction(trans, root);
3570 out_free_path:
3571         btrfs_free_path(path);
3572         return ret;
3573 }
3574
3575 static int check_fs_root(struct btrfs_root *root,
3576                          struct cache_tree *root_cache,
3577                          struct walk_control *wc)
3578 {
3579         int ret = 0;
3580         int err = 0;
3581         int wret;
3582         int level;
3583         struct btrfs_path path;
3584         struct shared_node root_node;
3585         struct root_record *rec;
3586         struct btrfs_root_item *root_item = &root->root_item;
3587         struct cache_tree corrupt_blocks;
3588         struct orphan_data_extent *orphan;
3589         struct orphan_data_extent *tmp;
3590         enum btrfs_tree_block_status status;
3591         struct node_refs nrefs;
3592
3593         /*
3594          * Reuse the corrupt_block cache tree to record corrupted tree block
3595          *
3596          * Unlike the usage in extent tree check, here we do it in a per
3597          * fs/subvol tree base.
3598          */
3599         cache_tree_init(&corrupt_blocks);
3600         root->fs_info->corrupt_blocks = &corrupt_blocks;
3601
3602         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3603                 rec = get_root_rec(root_cache, root->root_key.objectid);
3604                 BUG_ON(IS_ERR(rec));
3605                 if (btrfs_root_refs(root_item) > 0)
3606                         rec->found_root_item = 1;
3607         }
3608
3609         btrfs_init_path(&path);
3610         memset(&root_node, 0, sizeof(root_node));
3611         cache_tree_init(&root_node.root_cache);
3612         cache_tree_init(&root_node.inode_cache);
3613         memset(&nrefs, 0, sizeof(nrefs));
3614
3615         /* Move the orphan extent record to corresponding inode_record */
3616         list_for_each_entry_safe(orphan, tmp,
3617                                  &root->orphan_data_extents, list) {
3618                 struct inode_record *inode;
3619
3620                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3621                                       1);
3622                 BUG_ON(IS_ERR(inode));
3623                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3624                 list_move(&orphan->list, &inode->orphan_extents);
3625         }
3626
3627         level = btrfs_header_level(root->node);
3628         memset(wc->nodes, 0, sizeof(wc->nodes));
3629         wc->nodes[level] = &root_node;
3630         wc->active_node = level;
3631         wc->root_level = level;
3632
3633         /* We may not have checked the root block, lets do that now */
3634         if (btrfs_is_leaf(root->node))
3635                 status = btrfs_check_leaf(root, NULL, root->node);
3636         else
3637                 status = btrfs_check_node(root, NULL, root->node);
3638         if (status != BTRFS_TREE_BLOCK_CLEAN)
3639                 return -EIO;
3640
3641         if (btrfs_root_refs(root_item) > 0 ||
3642             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3643                 path.nodes[level] = root->node;
3644                 extent_buffer_get(root->node);
3645                 path.slots[level] = 0;
3646         } else {
3647                 struct btrfs_key key;
3648                 struct btrfs_disk_key found_key;
3649
3650                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3651                 level = root_item->drop_level;
3652                 path.lowest_level = level;
3653                 if (level > btrfs_header_level(root->node) ||
3654                     level >= BTRFS_MAX_LEVEL) {
3655                         error("ignoring invalid drop level: %u", level);
3656                         goto skip_walking;
3657                 }
3658                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3659                 if (wret < 0)
3660                         goto skip_walking;
3661                 btrfs_node_key(path.nodes[level], &found_key,
3662                                 path.slots[level]);
3663                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3664                                         sizeof(found_key)));
3665         }
3666
3667         while (1) {
3668                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3669                 if (wret < 0)
3670                         ret = wret;
3671                 if (wret != 0)
3672                         break;
3673
3674                 wret = walk_up_tree(root, &path, wc, &level);
3675                 if (wret < 0)
3676                         ret = wret;
3677                 if (wret != 0)
3678                         break;
3679         }
3680 skip_walking:
3681         btrfs_release_path(&path);
3682
3683         if (!cache_tree_empty(&corrupt_blocks)) {
3684                 struct cache_extent *cache;
3685                 struct btrfs_corrupt_block *corrupt;
3686
3687                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3688                        root->root_key.objectid);
3689                 cache = first_cache_extent(&corrupt_blocks);
3690                 while (cache) {
3691                         corrupt = container_of(cache,
3692                                                struct btrfs_corrupt_block,
3693                                                cache);
3694                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3695                                cache->start, corrupt->level,
3696                                corrupt->key.objectid, corrupt->key.type,
3697                                corrupt->key.offset);
3698                         cache = next_cache_extent(cache);
3699                 }
3700                 if (repair) {
3701                         printf("Try to repair the btree for root %llu\n",
3702                                root->root_key.objectid);
3703                         ret = repair_btree(root, &corrupt_blocks);
3704                         if (ret < 0)
3705                                 fprintf(stderr, "Failed to repair btree: %s\n",
3706                                         strerror(-ret));
3707                         if (!ret)
3708                                 printf("Btree for root %llu is fixed\n",
3709                                        root->root_key.objectid);
3710                 }
3711         }
3712
3713         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3714         if (err < 0)
3715                 ret = err;
3716
3717         if (root_node.current) {
3718                 root_node.current->checked = 1;
3719                 maybe_free_inode_rec(&root_node.inode_cache,
3720                                 root_node.current);
3721         }
3722
3723         err = check_inode_recs(root, &root_node.inode_cache);
3724         if (!ret)
3725                 ret = err;
3726
3727         free_corrupt_blocks_tree(&corrupt_blocks);
3728         root->fs_info->corrupt_blocks = NULL;
3729         free_orphan_data_extents(&root->orphan_data_extents);
3730         return ret;
3731 }
3732
3733 static int fs_root_objectid(u64 objectid)
3734 {
3735         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3736             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3737                 return 1;
3738         return is_fstree(objectid);
3739 }
3740
3741 static int check_fs_roots(struct btrfs_root *root,
3742                           struct cache_tree *root_cache)
3743 {
3744         struct btrfs_path path;
3745         struct btrfs_key key;
3746         struct walk_control wc;
3747         struct extent_buffer *leaf, *tree_node;
3748         struct btrfs_root *tmp_root;
3749         struct btrfs_root *tree_root = root->fs_info->tree_root;
3750         int ret;
3751         int err = 0;
3752
3753         if (ctx.progress_enabled) {
3754                 ctx.tp = TASK_FS_ROOTS;
3755                 task_start(ctx.info);
3756         }
3757
3758         /*
3759          * Just in case we made any changes to the extent tree that weren't
3760          * reflected into the free space cache yet.
3761          */
3762         if (repair)
3763                 reset_cached_block_groups(root->fs_info);
3764         memset(&wc, 0, sizeof(wc));
3765         cache_tree_init(&wc.shared);
3766         btrfs_init_path(&path);
3767
3768 again:
3769         key.offset = 0;
3770         key.objectid = 0;
3771         key.type = BTRFS_ROOT_ITEM_KEY;
3772         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3773         if (ret < 0) {
3774                 err = 1;
3775                 goto out;
3776         }
3777         tree_node = tree_root->node;
3778         while (1) {
3779                 if (tree_node != tree_root->node) {
3780                         free_root_recs_tree(root_cache);
3781                         btrfs_release_path(&path);
3782                         goto again;
3783                 }
3784                 leaf = path.nodes[0];
3785                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3786                         ret = btrfs_next_leaf(tree_root, &path);
3787                         if (ret) {
3788                                 if (ret < 0)
3789                                         err = 1;
3790                                 break;
3791                         }
3792                         leaf = path.nodes[0];
3793                 }
3794                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3795                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3796                     fs_root_objectid(key.objectid)) {
3797                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3798                                 tmp_root = btrfs_read_fs_root_no_cache(
3799                                                 root->fs_info, &key);
3800                         } else {
3801                                 key.offset = (u64)-1;
3802                                 tmp_root = btrfs_read_fs_root(
3803                                                 root->fs_info, &key);
3804                         }
3805                         if (IS_ERR(tmp_root)) {
3806                                 err = 1;
3807                                 goto next;
3808                         }
3809                         ret = check_fs_root(tmp_root, root_cache, &wc);
3810                         if (ret == -EAGAIN) {
3811                                 free_root_recs_tree(root_cache);
3812                                 btrfs_release_path(&path);
3813                                 goto again;
3814                         }
3815                         if (ret)
3816                                 err = 1;
3817                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3818                                 btrfs_free_fs_root(tmp_root);
3819                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3820                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3821                         process_root_ref(leaf, path.slots[0], &key,
3822                                          root_cache);
3823                 }
3824 next:
3825                 path.slots[0]++;
3826         }
3827 out:
3828         btrfs_release_path(&path);
3829         if (err)
3830                 free_extent_cache_tree(&wc.shared);
3831         if (!cache_tree_empty(&wc.shared))
3832                 fprintf(stderr, "warning line %d\n", __LINE__);
3833
3834         task_stop(ctx.info);
3835
3836         return err;
3837 }
3838
3839 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3840 {
3841         struct list_head *cur = rec->backrefs.next;
3842         struct extent_backref *back;
3843         struct tree_backref *tback;
3844         struct data_backref *dback;
3845         u64 found = 0;
3846         int err = 0;
3847
3848         while(cur != &rec->backrefs) {
3849                 back = to_extent_backref(cur);
3850                 cur = cur->next;
3851                 if (!back->found_extent_tree) {
3852                         err = 1;
3853                         if (!print_errs)
3854                                 goto out;
3855                         if (back->is_data) {
3856                                 dback = to_data_backref(back);
3857                                 fprintf(stderr, "Backref %llu %s %llu"
3858                                         " owner %llu offset %llu num_refs %lu"
3859                                         " not found in extent tree\n",
3860                                         (unsigned long long)rec->start,
3861                                         back->full_backref ?
3862                                         "parent" : "root",
3863                                         back->full_backref ?
3864                                         (unsigned long long)dback->parent:
3865                                         (unsigned long long)dback->root,
3866                                         (unsigned long long)dback->owner,
3867                                         (unsigned long long)dback->offset,
3868                                         (unsigned long)dback->num_refs);
3869                         } else {
3870                                 tback = to_tree_backref(back);
3871                                 fprintf(stderr, "Backref %llu parent %llu"
3872                                         " root %llu not found in extent tree\n",
3873                                         (unsigned long long)rec->start,
3874                                         (unsigned long long)tback->parent,
3875                                         (unsigned long long)tback->root);
3876                         }
3877                 }
3878                 if (!back->is_data && !back->found_ref) {
3879                         err = 1;
3880                         if (!print_errs)
3881                                 goto out;
3882                         tback = to_tree_backref(back);
3883                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3884                                 (unsigned long long)rec->start,
3885                                 back->full_backref ? "parent" : "root",
3886                                 back->full_backref ?
3887                                 (unsigned long long)tback->parent :
3888                                 (unsigned long long)tback->root, back);
3889                 }
3890                 if (back->is_data) {
3891                         dback = to_data_backref(back);
3892                         if (dback->found_ref != dback->num_refs) {
3893                                 err = 1;
3894                                 if (!print_errs)
3895                                         goto out;
3896                                 fprintf(stderr, "Incorrect local backref count"
3897                                         " on %llu %s %llu owner %llu"
3898                                         " offset %llu found %u wanted %u back %p\n",
3899                                         (unsigned long long)rec->start,
3900                                         back->full_backref ?
3901                                         "parent" : "root",
3902                                         back->full_backref ?
3903                                         (unsigned long long)dback->parent:
3904                                         (unsigned long long)dback->root,
3905                                         (unsigned long long)dback->owner,
3906                                         (unsigned long long)dback->offset,
3907                                         dback->found_ref, dback->num_refs, back);
3908                         }
3909                         if (dback->disk_bytenr != rec->start) {
3910                                 err = 1;
3911                                 if (!print_errs)
3912                                         goto out;
3913                                 fprintf(stderr, "Backref disk bytenr does not"
3914                                         " match extent record, bytenr=%llu, "
3915                                         "ref bytenr=%llu\n",
3916                                         (unsigned long long)rec->start,
3917                                         (unsigned long long)dback->disk_bytenr);
3918                         }
3919
3920                         if (dback->bytes != rec->nr) {
3921                                 err = 1;
3922                                 if (!print_errs)
3923                                         goto out;
3924                                 fprintf(stderr, "Backref bytes do not match "
3925                                         "extent backref, bytenr=%llu, ref "
3926                                         "bytes=%llu, backref bytes=%llu\n",
3927                                         (unsigned long long)rec->start,
3928                                         (unsigned long long)rec->nr,
3929                                         (unsigned long long)dback->bytes);
3930                         }
3931                 }
3932                 if (!back->is_data) {
3933                         found += 1;
3934                 } else {
3935                         dback = to_data_backref(back);
3936                         found += dback->found_ref;
3937                 }
3938         }
3939         if (found != rec->refs) {
3940                 err = 1;
3941                 if (!print_errs)
3942                         goto out;
3943                 fprintf(stderr, "Incorrect global backref count "
3944                         "on %llu found %llu wanted %llu\n",
3945                         (unsigned long long)rec->start,
3946                         (unsigned long long)found,
3947                         (unsigned long long)rec->refs);
3948         }
3949 out:
3950         return err;
3951 }
3952
3953 static int free_all_extent_backrefs(struct extent_record *rec)
3954 {
3955         struct extent_backref *back;
3956         struct list_head *cur;
3957         while (!list_empty(&rec->backrefs)) {
3958                 cur = rec->backrefs.next;
3959                 back = to_extent_backref(cur);
3960                 list_del(cur);
3961                 free(back);
3962         }
3963         return 0;
3964 }
3965
3966 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3967                                      struct cache_tree *extent_cache)
3968 {
3969         struct cache_extent *cache;
3970         struct extent_record *rec;
3971
3972         while (1) {
3973                 cache = first_cache_extent(extent_cache);
3974                 if (!cache)
3975                         break;
3976                 rec = container_of(cache, struct extent_record, cache);
3977                 remove_cache_extent(extent_cache, cache);
3978                 free_all_extent_backrefs(rec);
3979                 free(rec);
3980         }
3981 }
3982
3983 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3984                                  struct extent_record *rec)
3985 {
3986         if (rec->content_checked && rec->owner_ref_checked &&
3987             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3988             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3989             !rec->bad_full_backref && !rec->crossing_stripes &&
3990             !rec->wrong_chunk_type) {
3991                 remove_cache_extent(extent_cache, &rec->cache);
3992                 free_all_extent_backrefs(rec);
3993                 list_del_init(&rec->list);
3994                 free(rec);
3995         }
3996         return 0;
3997 }
3998
3999 static int check_owner_ref(struct btrfs_root *root,
4000                             struct extent_record *rec,
4001                             struct extent_buffer *buf)
4002 {
4003         struct extent_backref *node;
4004         struct tree_backref *back;
4005         struct btrfs_root *ref_root;
4006         struct btrfs_key key;
4007         struct btrfs_path path;
4008         struct extent_buffer *parent;
4009         int level;
4010         int found = 0;
4011         int ret;
4012
4013         list_for_each_entry(node, &rec->backrefs, list) {
4014                 if (node->is_data)
4015                         continue;
4016                 if (!node->found_ref)
4017                         continue;
4018                 if (node->full_backref)
4019                         continue;
4020                 back = to_tree_backref(node);
4021                 if (btrfs_header_owner(buf) == back->root)
4022                         return 0;
4023         }
4024         BUG_ON(rec->is_root);
4025
4026         /* try to find the block by search corresponding fs tree */
4027         key.objectid = btrfs_header_owner(buf);
4028         key.type = BTRFS_ROOT_ITEM_KEY;
4029         key.offset = (u64)-1;
4030
4031         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4032         if (IS_ERR(ref_root))
4033                 return 1;
4034
4035         level = btrfs_header_level(buf);
4036         if (level == 0)
4037                 btrfs_item_key_to_cpu(buf, &key, 0);
4038         else
4039                 btrfs_node_key_to_cpu(buf, &key, 0);
4040
4041         btrfs_init_path(&path);
4042         path.lowest_level = level + 1;
4043         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4044         if (ret < 0)
4045                 return 0;
4046
4047         parent = path.nodes[level + 1];
4048         if (parent && buf->start == btrfs_node_blockptr(parent,
4049                                                         path.slots[level + 1]))
4050                 found = 1;
4051
4052         btrfs_release_path(&path);
4053         return found ? 0 : 1;
4054 }
4055
4056 static int is_extent_tree_record(struct extent_record *rec)
4057 {
4058         struct list_head *cur = rec->backrefs.next;
4059         struct extent_backref *node;
4060         struct tree_backref *back;
4061         int is_extent = 0;
4062
4063         while(cur != &rec->backrefs) {
4064                 node = to_extent_backref(cur);
4065                 cur = cur->next;
4066                 if (node->is_data)
4067                         return 0;
4068                 back = to_tree_backref(node);
4069                 if (node->full_backref)
4070                         return 0;
4071                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4072                         is_extent = 1;
4073         }
4074         return is_extent;
4075 }
4076
4077
4078 static int record_bad_block_io(struct btrfs_fs_info *info,
4079                                struct cache_tree *extent_cache,
4080                                u64 start, u64 len)
4081 {
4082         struct extent_record *rec;
4083         struct cache_extent *cache;
4084         struct btrfs_key key;
4085
4086         cache = lookup_cache_extent(extent_cache, start, len);
4087         if (!cache)
4088                 return 0;
4089
4090         rec = container_of(cache, struct extent_record, cache);
4091         if (!is_extent_tree_record(rec))
4092                 return 0;
4093
4094         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4095         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4096 }
4097
4098 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4099                        struct extent_buffer *buf, int slot)
4100 {
4101         if (btrfs_header_level(buf)) {
4102                 struct btrfs_key_ptr ptr1, ptr2;
4103
4104                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4105                                    sizeof(struct btrfs_key_ptr));
4106                 read_extent_buffer(buf, &ptr2,
4107                                    btrfs_node_key_ptr_offset(slot + 1),
4108                                    sizeof(struct btrfs_key_ptr));
4109                 write_extent_buffer(buf, &ptr1,
4110                                     btrfs_node_key_ptr_offset(slot + 1),
4111                                     sizeof(struct btrfs_key_ptr));
4112                 write_extent_buffer(buf, &ptr2,
4113                                     btrfs_node_key_ptr_offset(slot),
4114                                     sizeof(struct btrfs_key_ptr));
4115                 if (slot == 0) {
4116                         struct btrfs_disk_key key;
4117                         btrfs_node_key(buf, &key, 0);
4118                         btrfs_fixup_low_keys(root, path, &key,
4119                                              btrfs_header_level(buf) + 1);
4120                 }
4121         } else {
4122                 struct btrfs_item *item1, *item2;
4123                 struct btrfs_key k1, k2;
4124                 char *item1_data, *item2_data;
4125                 u32 item1_offset, item2_offset, item1_size, item2_size;
4126
4127                 item1 = btrfs_item_nr(slot);
4128                 item2 = btrfs_item_nr(slot + 1);
4129                 btrfs_item_key_to_cpu(buf, &k1, slot);
4130                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4131                 item1_offset = btrfs_item_offset(buf, item1);
4132                 item2_offset = btrfs_item_offset(buf, item2);
4133                 item1_size = btrfs_item_size(buf, item1);
4134                 item2_size = btrfs_item_size(buf, item2);
4135
4136                 item1_data = malloc(item1_size);
4137                 if (!item1_data)
4138                         return -ENOMEM;
4139                 item2_data = malloc(item2_size);
4140                 if (!item2_data) {
4141                         free(item1_data);
4142                         return -ENOMEM;
4143                 }
4144
4145                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4146                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4147
4148                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4149                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4150                 free(item1_data);
4151                 free(item2_data);
4152
4153                 btrfs_set_item_offset(buf, item1, item2_offset);
4154                 btrfs_set_item_offset(buf, item2, item1_offset);
4155                 btrfs_set_item_size(buf, item1, item2_size);
4156                 btrfs_set_item_size(buf, item2, item1_size);
4157
4158                 path->slots[0] = slot;
4159                 btrfs_set_item_key_unsafe(root, path, &k2);
4160                 path->slots[0] = slot + 1;
4161                 btrfs_set_item_key_unsafe(root, path, &k1);
4162         }
4163         return 0;
4164 }
4165
4166 static int fix_key_order(struct btrfs_trans_handle *trans,
4167                          struct btrfs_root *root,
4168                          struct btrfs_path *path)
4169 {
4170         struct extent_buffer *buf;
4171         struct btrfs_key k1, k2;
4172         int i;
4173         int level = path->lowest_level;
4174         int ret = -EIO;
4175
4176         buf = path->nodes[level];
4177         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4178                 if (level) {
4179                         btrfs_node_key_to_cpu(buf, &k1, i);
4180                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4181                 } else {
4182                         btrfs_item_key_to_cpu(buf, &k1, i);
4183                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4184                 }
4185                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4186                         continue;
4187                 ret = swap_values(root, path, buf, i);
4188                 if (ret)
4189                         break;
4190                 btrfs_mark_buffer_dirty(buf);
4191                 i = 0;
4192         }
4193         return ret;
4194 }
4195
4196 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4197                              struct btrfs_root *root,
4198                              struct btrfs_path *path,
4199                              struct extent_buffer *buf, int slot)
4200 {
4201         struct btrfs_key key;
4202         int nritems = btrfs_header_nritems(buf);
4203
4204         btrfs_item_key_to_cpu(buf, &key, slot);
4205
4206         /* These are all the keys we can deal with missing. */
4207         if (key.type != BTRFS_DIR_INDEX_KEY &&
4208             key.type != BTRFS_EXTENT_ITEM_KEY &&
4209             key.type != BTRFS_METADATA_ITEM_KEY &&
4210             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4211             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4212                 return -1;
4213
4214         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4215                (unsigned long long)key.objectid, key.type,
4216                (unsigned long long)key.offset, slot, buf->start);
4217         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4218                               btrfs_item_nr_offset(slot + 1),
4219                               sizeof(struct btrfs_item) *
4220                               (nritems - slot - 1));
4221         btrfs_set_header_nritems(buf, nritems - 1);
4222         if (slot == 0) {
4223                 struct btrfs_disk_key disk_key;
4224
4225                 btrfs_item_key(buf, &disk_key, 0);
4226                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4227         }
4228         btrfs_mark_buffer_dirty(buf);
4229         return 0;
4230 }
4231
4232 static int fix_item_offset(struct btrfs_trans_handle *trans,
4233                            struct btrfs_root *root,
4234                            struct btrfs_path *path)
4235 {
4236         struct extent_buffer *buf;
4237         int i;
4238         int ret = 0;
4239
4240         /* We should only get this for leaves */
4241         BUG_ON(path->lowest_level);
4242         buf = path->nodes[0];
4243 again:
4244         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4245                 unsigned int shift = 0, offset;
4246
4247                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4248                     BTRFS_LEAF_DATA_SIZE(root)) {
4249                         if (btrfs_item_end_nr(buf, i) >
4250                             BTRFS_LEAF_DATA_SIZE(root)) {
4251                                 ret = delete_bogus_item(trans, root, path,
4252                                                         buf, i);
4253                                 if (!ret)
4254                                         goto again;
4255                                 fprintf(stderr, "item is off the end of the "
4256                                         "leaf, can't fix\n");
4257                                 ret = -EIO;
4258                                 break;
4259                         }
4260                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4261                                 btrfs_item_end_nr(buf, i);
4262                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4263                            btrfs_item_offset_nr(buf, i - 1)) {
4264                         if (btrfs_item_end_nr(buf, i) >
4265                             btrfs_item_offset_nr(buf, i - 1)) {
4266                                 ret = delete_bogus_item(trans, root, path,
4267                                                         buf, i);
4268                                 if (!ret)
4269                                         goto again;
4270                                 fprintf(stderr, "items overlap, can't fix\n");
4271                                 ret = -EIO;
4272                                 break;
4273                         }
4274                         shift = btrfs_item_offset_nr(buf, i - 1) -
4275                                 btrfs_item_end_nr(buf, i);
4276                 }
4277                 if (!shift)
4278                         continue;
4279
4280                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4281                        i, shift, (unsigned long long)buf->start);
4282                 offset = btrfs_item_offset_nr(buf, i);
4283                 memmove_extent_buffer(buf,
4284                                       btrfs_leaf_data(buf) + offset + shift,
4285                                       btrfs_leaf_data(buf) + offset,
4286                                       btrfs_item_size_nr(buf, i));
4287                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4288                                       offset + shift);
4289                 btrfs_mark_buffer_dirty(buf);
4290         }
4291
4292         /*
4293          * We may have moved things, in which case we want to exit so we don't
4294          * write those changes out.  Once we have proper abort functionality in
4295          * progs this can be changed to something nicer.
4296          */
4297         BUG_ON(ret);
4298         return ret;
4299 }
4300
4301 /*
4302  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4303  * then just return -EIO.
4304  */
4305 static int try_to_fix_bad_block(struct btrfs_root *root,
4306                                 struct extent_buffer *buf,
4307                                 enum btrfs_tree_block_status status)
4308 {
4309         struct btrfs_trans_handle *trans;
4310         struct ulist *roots;
4311         struct ulist_node *node;
4312         struct btrfs_root *search_root;
4313         struct btrfs_path *path;
4314         struct ulist_iterator iter;
4315         struct btrfs_key root_key, key;
4316         int ret;
4317
4318         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4319             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4320                 return -EIO;
4321
4322         path = btrfs_alloc_path();
4323         if (!path)
4324                 return -EIO;
4325
4326         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4327                                    0, &roots);
4328         if (ret) {
4329                 btrfs_free_path(path);
4330                 return -EIO;
4331         }
4332
4333         ULIST_ITER_INIT(&iter);
4334         while ((node = ulist_next(roots, &iter))) {
4335                 root_key.objectid = node->val;
4336                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4337                 root_key.offset = (u64)-1;
4338
4339                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4340                 if (IS_ERR(root)) {
4341                         ret = -EIO;
4342                         break;
4343                 }
4344
4345
4346                 trans = btrfs_start_transaction(search_root, 0);
4347                 if (IS_ERR(trans)) {
4348                         ret = PTR_ERR(trans);
4349                         break;
4350                 }
4351
4352                 path->lowest_level = btrfs_header_level(buf);
4353                 path->skip_check_block = 1;
4354                 if (path->lowest_level)
4355                         btrfs_node_key_to_cpu(buf, &key, 0);
4356                 else
4357                         btrfs_item_key_to_cpu(buf, &key, 0);
4358                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4359                 if (ret) {
4360                         ret = -EIO;
4361                         btrfs_commit_transaction(trans, search_root);
4362                         break;
4363                 }
4364                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4365                         ret = fix_key_order(trans, search_root, path);
4366                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4367                         ret = fix_item_offset(trans, search_root, path);
4368                 if (ret) {
4369                         btrfs_commit_transaction(trans, search_root);
4370                         break;
4371                 }
4372                 btrfs_release_path(path);
4373                 btrfs_commit_transaction(trans, search_root);
4374         }
4375         ulist_free(roots);
4376         btrfs_free_path(path);
4377         return ret;
4378 }
4379
4380 static int check_block(struct btrfs_root *root,
4381                        struct cache_tree *extent_cache,
4382                        struct extent_buffer *buf, u64 flags)
4383 {
4384         struct extent_record *rec;
4385         struct cache_extent *cache;
4386         struct btrfs_key key;
4387         enum btrfs_tree_block_status status;
4388         int ret = 0;
4389         int level;
4390
4391         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4392         if (!cache)
4393                 return 1;
4394         rec = container_of(cache, struct extent_record, cache);
4395         rec->generation = btrfs_header_generation(buf);
4396
4397         level = btrfs_header_level(buf);
4398         if (btrfs_header_nritems(buf) > 0) {
4399
4400                 if (level == 0)
4401                         btrfs_item_key_to_cpu(buf, &key, 0);
4402                 else
4403                         btrfs_node_key_to_cpu(buf, &key, 0);
4404
4405                 rec->info_objectid = key.objectid;
4406         }
4407         rec->info_level = level;
4408
4409         if (btrfs_is_leaf(buf))
4410                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4411         else
4412                 status = btrfs_check_node(root, &rec->parent_key, buf);
4413
4414         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4415                 if (repair)
4416                         status = try_to_fix_bad_block(root, buf, status);
4417                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4418                         ret = -EIO;
4419                         fprintf(stderr, "bad block %llu\n",
4420                                 (unsigned long long)buf->start);
4421                 } else {
4422                         /*
4423                          * Signal to callers we need to start the scan over
4424                          * again since we'll have cowed blocks.
4425                          */
4426                         ret = -EAGAIN;
4427                 }
4428         } else {
4429                 rec->content_checked = 1;
4430                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4431                         rec->owner_ref_checked = 1;
4432                 else {
4433                         ret = check_owner_ref(root, rec, buf);
4434                         if (!ret)
4435                                 rec->owner_ref_checked = 1;
4436                 }
4437         }
4438         if (!ret)
4439                 maybe_free_extent_rec(extent_cache, rec);
4440         return ret;
4441 }
4442
4443 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4444                                                 u64 parent, u64 root)
4445 {
4446         struct list_head *cur = rec->backrefs.next;
4447         struct extent_backref *node;
4448         struct tree_backref *back;
4449
4450         while(cur != &rec->backrefs) {
4451                 node = to_extent_backref(cur);
4452                 cur = cur->next;
4453                 if (node->is_data)
4454                         continue;
4455                 back = to_tree_backref(node);
4456                 if (parent > 0) {
4457                         if (!node->full_backref)
4458                                 continue;
4459                         if (parent == back->parent)
4460                                 return back;
4461                 } else {
4462                         if (node->full_backref)
4463                                 continue;
4464                         if (back->root == root)
4465                                 return back;
4466                 }
4467         }
4468         return NULL;
4469 }
4470
4471 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4472                                                 u64 parent, u64 root)
4473 {
4474         struct tree_backref *ref = malloc(sizeof(*ref));
4475
4476         if (!ref)
4477                 return NULL;
4478         memset(&ref->node, 0, sizeof(ref->node));
4479         if (parent > 0) {
4480                 ref->parent = parent;
4481                 ref->node.full_backref = 1;
4482         } else {
4483                 ref->root = root;
4484                 ref->node.full_backref = 0;
4485         }
4486         list_add_tail(&ref->node.list, &rec->backrefs);
4487
4488         return ref;
4489 }
4490
4491 static struct data_backref *find_data_backref(struct extent_record *rec,
4492                                                 u64 parent, u64 root,
4493                                                 u64 owner, u64 offset,
4494                                                 int found_ref,
4495                                                 u64 disk_bytenr, u64 bytes)
4496 {
4497         struct list_head *cur = rec->backrefs.next;
4498         struct extent_backref *node;
4499         struct data_backref *back;
4500
4501         while(cur != &rec->backrefs) {
4502                 node = to_extent_backref(cur);
4503                 cur = cur->next;
4504                 if (!node->is_data)
4505                         continue;
4506                 back = to_data_backref(node);
4507                 if (parent > 0) {
4508                         if (!node->full_backref)
4509                                 continue;
4510                         if (parent == back->parent)
4511                                 return back;
4512                 } else {
4513                         if (node->full_backref)
4514                                 continue;
4515                         if (back->root == root && back->owner == owner &&
4516                             back->offset == offset) {
4517                                 if (found_ref && node->found_ref &&
4518                                     (back->bytes != bytes ||
4519                                     back->disk_bytenr != disk_bytenr))
4520                                         continue;
4521                                 return back;
4522                         }
4523                 }
4524         }
4525         return NULL;
4526 }
4527
4528 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4529                                                 u64 parent, u64 root,
4530                                                 u64 owner, u64 offset,
4531                                                 u64 max_size)
4532 {
4533         struct data_backref *ref = malloc(sizeof(*ref));
4534
4535         if (!ref)
4536                 return NULL;
4537         memset(&ref->node, 0, sizeof(ref->node));
4538         ref->node.is_data = 1;
4539
4540         if (parent > 0) {
4541                 ref->parent = parent;
4542                 ref->owner = 0;
4543                 ref->offset = 0;
4544                 ref->node.full_backref = 1;
4545         } else {
4546                 ref->root = root;
4547                 ref->owner = owner;
4548                 ref->offset = offset;
4549                 ref->node.full_backref = 0;
4550         }
4551         ref->bytes = max_size;
4552         ref->found_ref = 0;
4553         ref->num_refs = 0;
4554         list_add_tail(&ref->node.list, &rec->backrefs);
4555         if (max_size > rec->max_size)
4556                 rec->max_size = max_size;
4557         return ref;
4558 }
4559
4560 /* Check if the type of extent matches with its chunk */
4561 static void check_extent_type(struct extent_record *rec)
4562 {
4563         struct btrfs_block_group_cache *bg_cache;
4564
4565         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4566         if (!bg_cache)
4567                 return;
4568
4569         /* data extent, check chunk directly*/
4570         if (!rec->metadata) {
4571                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4572                         rec->wrong_chunk_type = 1;
4573                 return;
4574         }
4575
4576         /* metadata extent, check the obvious case first */
4577         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4578                                  BTRFS_BLOCK_GROUP_METADATA))) {
4579                 rec->wrong_chunk_type = 1;
4580                 return;
4581         }
4582
4583         /*
4584          * Check SYSTEM extent, as it's also marked as metadata, we can only
4585          * make sure it's a SYSTEM extent by its backref
4586          */
4587         if (!list_empty(&rec->backrefs)) {
4588                 struct extent_backref *node;
4589                 struct tree_backref *tback;
4590                 u64 bg_type;
4591
4592                 node = to_extent_backref(rec->backrefs.next);
4593                 if (node->is_data) {
4594                         /* tree block shouldn't have data backref */
4595                         rec->wrong_chunk_type = 1;
4596                         return;
4597                 }
4598                 tback = container_of(node, struct tree_backref, node);
4599
4600                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4601                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4602                 else
4603                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4604                 if (!(bg_cache->flags & bg_type))
4605                         rec->wrong_chunk_type = 1;
4606         }
4607 }
4608
4609 /*
4610  * Allocate a new extent record, fill default values from @tmpl and insert int
4611  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4612  * the cache, otherwise it fails.
4613  */
4614 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4615                 struct extent_record *tmpl)
4616 {
4617         struct extent_record *rec;
4618         int ret = 0;
4619
4620         rec = malloc(sizeof(*rec));
4621         if (!rec)
4622                 return -ENOMEM;
4623         rec->start = tmpl->start;
4624         rec->max_size = tmpl->max_size;
4625         rec->nr = max(tmpl->nr, tmpl->max_size);
4626         rec->found_rec = tmpl->found_rec;
4627         rec->content_checked = tmpl->content_checked;
4628         rec->owner_ref_checked = tmpl->owner_ref_checked;
4629         rec->num_duplicates = 0;
4630         rec->metadata = tmpl->metadata;
4631         rec->flag_block_full_backref = FLAG_UNSET;
4632         rec->bad_full_backref = 0;
4633         rec->crossing_stripes = 0;
4634         rec->wrong_chunk_type = 0;
4635         rec->is_root = tmpl->is_root;
4636         rec->refs = tmpl->refs;
4637         rec->extent_item_refs = tmpl->extent_item_refs;
4638         rec->parent_generation = tmpl->parent_generation;
4639         INIT_LIST_HEAD(&rec->backrefs);
4640         INIT_LIST_HEAD(&rec->dups);
4641         INIT_LIST_HEAD(&rec->list);
4642         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4643         rec->cache.start = tmpl->start;
4644         rec->cache.size = tmpl->nr;
4645         ret = insert_cache_extent(extent_cache, &rec->cache);
4646         BUG_ON(ret);
4647         bytes_used += rec->nr;
4648
4649         if (tmpl->metadata)
4650                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4651                                 global_info->tree_root->nodesize);
4652         check_extent_type(rec);
4653         return ret;
4654 }
4655
4656 /*
4657  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4658  * some are hints:
4659  * - refs              - if found, increase refs
4660  * - is_root           - if found, set
4661  * - content_checked   - if found, set
4662  * - owner_ref_checked - if found, set
4663  *
4664  * If not found, create a new one, initialize and insert.
4665  */
4666 static int add_extent_rec(struct cache_tree *extent_cache,
4667                 struct extent_record *tmpl)
4668 {
4669         struct extent_record *rec;
4670         struct cache_extent *cache;
4671         int ret = 0;
4672         int dup = 0;
4673
4674         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4675         if (cache) {
4676                 rec = container_of(cache, struct extent_record, cache);
4677                 if (tmpl->refs)
4678                         rec->refs++;
4679                 if (rec->nr == 1)
4680                         rec->nr = max(tmpl->nr, tmpl->max_size);
4681
4682                 /*
4683                  * We need to make sure to reset nr to whatever the extent
4684                  * record says was the real size, this way we can compare it to
4685                  * the backrefs.
4686                  */
4687                 if (tmpl->found_rec) {
4688                         if (tmpl->start != rec->start || rec->found_rec) {
4689                                 struct extent_record *tmp;
4690
4691                                 dup = 1;
4692                                 if (list_empty(&rec->list))
4693                                         list_add_tail(&rec->list,
4694                                                       &duplicate_extents);
4695
4696                                 /*
4697                                  * We have to do this song and dance in case we
4698                                  * find an extent record that falls inside of
4699                                  * our current extent record but does not have
4700                                  * the same objectid.
4701                                  */
4702                                 tmp = malloc(sizeof(*tmp));
4703                                 if (!tmp)
4704                                         return -ENOMEM;
4705                                 tmp->start = tmpl->start;
4706                                 tmp->max_size = tmpl->max_size;
4707                                 tmp->nr = tmpl->nr;
4708                                 tmp->found_rec = 1;
4709                                 tmp->metadata = tmpl->metadata;
4710                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4711                                 INIT_LIST_HEAD(&tmp->list);
4712                                 list_add_tail(&tmp->list, &rec->dups);
4713                                 rec->num_duplicates++;
4714                         } else {
4715                                 rec->nr = tmpl->nr;
4716                                 rec->found_rec = 1;
4717                         }
4718                 }
4719
4720                 if (tmpl->extent_item_refs && !dup) {
4721                         if (rec->extent_item_refs) {
4722                                 fprintf(stderr, "block %llu rec "
4723                                         "extent_item_refs %llu, passed %llu\n",
4724                                         (unsigned long long)tmpl->start,
4725                                         (unsigned long long)
4726                                                         rec->extent_item_refs,
4727                                         (unsigned long long)tmpl->extent_item_refs);
4728                         }
4729                         rec->extent_item_refs = tmpl->extent_item_refs;
4730                 }
4731                 if (tmpl->is_root)
4732                         rec->is_root = 1;
4733                 if (tmpl->content_checked)
4734                         rec->content_checked = 1;
4735                 if (tmpl->owner_ref_checked)
4736                         rec->owner_ref_checked = 1;
4737                 memcpy(&rec->parent_key, &tmpl->parent_key,
4738                                 sizeof(tmpl->parent_key));
4739                 if (tmpl->parent_generation)
4740                         rec->parent_generation = tmpl->parent_generation;
4741                 if (rec->max_size < tmpl->max_size)
4742                         rec->max_size = tmpl->max_size;
4743
4744                 /*
4745                  * A metadata extent can't cross stripe_len boundary, otherwise
4746                  * kernel scrub won't be able to handle it.
4747                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4748                  * it.
4749                  */
4750                 if (tmpl->metadata)
4751                         rec->crossing_stripes = check_crossing_stripes(
4752                                 rec->start, global_info->tree_root->nodesize);
4753                 check_extent_type(rec);
4754                 maybe_free_extent_rec(extent_cache, rec);
4755                 return ret;
4756         }
4757
4758         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4759
4760         return ret;
4761 }
4762
4763 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4764                             u64 parent, u64 root, int found_ref)
4765 {
4766         struct extent_record *rec;
4767         struct tree_backref *back;
4768         struct cache_extent *cache;
4769
4770         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4771         if (!cache) {
4772                 struct extent_record tmpl;
4773
4774                 memset(&tmpl, 0, sizeof(tmpl));
4775                 tmpl.start = bytenr;
4776                 tmpl.nr = 1;
4777                 tmpl.metadata = 1;
4778
4779                 add_extent_rec_nolookup(extent_cache, &tmpl);
4780
4781                 /* really a bug in cache_extent implement now */
4782                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4783                 if (!cache)
4784                         return -ENOENT;
4785         }
4786
4787         rec = container_of(cache, struct extent_record, cache);
4788         if (rec->start != bytenr) {
4789                 /*
4790                  * Several cause, from unaligned bytenr to over lapping extents
4791                  */
4792                 return -EEXIST;
4793         }
4794
4795         back = find_tree_backref(rec, parent, root);
4796         if (!back) {
4797                 back = alloc_tree_backref(rec, parent, root);
4798                 if (!back)
4799                         return -ENOMEM;
4800         }
4801
4802         if (found_ref) {
4803                 if (back->node.found_ref) {
4804                         fprintf(stderr, "Extent back ref already exists "
4805                                 "for %llu parent %llu root %llu \n",
4806                                 (unsigned long long)bytenr,
4807                                 (unsigned long long)parent,
4808                                 (unsigned long long)root);
4809                 }
4810                 back->node.found_ref = 1;
4811         } else {
4812                 if (back->node.found_extent_tree) {
4813                         fprintf(stderr, "Extent back ref already exists "
4814                                 "for %llu parent %llu root %llu \n",
4815                                 (unsigned long long)bytenr,
4816                                 (unsigned long long)parent,
4817                                 (unsigned long long)root);
4818                 }
4819                 back->node.found_extent_tree = 1;
4820         }
4821         check_extent_type(rec);
4822         maybe_free_extent_rec(extent_cache, rec);
4823         return 0;
4824 }
4825
4826 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4827                             u64 parent, u64 root, u64 owner, u64 offset,
4828                             u32 num_refs, int found_ref, u64 max_size)
4829 {
4830         struct extent_record *rec;
4831         struct data_backref *back;
4832         struct cache_extent *cache;
4833
4834         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4835         if (!cache) {
4836                 struct extent_record tmpl;
4837
4838                 memset(&tmpl, 0, sizeof(tmpl));
4839                 tmpl.start = bytenr;
4840                 tmpl.nr = 1;
4841                 tmpl.max_size = max_size;
4842
4843                 add_extent_rec_nolookup(extent_cache, &tmpl);
4844
4845                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4846                 if (!cache)
4847                         abort();
4848         }
4849
4850         rec = container_of(cache, struct extent_record, cache);
4851         if (rec->max_size < max_size)
4852                 rec->max_size = max_size;
4853
4854         /*
4855          * If found_ref is set then max_size is the real size and must match the
4856          * existing refs.  So if we have already found a ref then we need to
4857          * make sure that this ref matches the existing one, otherwise we need
4858          * to add a new backref so we can notice that the backrefs don't match
4859          * and we need to figure out who is telling the truth.  This is to
4860          * account for that awful fsync bug I introduced where we'd end up with
4861          * a btrfs_file_extent_item that would have its length include multiple
4862          * prealloc extents or point inside of a prealloc extent.
4863          */
4864         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4865                                  bytenr, max_size);
4866         if (!back) {
4867                 back = alloc_data_backref(rec, parent, root, owner, offset,
4868                                           max_size);
4869                 BUG_ON(!back);
4870         }
4871
4872         if (found_ref) {
4873                 BUG_ON(num_refs != 1);
4874                 if (back->node.found_ref)
4875                         BUG_ON(back->bytes != max_size);
4876                 back->node.found_ref = 1;
4877                 back->found_ref += 1;
4878                 back->bytes = max_size;
4879                 back->disk_bytenr = bytenr;
4880                 rec->refs += 1;
4881                 rec->content_checked = 1;
4882                 rec->owner_ref_checked = 1;
4883         } else {
4884                 if (back->node.found_extent_tree) {
4885                         fprintf(stderr, "Extent back ref already exists "
4886                                 "for %llu parent %llu root %llu "
4887                                 "owner %llu offset %llu num_refs %lu\n",
4888                                 (unsigned long long)bytenr,
4889                                 (unsigned long long)parent,
4890                                 (unsigned long long)root,
4891                                 (unsigned long long)owner,
4892                                 (unsigned long long)offset,
4893                                 (unsigned long)num_refs);
4894                 }
4895                 back->num_refs = num_refs;
4896                 back->node.found_extent_tree = 1;
4897         }
4898         maybe_free_extent_rec(extent_cache, rec);
4899         return 0;
4900 }
4901
4902 static int add_pending(struct cache_tree *pending,
4903                        struct cache_tree *seen, u64 bytenr, u32 size)
4904 {
4905         int ret;
4906         ret = add_cache_extent(seen, bytenr, size);
4907         if (ret)
4908                 return ret;
4909         add_cache_extent(pending, bytenr, size);
4910         return 0;
4911 }
4912
4913 static int pick_next_pending(struct cache_tree *pending,
4914                         struct cache_tree *reada,
4915                         struct cache_tree *nodes,
4916                         u64 last, struct block_info *bits, int bits_nr,
4917                         int *reada_bits)
4918 {
4919         unsigned long node_start = last;
4920         struct cache_extent *cache;
4921         int ret;
4922
4923         cache = search_cache_extent(reada, 0);
4924         if (cache) {
4925                 bits[0].start = cache->start;
4926                 bits[0].size = cache->size;
4927                 *reada_bits = 1;
4928                 return 1;
4929         }
4930         *reada_bits = 0;
4931         if (node_start > 32768)
4932                 node_start -= 32768;
4933
4934         cache = search_cache_extent(nodes, node_start);
4935         if (!cache)
4936                 cache = search_cache_extent(nodes, 0);
4937
4938         if (!cache) {
4939                  cache = search_cache_extent(pending, 0);
4940                  if (!cache)
4941                          return 0;
4942                  ret = 0;
4943                  do {
4944                          bits[ret].start = cache->start;
4945                          bits[ret].size = cache->size;
4946                          cache = next_cache_extent(cache);
4947                          ret++;
4948                  } while (cache && ret < bits_nr);
4949                  return ret;
4950         }
4951
4952         ret = 0;
4953         do {
4954                 bits[ret].start = cache->start;
4955                 bits[ret].size = cache->size;
4956                 cache = next_cache_extent(cache);
4957                 ret++;
4958         } while (cache && ret < bits_nr);
4959
4960         if (bits_nr - ret > 8) {
4961                 u64 lookup = bits[0].start + bits[0].size;
4962                 struct cache_extent *next;
4963                 next = search_cache_extent(pending, lookup);
4964                 while(next) {
4965                         if (next->start - lookup > 32768)
4966                                 break;
4967                         bits[ret].start = next->start;
4968                         bits[ret].size = next->size;
4969                         lookup = next->start + next->size;
4970                         ret++;
4971                         if (ret == bits_nr)
4972                                 break;
4973                         next = next_cache_extent(next);
4974                         if (!next)
4975                                 break;
4976                 }
4977         }
4978         return ret;
4979 }
4980
4981 static void free_chunk_record(struct cache_extent *cache)
4982 {
4983         struct chunk_record *rec;
4984
4985         rec = container_of(cache, struct chunk_record, cache);
4986         list_del_init(&rec->list);
4987         list_del_init(&rec->dextents);
4988         free(rec);
4989 }
4990
4991 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4992 {
4993         cache_tree_free_extents(chunk_cache, free_chunk_record);
4994 }
4995
4996 static void free_device_record(struct rb_node *node)
4997 {
4998         struct device_record *rec;
4999
5000         rec = container_of(node, struct device_record, node);
5001         free(rec);
5002 }
5003
5004 FREE_RB_BASED_TREE(device_cache, free_device_record);
5005
5006 int insert_block_group_record(struct block_group_tree *tree,
5007                               struct block_group_record *bg_rec)
5008 {
5009         int ret;
5010
5011         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5012         if (ret)
5013                 return ret;
5014
5015         list_add_tail(&bg_rec->list, &tree->block_groups);
5016         return 0;
5017 }
5018
5019 static void free_block_group_record(struct cache_extent *cache)
5020 {
5021         struct block_group_record *rec;
5022
5023         rec = container_of(cache, struct block_group_record, cache);
5024         list_del_init(&rec->list);
5025         free(rec);
5026 }
5027
5028 void free_block_group_tree(struct block_group_tree *tree)
5029 {
5030         cache_tree_free_extents(&tree->tree, free_block_group_record);
5031 }
5032
5033 int insert_device_extent_record(struct device_extent_tree *tree,
5034                                 struct device_extent_record *de_rec)
5035 {
5036         int ret;
5037
5038         /*
5039          * Device extent is a bit different from the other extents, because
5040          * the extents which belong to the different devices may have the
5041          * same start and size, so we need use the special extent cache
5042          * search/insert functions.
5043          */
5044         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5045         if (ret)
5046                 return ret;
5047
5048         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5049         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5050         return 0;
5051 }
5052
5053 static void free_device_extent_record(struct cache_extent *cache)
5054 {
5055         struct device_extent_record *rec;
5056
5057         rec = container_of(cache, struct device_extent_record, cache);
5058         if (!list_empty(&rec->chunk_list))
5059                 list_del_init(&rec->chunk_list);
5060         if (!list_empty(&rec->device_list))
5061                 list_del_init(&rec->device_list);
5062         free(rec);
5063 }
5064
5065 void free_device_extent_tree(struct device_extent_tree *tree)
5066 {
5067         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5068 }
5069
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item at @slot of @leaf.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as tree
 * backrefs, all others as data backrefs carrying the item's ref count.  The
 * item key supplies the extent bytenr (objectid) and the parent (offset).
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
5090
5091 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5092                                             struct btrfs_key *key,
5093                                             int slot)
5094 {
5095         struct btrfs_chunk *ptr;
5096         struct chunk_record *rec;
5097         int num_stripes, i;
5098
5099         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5100         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5101
5102         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5103         if (!rec) {
5104                 fprintf(stderr, "memory allocation failed\n");
5105                 exit(-1);
5106         }
5107
5108         INIT_LIST_HEAD(&rec->list);
5109         INIT_LIST_HEAD(&rec->dextents);
5110         rec->bg_rec = NULL;
5111
5112         rec->cache.start = key->offset;
5113         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5114
5115         rec->generation = btrfs_header_generation(leaf);
5116
5117         rec->objectid = key->objectid;
5118         rec->type = key->type;
5119         rec->offset = key->offset;
5120
5121         rec->length = rec->cache.size;
5122         rec->owner = btrfs_chunk_owner(leaf, ptr);
5123         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5124         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5125         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5126         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5127         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5128         rec->num_stripes = num_stripes;
5129         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5130
5131         for (i = 0; i < rec->num_stripes; ++i) {
5132                 rec->stripes[i].devid =
5133                         btrfs_stripe_devid_nr(leaf, ptr, i);
5134                 rec->stripes[i].offset =
5135                         btrfs_stripe_offset_nr(leaf, ptr, i);
5136                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5137                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5138                                 BTRFS_UUID_SIZE);
5139         }
5140
5141         return rec;
5142 }
5143
5144 static int process_chunk_item(struct cache_tree *chunk_cache,
5145                               struct btrfs_key *key, struct extent_buffer *eb,
5146                               int slot)
5147 {
5148         struct chunk_record *rec;
5149         struct btrfs_chunk *chunk;
5150         int ret = 0;
5151
5152         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5153         /*
5154          * Do extra check for this chunk item,
5155          *
5156          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5157          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5158          * and owner<->key_type check.
5159          */
5160         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5161                                       key->offset);
5162         if (ret < 0) {
5163                 error("chunk(%llu, %llu) is not valid, ignore it",
5164                       key->offset, btrfs_chunk_length(eb, chunk));
5165                 return 0;
5166         }
5167         rec = btrfs_new_chunk_record(eb, key, slot);
5168         ret = insert_cache_extent(chunk_cache, &rec->cache);
5169         if (ret) {
5170                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5171                         rec->offset, rec->length);
5172                 free(rec);
5173         }
5174
5175         return ret;
5176 }
5177
5178 static int process_device_item(struct rb_root *dev_cache,
5179                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5180 {
5181         struct btrfs_dev_item *ptr;
5182         struct device_record *rec;
5183         int ret = 0;
5184
5185         ptr = btrfs_item_ptr(eb,
5186                 slot, struct btrfs_dev_item);
5187
5188         rec = malloc(sizeof(*rec));
5189         if (!rec) {
5190                 fprintf(stderr, "memory allocation failed\n");
5191                 return -ENOMEM;
5192         }
5193
5194         rec->devid = key->offset;
5195         rec->generation = btrfs_header_generation(eb);
5196
5197         rec->objectid = key->objectid;
5198         rec->type = key->type;
5199         rec->offset = key->offset;
5200
5201         rec->devid = btrfs_device_id(eb, ptr);
5202         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5203         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5204
5205         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5206         if (ret) {
5207                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5208                 free(rec);
5209         }
5210
5211         return ret;
5212 }
5213
5214 struct block_group_record *
5215 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5216                              int slot)
5217 {
5218         struct btrfs_block_group_item *ptr;
5219         struct block_group_record *rec;
5220
5221         rec = calloc(1, sizeof(*rec));
5222         if (!rec) {
5223                 fprintf(stderr, "memory allocation failed\n");
5224                 exit(-1);
5225         }
5226
5227         rec->cache.start = key->objectid;
5228         rec->cache.size = key->offset;
5229
5230         rec->generation = btrfs_header_generation(leaf);
5231
5232         rec->objectid = key->objectid;
5233         rec->type = key->type;
5234         rec->offset = key->offset;
5235
5236         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5237         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5238
5239         INIT_LIST_HEAD(&rec->list);
5240
5241         return rec;
5242 }
5243
5244 static int process_block_group_item(struct block_group_tree *block_group_cache,
5245                                     struct btrfs_key *key,
5246                                     struct extent_buffer *eb, int slot)
5247 {
5248         struct block_group_record *rec;
5249         int ret = 0;
5250
5251         rec = btrfs_new_block_group_record(eb, key, slot);
5252         ret = insert_block_group_record(block_group_cache, rec);
5253         if (ret) {
5254                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5255                         rec->objectid, rec->offset);
5256                 free(rec);
5257         }
5258
5259         return ret;
5260 }
5261
5262 struct device_extent_record *
5263 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5264                                struct btrfs_key *key, int slot)
5265 {
5266         struct device_extent_record *rec;
5267         struct btrfs_dev_extent *ptr;
5268
5269         rec = calloc(1, sizeof(*rec));
5270         if (!rec) {
5271                 fprintf(stderr, "memory allocation failed\n");
5272                 exit(-1);
5273         }
5274
5275         rec->cache.objectid = key->objectid;
5276         rec->cache.start = key->offset;
5277
5278         rec->generation = btrfs_header_generation(leaf);
5279
5280         rec->objectid = key->objectid;
5281         rec->type = key->type;
5282         rec->offset = key->offset;
5283
5284         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5285         rec->chunk_objecteid =
5286                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5287         rec->chunk_offset =
5288                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5289         rec->length = btrfs_dev_extent_length(leaf, ptr);
5290         rec->cache.size = rec->length;
5291
5292         INIT_LIST_HEAD(&rec->chunk_list);
5293         INIT_LIST_HEAD(&rec->device_list);
5294
5295         return rec;
5296 }
5297
5298 static int
5299 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5300                            struct btrfs_key *key, struct extent_buffer *eb,
5301                            int slot)
5302 {
5303         struct device_extent_record *rec;
5304         int ret;
5305
5306         rec = btrfs_new_device_extent_record(eb, key, slot);
5307         ret = insert_device_extent_record(dev_extent_cache, rec);
5308         if (ret) {
5309                 fprintf(stderr,
5310                         "Device extent[%llu, %llu, %llu] existed.\n",
5311                         rec->objectid, rec->offset, rec->length);
5312                 free(rec);
5313         }
5314
5315         return ret;
5316 }
5317
5318 static int process_extent_item(struct btrfs_root *root,
5319                                struct cache_tree *extent_cache,
5320                                struct extent_buffer *eb, int slot)
5321 {
5322         struct btrfs_extent_item *ei;
5323         struct btrfs_extent_inline_ref *iref;
5324         struct btrfs_extent_data_ref *dref;
5325         struct btrfs_shared_data_ref *sref;
5326         struct btrfs_key key;
5327         struct extent_record tmpl;
5328         unsigned long end;
5329         unsigned long ptr;
5330         int ret;
5331         int type;
5332         u32 item_size = btrfs_item_size_nr(eb, slot);
5333         u64 refs = 0;
5334         u64 offset;
5335         u64 num_bytes;
5336         int metadata = 0;
5337
5338         btrfs_item_key_to_cpu(eb, &key, slot);
5339
5340         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5341                 metadata = 1;
5342                 num_bytes = root->nodesize;
5343         } else {
5344                 num_bytes = key.offset;
5345         }
5346
5347         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5348                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5349                       key.objectid, root->sectorsize);
5350                 return -EIO;
5351         }
5352         if (item_size < sizeof(*ei)) {
5353 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5354                 struct btrfs_extent_item_v0 *ei0;
5355                 BUG_ON(item_size != sizeof(*ei0));
5356                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5357                 refs = btrfs_extent_refs_v0(eb, ei0);
5358 #else
5359                 BUG();
5360 #endif
5361                 memset(&tmpl, 0, sizeof(tmpl));
5362                 tmpl.start = key.objectid;
5363                 tmpl.nr = num_bytes;
5364                 tmpl.extent_item_refs = refs;
5365                 tmpl.metadata = metadata;
5366                 tmpl.found_rec = 1;
5367                 tmpl.max_size = num_bytes;
5368
5369                 return add_extent_rec(extent_cache, &tmpl);
5370         }
5371
5372         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5373         refs = btrfs_extent_refs(eb, ei);
5374         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5375                 metadata = 1;
5376         else
5377                 metadata = 0;
5378         if (metadata && num_bytes != root->nodesize) {
5379                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5380                       num_bytes, root->nodesize);
5381                 return -EIO;
5382         }
5383         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5384                 error("ignore invalid data extent, length %llu is not aligned to %u",
5385                       num_bytes, root->sectorsize);
5386                 return -EIO;
5387         }
5388
5389         memset(&tmpl, 0, sizeof(tmpl));
5390         tmpl.start = key.objectid;
5391         tmpl.nr = num_bytes;
5392         tmpl.extent_item_refs = refs;
5393         tmpl.metadata = metadata;
5394         tmpl.found_rec = 1;
5395         tmpl.max_size = num_bytes;
5396         add_extent_rec(extent_cache, &tmpl);
5397
5398         ptr = (unsigned long)(ei + 1);
5399         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5400             key.type == BTRFS_EXTENT_ITEM_KEY)
5401                 ptr += sizeof(struct btrfs_tree_block_info);
5402
5403         end = (unsigned long)ei + item_size;
5404         while (ptr < end) {
5405                 iref = (struct btrfs_extent_inline_ref *)ptr;
5406                 type = btrfs_extent_inline_ref_type(eb, iref);
5407                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5408                 switch (type) {
5409                 case BTRFS_TREE_BLOCK_REF_KEY:
5410                         ret = add_tree_backref(extent_cache, key.objectid,
5411                                         0, offset, 0);
5412                         if (ret < 0)
5413                                 error("add_tree_backref failed: %s",
5414                                       strerror(-ret));
5415                         break;
5416                 case BTRFS_SHARED_BLOCK_REF_KEY:
5417                         ret = add_tree_backref(extent_cache, key.objectid,
5418                                         offset, 0, 0);
5419                         if (ret < 0)
5420                                 error("add_tree_backref failed: %s",
5421                                       strerror(-ret));
5422                         break;
5423                 case BTRFS_EXTENT_DATA_REF_KEY:
5424                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5425                         add_data_backref(extent_cache, key.objectid, 0,
5426                                         btrfs_extent_data_ref_root(eb, dref),
5427                                         btrfs_extent_data_ref_objectid(eb,
5428                                                                        dref),
5429                                         btrfs_extent_data_ref_offset(eb, dref),
5430                                         btrfs_extent_data_ref_count(eb, dref),
5431                                         0, num_bytes);
5432                         break;
5433                 case BTRFS_SHARED_DATA_REF_KEY:
5434                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5435                         add_data_backref(extent_cache, key.objectid, offset,
5436                                         0, 0, 0,
5437                                         btrfs_shared_data_ref_count(eb, sref),
5438                                         0, num_bytes);
5439                         break;
5440                 default:
5441                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5442                                 key.objectid, key.type, num_bytes);
5443                         goto out;
5444                 }
5445                 ptr += btrfs_extent_inline_ref_size(type);
5446         }
5447         WARN_ON(ptr > end);
5448 out:
5449         return 0;
5450 }
5451
5452 static int check_cache_range(struct btrfs_root *root,
5453                              struct btrfs_block_group_cache *cache,
5454                              u64 offset, u64 bytes)
5455 {
5456         struct btrfs_free_space *entry;
5457         u64 *logical;
5458         u64 bytenr;
5459         int stripe_len;
5460         int i, nr, ret;
5461
5462         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5463                 bytenr = btrfs_sb_offset(i);
5464                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5465                                        cache->key.objectid, bytenr, 0,
5466                                        &logical, &nr, &stripe_len);
5467                 if (ret)
5468                         return ret;
5469
5470                 while (nr--) {
5471                         if (logical[nr] + stripe_len <= offset)
5472                                 continue;
5473                         if (offset + bytes <= logical[nr])
5474                                 continue;
5475                         if (logical[nr] == offset) {
5476                                 if (stripe_len >= bytes) {
5477                                         kfree(logical);
5478                                         return 0;
5479                                 }
5480                                 bytes -= stripe_len;
5481                                 offset += stripe_len;
5482                         } else if (logical[nr] < offset) {
5483                                 if (logical[nr] + stripe_len >=
5484                                     offset + bytes) {
5485                                         kfree(logical);
5486                                         return 0;
5487                                 }
5488                                 bytes = (offset + bytes) -
5489                                         (logical[nr] + stripe_len);
5490                                 offset = logical[nr] + stripe_len;
5491                         } else {
5492                                 /*
5493                                  * Could be tricky, the super may land in the
5494                                  * middle of the area we're checking.  First
5495                                  * check the easiest case, it's at the end.
5496                                  */
5497                                 if (logical[nr] + stripe_len >=
5498                                     bytes + offset) {
5499                                         bytes = logical[nr] - offset;
5500                                         continue;
5501                                 }
5502
5503                                 /* Check the left side */
5504                                 ret = check_cache_range(root, cache,
5505                                                         offset,
5506                                                         logical[nr] - offset);
5507                                 if (ret) {
5508                                         kfree(logical);
5509                                         return ret;
5510                                 }
5511
5512                                 /* Now we continue with the right side */
5513                                 bytes = (offset + bytes) -
5514                                         (logical[nr] + stripe_len);
5515                                 offset = logical[nr] + stripe_len;
5516                         }
5517                 }
5518
5519                 kfree(logical);
5520         }
5521
5522         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5523         if (!entry) {
5524                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5525                         offset, offset+bytes);
5526                 return -EINVAL;
5527         }
5528
5529         if (entry->offset != offset) {
5530                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5531                         entry->offset);
5532                 return -EINVAL;
5533         }
5534
5535         if (entry->bytes != bytes) {
5536                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5537                         bytes, entry->bytes, offset);
5538                 return -EINVAL;
5539         }
5540
5541         unlink_free_space(cache->free_space_ctl, entry);
5542         free(entry);
5543         return 0;
5544 }
5545
5546 static int verify_space_cache(struct btrfs_root *root,
5547                               struct btrfs_block_group_cache *cache)
5548 {
5549         struct btrfs_path *path;
5550         struct extent_buffer *leaf;
5551         struct btrfs_key key;
5552         u64 last;
5553         int ret = 0;
5554
5555         path = btrfs_alloc_path();
5556         if (!path)
5557                 return -ENOMEM;
5558
5559         root = root->fs_info->extent_root;
5560
5561         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5562
5563         key.objectid = last;
5564         key.offset = 0;
5565         key.type = BTRFS_EXTENT_ITEM_KEY;
5566
5567         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5568         if (ret < 0)
5569                 goto out;
5570         ret = 0;
5571         while (1) {
5572                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5573                         ret = btrfs_next_leaf(root, path);
5574                         if (ret < 0)
5575                                 goto out;
5576                         if (ret > 0) {
5577                                 ret = 0;
5578                                 break;
5579                         }
5580                 }
5581                 leaf = path->nodes[0];
5582                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5583                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5584                         break;
5585                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5586                     key.type != BTRFS_METADATA_ITEM_KEY) {
5587                         path->slots[0]++;
5588                         continue;
5589                 }
5590
5591                 if (last == key.objectid) {
5592                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5593                                 last = key.objectid + key.offset;
5594                         else
5595                                 last = key.objectid + root->nodesize;
5596                         path->slots[0]++;
5597                         continue;
5598                 }
5599
5600                 ret = check_cache_range(root, cache, last,
5601                                         key.objectid - last);
5602                 if (ret)
5603                         break;
5604                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5605                         last = key.objectid + key.offset;
5606                 else
5607                         last = key.objectid + root->nodesize;
5608                 path->slots[0]++;
5609         }
5610
5611         if (last < cache->key.objectid + cache->key.offset)
5612                 ret = check_cache_range(root, cache, last,
5613                                         cache->key.objectid +
5614                                         cache->key.offset - last);
5615
5616 out:
5617         btrfs_free_path(path);
5618
5619         if (!ret &&
5620             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5621                 fprintf(stderr, "There are still entries left in the space "
5622                         "cache\n");
5623                 ret = -EINVAL;
5624         }
5625
5626         return ret;
5627 }
5628
5629 static int check_space_cache(struct btrfs_root *root)
5630 {
5631         struct btrfs_block_group_cache *cache;
5632         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5633         int ret;
5634         int error = 0;
5635
5636         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5637             btrfs_super_generation(root->fs_info->super_copy) !=
5638             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5639                 printf("cache and super generation don't match, space cache "
5640                        "will be invalidated\n");
5641                 return 0;
5642         }
5643
5644         if (ctx.progress_enabled) {
5645                 ctx.tp = TASK_FREE_SPACE;
5646                 task_start(ctx.info);
5647         }
5648
5649         while (1) {
5650                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5651                 if (!cache)
5652                         break;
5653
5654                 start = cache->key.objectid + cache->key.offset;
5655                 if (!cache->free_space_ctl) {
5656                         if (btrfs_init_free_space_ctl(cache,
5657                                                       root->sectorsize)) {
5658                                 ret = -ENOMEM;
5659                                 break;
5660                         }
5661                 } else {
5662                         btrfs_remove_free_space_cache(cache);
5663                 }
5664
5665                 if (btrfs_fs_compat_ro(root->fs_info,
5666                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5667                         ret = exclude_super_stripes(root, cache);
5668                         if (ret) {
5669                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5670                                         strerror(-ret));
5671                                 error++;
5672                                 continue;
5673                         }
5674                         ret = load_free_space_tree(root->fs_info, cache);
5675                         free_excluded_extents(root, cache);
5676                         if (ret < 0) {
5677                                 fprintf(stderr, "could not load free space tree: %s\n",
5678                                         strerror(-ret));
5679                                 error++;
5680                                 continue;
5681                         }
5682                         error += ret;
5683                 } else {
5684                         ret = load_free_space_cache(root->fs_info, cache);
5685                         if (!ret)
5686                                 continue;
5687                 }
5688
5689                 ret = verify_space_cache(root, cache);
5690                 if (ret) {
5691                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5692                                 cache->key.objectid);
5693                         error++;
5694                 }
5695         }
5696
5697         task_stop(ctx.info);
5698
5699         return error ? -EINVAL : 0;
5700 }
5701
5702 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5703                         u64 num_bytes, unsigned long leaf_offset,
5704                         struct extent_buffer *eb) {
5705
5706         u64 offset = 0;
5707         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5708         char *data;
5709         unsigned long csum_offset;
5710         u32 csum;
5711         u32 csum_expected;
5712         u64 read_len;
5713         u64 data_checked = 0;
5714         u64 tmp;
5715         int ret = 0;
5716         int mirror;
5717         int num_copies;
5718
5719         if (num_bytes % root->sectorsize)
5720                 return -EINVAL;
5721
5722         data = malloc(num_bytes);
5723         if (!data)
5724                 return -ENOMEM;
5725
5726         while (offset < num_bytes) {
5727                 mirror = 0;
5728 again:
5729                 read_len = num_bytes - offset;
5730                 /* read as much space once a time */
5731                 ret = read_extent_data(root, data + offset,
5732                                 bytenr + offset, &read_len, mirror);
5733                 if (ret)
5734                         goto out;
5735                 data_checked = 0;
5736                 /* verify every 4k data's checksum */
5737                 while (data_checked < read_len) {
5738                         csum = ~(u32)0;
5739                         tmp = offset + data_checked;
5740
5741                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5742                                                csum, root->sectorsize);
5743                         btrfs_csum_final(csum, (char *)&csum);
5744
5745                         csum_offset = leaf_offset +
5746                                  tmp / root->sectorsize * csum_size;
5747                         read_extent_buffer(eb, (char *)&csum_expected,
5748                                            csum_offset, csum_size);
5749                         /* try another mirror */
5750                         if (csum != csum_expected) {
5751                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5752                                                 mirror, bytenr + tmp,
5753                                                 csum, csum_expected);
5754                                 num_copies = btrfs_num_copies(
5755                                                 &root->fs_info->mapping_tree,
5756                                                 bytenr, num_bytes);
5757                                 if (mirror < num_copies - 1) {
5758                                         mirror += 1;
5759                                         goto again;
5760                                 }
5761                         }
5762                         data_checked += root->sectorsize;
5763                 }
5764                 offset += read_len;
5765         }
5766 out:
5767         free(data);
5768         return ret;
5769 }
5770
5771 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5772                                u64 num_bytes)
5773 {
5774         struct btrfs_path *path;
5775         struct extent_buffer *leaf;
5776         struct btrfs_key key;
5777         int ret;
5778
5779         path = btrfs_alloc_path();
5780         if (!path) {
5781                 fprintf(stderr, "Error allocating path\n");
5782                 return -ENOMEM;
5783         }
5784
5785         key.objectid = bytenr;
5786         key.type = BTRFS_EXTENT_ITEM_KEY;
5787         key.offset = (u64)-1;
5788
5789 again:
5790         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5791                                 0, 0);
5792         if (ret < 0) {
5793                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5794                 btrfs_free_path(path);
5795                 return ret;
5796         } else if (ret) {
5797                 if (path->slots[0] > 0) {
5798                         path->slots[0]--;
5799                 } else {
5800                         ret = btrfs_prev_leaf(root, path);
5801                         if (ret < 0) {
5802                                 goto out;
5803                         } else if (ret > 0) {
5804                                 ret = 0;
5805                                 goto out;
5806                         }
5807                 }
5808         }
5809
5810         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5811
5812         /*
5813          * Block group items come before extent items if they have the same
5814          * bytenr, so walk back one more just in case.  Dear future traveller,
5815          * first congrats on mastering time travel.  Now if it's not too much
5816          * trouble could you go back to 2006 and tell Chris to make the
5817          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5818          * EXTENT_ITEM_KEY please?
5819          */
5820         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5821                 if (path->slots[0] > 0) {
5822                         path->slots[0]--;
5823                 } else {
5824                         ret = btrfs_prev_leaf(root, path);
5825                         if (ret < 0) {
5826                                 goto out;
5827                         } else if (ret > 0) {
5828                                 ret = 0;
5829                                 goto out;
5830                         }
5831                 }
5832                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5833         }
5834
5835         while (num_bytes) {
5836                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5837                         ret = btrfs_next_leaf(root, path);
5838                         if (ret < 0) {
5839                                 fprintf(stderr, "Error going to next leaf "
5840                                         "%d\n", ret);
5841                                 btrfs_free_path(path);
5842                                 return ret;
5843                         } else if (ret) {
5844                                 break;
5845                         }
5846                 }
5847                 leaf = path->nodes[0];
5848                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5849                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5850                         path->slots[0]++;
5851                         continue;
5852                 }
5853                 if (key.objectid + key.offset < bytenr) {
5854                         path->slots[0]++;
5855                         continue;
5856                 }
5857                 if (key.objectid > bytenr + num_bytes)
5858                         break;
5859
5860                 if (key.objectid == bytenr) {
5861                         if (key.offset >= num_bytes) {
5862                                 num_bytes = 0;
5863                                 break;
5864                         }
5865                         num_bytes -= key.offset;
5866                         bytenr += key.offset;
5867                 } else if (key.objectid < bytenr) {
5868                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5869                                 num_bytes = 0;
5870                                 break;
5871                         }
5872                         num_bytes = (bytenr + num_bytes) -
5873                                 (key.objectid + key.offset);
5874                         bytenr = key.objectid + key.offset;
5875                 } else {
5876                         if (key.objectid + key.offset < bytenr + num_bytes) {
5877                                 u64 new_start = key.objectid + key.offset;
5878                                 u64 new_bytes = bytenr + num_bytes - new_start;
5879
5880                                 /*
5881                                  * Weird case, the extent is in the middle of
5882                                  * our range, we'll have to search one side
5883                                  * and then the other.  Not sure if this happens
5884                                  * in real life, but no harm in coding it up
5885                                  * anyway just in case.
5886                                  */
5887                                 btrfs_release_path(path);
5888                                 ret = check_extent_exists(root, new_start,
5889                                                           new_bytes);
5890                                 if (ret) {
5891                                         fprintf(stderr, "Right section didn't "
5892                                                 "have a record\n");
5893                                         break;
5894                                 }
5895                                 num_bytes = key.objectid - bytenr;
5896                                 goto again;
5897                         }
5898                         num_bytes = key.objectid - bytenr;
5899                 }
5900                 path->slots[0]++;
5901         }
5902         ret = 0;
5903
5904 out:
5905         if (num_bytes && !ret) {
5906                 fprintf(stderr, "There are no extents for csum range "
5907                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5908                 ret = 1;
5909         }
5910
5911         btrfs_free_path(path);
5912         return ret;
5913 }
5914
5915 static int check_csums(struct btrfs_root *root)
5916 {
5917         struct btrfs_path *path;
5918         struct extent_buffer *leaf;
5919         struct btrfs_key key;
5920         u64 offset = 0, num_bytes = 0;
5921         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5922         int errors = 0;
5923         int ret;
5924         u64 data_len;
5925         unsigned long leaf_offset;
5926
5927         root = root->fs_info->csum_root;
5928         if (!extent_buffer_uptodate(root->node)) {
5929                 fprintf(stderr, "No valid csum tree found\n");
5930                 return -ENOENT;
5931         }
5932
5933         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5934         key.type = BTRFS_EXTENT_CSUM_KEY;
5935         key.offset = 0;
5936
5937         path = btrfs_alloc_path();
5938         if (!path)
5939                 return -ENOMEM;
5940
5941         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5942         if (ret < 0) {
5943                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5944                 btrfs_free_path(path);
5945                 return ret;
5946         }
5947
5948         if (ret > 0 && path->slots[0])
5949                 path->slots[0]--;
5950         ret = 0;
5951
5952         while (1) {
5953                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5954                         ret = btrfs_next_leaf(root, path);
5955                         if (ret < 0) {
5956                                 fprintf(stderr, "Error going to next leaf "
5957                                         "%d\n", ret);
5958                                 break;
5959                         }
5960                         if (ret)
5961                                 break;
5962                 }
5963                 leaf = path->nodes[0];
5964
5965                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5966                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5967                         path->slots[0]++;
5968                         continue;
5969                 }
5970
5971                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5972                               csum_size) * root->sectorsize;
5973                 if (!check_data_csum)
5974                         goto skip_csum_check;
5975                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5976                 ret = check_extent_csums(root, key.offset, data_len,
5977                                          leaf_offset, leaf);
5978                 if (ret)
5979                         break;
5980 skip_csum_check:
5981                 if (!num_bytes) {
5982                         offset = key.offset;
5983                 } else if (key.offset != offset + num_bytes) {
5984                         ret = check_extent_exists(root, offset, num_bytes);
5985                         if (ret) {
5986                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5987                                         "there is no extent record\n",
5988                                         offset, offset+num_bytes);
5989                                 errors++;
5990                         }
5991                         offset = key.offset;
5992                         num_bytes = 0;
5993                 }
5994                 num_bytes += data_len;
5995                 path->slots[0]++;
5996         }
5997
5998         btrfs_free_path(path);
5999         return errors;
6000 }
6001
6002 static int is_dropped_key(struct btrfs_key *key,
6003                           struct btrfs_key *drop_key) {
6004         if (key->objectid < drop_key->objectid)
6005                 return 1;
6006         else if (key->objectid == drop_key->objectid) {
6007                 if (key->type < drop_key->type)
6008                         return 1;
6009                 else if (key->type == drop_key->type) {
6010                         if (key->offset < drop_key->offset)
6011                                 return 1;
6012                 }
6013         }
6014         return 0;
6015 }
6016
6017 /*
6018  * Here are the rules for FULL_BACKREF.
6019  *
6020  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6021  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6022  *      FULL_BACKREF set.
6023  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6024  *    if it happened after the relocation occurred since we'll have dropped the
6025  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6026  *    have no real way to know for sure.
6027  *
6028  * We process the blocks one root at a time, and we start from the lowest root
6029  * objectid and go to the highest.  So we can just lookup the owner backref for
6030  * the record and if we don't find it then we know it doesn't exist and we have
6031  * a FULL BACKREF.
6032  *
6033  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6034  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6035  * be set or not and then we can check later once we've gathered all the refs.
6036  */
6037 static int calc_extent_flag(struct btrfs_root *root,
6038                            struct cache_tree *extent_cache,
6039                            struct extent_buffer *buf,
6040                            struct root_item_record *ri,
6041                            u64 *flags)
6042 {
6043         struct extent_record *rec;
6044         struct cache_extent *cache;
6045         struct tree_backref *tback;
6046         u64 owner = 0;
6047
6048         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6049         /* we have added this extent before */
6050         BUG_ON(!cache);
6051         rec = container_of(cache, struct extent_record, cache);
6052
6053         /*
6054          * Except file/reloc tree, we can not have
6055          * FULL BACKREF MODE
6056          */
6057         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6058                 goto normal;
6059         /*
6060          * root node
6061          */
6062         if (buf->start == ri->bytenr)
6063                 goto normal;
6064
6065         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6066                 goto full_backref;
6067
6068         owner = btrfs_header_owner(buf);
6069         if (owner == ri->objectid)
6070                 goto normal;
6071
6072         tback = find_tree_backref(rec, 0, owner);
6073         if (!tback)
6074                 goto full_backref;
6075 normal:
6076         *flags = 0;
6077         if (rec->flag_block_full_backref != FLAG_UNSET &&
6078             rec->flag_block_full_backref != 0)
6079                 rec->bad_full_backref = 1;
6080         return 0;
6081 full_backref:
6082         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6083         if (rec->flag_block_full_backref != FLAG_UNSET &&
6084             rec->flag_block_full_backref != 1)
6085                 rec->bad_full_backref = 1;
6086         return 0;
6087 }
6088
6089 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6090 {
6091         fprintf(stderr, "Invalid key type(");
6092         print_key_type(stderr, 0, key_type);
6093         fprintf(stderr, ") found in root(");
6094         print_objectid(stderr, rootid, 0);
6095         fprintf(stderr, ")\n");
6096 }
6097
6098 /*
6099  * Check if the key is valid with its extent buffer.
6100  *
6101  * This is a early check in case invalid key exists in a extent buffer
6102  * This is not comprehensive yet, but should prevent wrong key/item passed
6103  * further
6104  */
6105 static int check_type_with_root(u64 rootid, u8 key_type)
6106 {
6107         switch (key_type) {
6108         /* Only valid in chunk tree */
6109         case BTRFS_DEV_ITEM_KEY:
6110         case BTRFS_CHUNK_ITEM_KEY:
6111                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6112                         goto err;
6113                 break;
6114         /* valid in csum and log tree */
6115         case BTRFS_CSUM_TREE_OBJECTID:
6116                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6117                       is_fstree(rootid)))
6118                         goto err;
6119                 break;
6120         case BTRFS_EXTENT_ITEM_KEY:
6121         case BTRFS_METADATA_ITEM_KEY:
6122         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6123                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6124                         goto err;
6125                 break;
6126         case BTRFS_ROOT_ITEM_KEY:
6127                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6128                         goto err;
6129                 break;
6130         case BTRFS_DEV_EXTENT_KEY:
6131                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6132                         goto err;
6133                 break;
6134         }
6135         return 0;
6136 err:
6137         report_mismatch_key_root(key_type, rootid);
6138         return -EINVAL;
6139 }
6140
6141 static int run_next_block(struct btrfs_root *root,
6142                           struct block_info *bits,
6143                           int bits_nr,
6144                           u64 *last,
6145                           struct cache_tree *pending,
6146                           struct cache_tree *seen,
6147                           struct cache_tree *reada,
6148                           struct cache_tree *nodes,
6149                           struct cache_tree *extent_cache,
6150                           struct cache_tree *chunk_cache,
6151                           struct rb_root *dev_cache,
6152                           struct block_group_tree *block_group_cache,
6153                           struct device_extent_tree *dev_extent_cache,
6154                           struct root_item_record *ri)
6155 {
6156         struct extent_buffer *buf;
6157         struct extent_record *rec = NULL;
6158         u64 bytenr;
6159         u32 size;
6160         u64 parent;
6161         u64 owner;
6162         u64 flags;
6163         u64 ptr;
6164         u64 gen = 0;
6165         int ret = 0;
6166         int i;
6167         int nritems;
6168         struct btrfs_key key;
6169         struct cache_extent *cache;
6170         int reada_bits;
6171
6172         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6173                                     bits_nr, &reada_bits);
6174         if (nritems == 0)
6175                 return 1;
6176
6177         if (!reada_bits) {
6178                 for(i = 0; i < nritems; i++) {
6179                         ret = add_cache_extent(reada, bits[i].start,
6180                                                bits[i].size);
6181                         if (ret == -EEXIST)
6182                                 continue;
6183
6184                         /* fixme, get the parent transid */
6185                         readahead_tree_block(root, bits[i].start,
6186                                              bits[i].size, 0);
6187                 }
6188         }
6189         *last = bits[0].start;
6190         bytenr = bits[0].start;
6191         size = bits[0].size;
6192
6193         cache = lookup_cache_extent(pending, bytenr, size);
6194         if (cache) {
6195                 remove_cache_extent(pending, cache);
6196                 free(cache);
6197         }
6198         cache = lookup_cache_extent(reada, bytenr, size);
6199         if (cache) {
6200                 remove_cache_extent(reada, cache);
6201                 free(cache);
6202         }
6203         cache = lookup_cache_extent(nodes, bytenr, size);
6204         if (cache) {
6205                 remove_cache_extent(nodes, cache);
6206                 free(cache);
6207         }
6208         cache = lookup_cache_extent(extent_cache, bytenr, size);
6209         if (cache) {
6210                 rec = container_of(cache, struct extent_record, cache);
6211                 gen = rec->parent_generation;
6212         }
6213
6214         /* fixme, get the real parent transid */
6215         buf = read_tree_block(root, bytenr, size, gen);
6216         if (!extent_buffer_uptodate(buf)) {
6217                 record_bad_block_io(root->fs_info,
6218                                     extent_cache, bytenr, size);
6219                 goto out;
6220         }
6221
6222         nritems = btrfs_header_nritems(buf);
6223
6224         flags = 0;
6225         if (!init_extent_tree) {
6226                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6227                                        btrfs_header_level(buf), 1, NULL,
6228                                        &flags);
6229                 if (ret < 0) {
6230                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6231                         if (ret < 0) {
6232                                 fprintf(stderr, "Couldn't calc extent flags\n");
6233                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6234                         }
6235                 }
6236         } else {
6237                 flags = 0;
6238                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6239                 if (ret < 0) {
6240                         fprintf(stderr, "Couldn't calc extent flags\n");
6241                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6242                 }
6243         }
6244
6245         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6246                 if (ri != NULL &&
6247                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6248                     ri->objectid == btrfs_header_owner(buf)) {
6249                         /*
6250                          * Ok we got to this block from it's original owner and
6251                          * we have FULL_BACKREF set.  Relocation can leave
6252                          * converted blocks over so this is altogether possible,
6253                          * however it's not possible if the generation > the
6254                          * last snapshot, so check for this case.
6255                          */
6256                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6257                             btrfs_header_generation(buf) > ri->last_snapshot) {
6258                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6259                                 rec->bad_full_backref = 1;
6260                         }
6261                 }
6262         } else {
6263                 if (ri != NULL &&
6264                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6265                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6266                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6267                         rec->bad_full_backref = 1;
6268                 }
6269         }
6270
6271         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6272                 rec->flag_block_full_backref = 1;
6273                 parent = bytenr;
6274                 owner = 0;
6275         } else {
6276                 rec->flag_block_full_backref = 0;
6277                 parent = 0;
6278                 owner = btrfs_header_owner(buf);
6279         }
6280
6281         ret = check_block(root, extent_cache, buf, flags);
6282         if (ret)
6283                 goto out;
6284
6285         if (btrfs_is_leaf(buf)) {
6286                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6287                 for (i = 0; i < nritems; i++) {
6288                         struct btrfs_file_extent_item *fi;
6289                         btrfs_item_key_to_cpu(buf, &key, i);
6290                         /*
6291                          * Check key type against the leaf owner.
6292                          * Could filter quite a lot of early error if
6293                          * owner is correct
6294                          */
6295                         if (check_type_with_root(btrfs_header_owner(buf),
6296                                                  key.type)) {
6297                                 fprintf(stderr, "ignoring invalid key\n");
6298                                 continue;
6299                         }
6300                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6301                                 process_extent_item(root, extent_cache, buf,
6302                                                     i);
6303                                 continue;
6304                         }
6305                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6306                                 process_extent_item(root, extent_cache, buf,
6307                                                     i);
6308                                 continue;
6309                         }
6310                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6311                                 total_csum_bytes +=
6312                                         btrfs_item_size_nr(buf, i);
6313                                 continue;
6314                         }
6315                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6316                                 process_chunk_item(chunk_cache, &key, buf, i);
6317                                 continue;
6318                         }
6319                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6320                                 process_device_item(dev_cache, &key, buf, i);
6321                                 continue;
6322                         }
6323                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6324                                 process_block_group_item(block_group_cache,
6325                                         &key, buf, i);
6326                                 continue;
6327                         }
6328                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6329                                 process_device_extent_item(dev_extent_cache,
6330                                         &key, buf, i);
6331                                 continue;
6332
6333                         }
6334                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6335 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6336                                 process_extent_ref_v0(extent_cache, buf, i);
6337 #else
6338                                 BUG();
6339 #endif
6340                                 continue;
6341                         }
6342
6343                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6344                                 ret = add_tree_backref(extent_cache,
6345                                                 key.objectid, 0, key.offset, 0);
6346                                 if (ret < 0)
6347                                         error("add_tree_backref failed: %s",
6348                                               strerror(-ret));
6349                                 continue;
6350                         }
6351                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6352                                 ret = add_tree_backref(extent_cache,
6353                                                 key.objectid, key.offset, 0, 0);
6354                                 if (ret < 0)
6355                                         error("add_tree_backref failed: %s",
6356                                               strerror(-ret));
6357                                 continue;
6358                         }
6359                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6360                                 struct btrfs_extent_data_ref *ref;
6361                                 ref = btrfs_item_ptr(buf, i,
6362                                                 struct btrfs_extent_data_ref);
6363                                 add_data_backref(extent_cache,
6364                                         key.objectid, 0,
6365                                         btrfs_extent_data_ref_root(buf, ref),
6366                                         btrfs_extent_data_ref_objectid(buf,
6367                                                                        ref),
6368                                         btrfs_extent_data_ref_offset(buf, ref),
6369                                         btrfs_extent_data_ref_count(buf, ref),
6370                                         0, root->sectorsize);
6371                                 continue;
6372                         }
6373                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6374                                 struct btrfs_shared_data_ref *ref;
6375                                 ref = btrfs_item_ptr(buf, i,
6376                                                 struct btrfs_shared_data_ref);
6377                                 add_data_backref(extent_cache,
6378                                         key.objectid, key.offset, 0, 0, 0,
6379                                         btrfs_shared_data_ref_count(buf, ref),
6380                                         0, root->sectorsize);
6381                                 continue;
6382                         }
6383                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6384                                 struct bad_item *bad;
6385
6386                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6387                                         continue;
6388                                 if (!owner)
6389                                         continue;
6390                                 bad = malloc(sizeof(struct bad_item));
6391                                 if (!bad)
6392                                         continue;
6393                                 INIT_LIST_HEAD(&bad->list);
6394                                 memcpy(&bad->key, &key,
6395                                        sizeof(struct btrfs_key));
6396                                 bad->root_id = owner;
6397                                 list_add_tail(&bad->list, &delete_items);
6398                                 continue;
6399                         }
6400                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6401                                 continue;
6402                         fi = btrfs_item_ptr(buf, i,
6403                                             struct btrfs_file_extent_item);
6404                         if (btrfs_file_extent_type(buf, fi) ==
6405                             BTRFS_FILE_EXTENT_INLINE)
6406                                 continue;
6407                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6408                                 continue;
6409
6410                         data_bytes_allocated +=
6411                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6412                         if (data_bytes_allocated < root->sectorsize) {
6413                                 abort();
6414                         }
6415                         data_bytes_referenced +=
6416                                 btrfs_file_extent_num_bytes(buf, fi);
6417                         add_data_backref(extent_cache,
6418                                 btrfs_file_extent_disk_bytenr(buf, fi),
6419                                 parent, owner, key.objectid, key.offset -
6420                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6421                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6422                 }
6423         } else {
6424                 int level;
6425                 struct btrfs_key first_key;
6426
6427                 first_key.objectid = 0;
6428
6429                 if (nritems > 0)
6430                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6431                 level = btrfs_header_level(buf);
6432                 for (i = 0; i < nritems; i++) {
6433                         struct extent_record tmpl;
6434
6435                         ptr = btrfs_node_blockptr(buf, i);
6436                         size = root->nodesize;
6437                         btrfs_node_key_to_cpu(buf, &key, i);
6438                         if (ri != NULL) {
6439                                 if ((level == ri->drop_level)
6440                                     && is_dropped_key(&key, &ri->drop_key)) {
6441                                         continue;
6442                                 }
6443                         }
6444
6445                         memset(&tmpl, 0, sizeof(tmpl));
6446                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6447                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6448                         tmpl.start = ptr;
6449                         tmpl.nr = size;
6450                         tmpl.refs = 1;
6451                         tmpl.metadata = 1;
6452                         tmpl.max_size = size;
6453                         ret = add_extent_rec(extent_cache, &tmpl);
6454                         if (ret < 0)
6455                                 goto out;
6456
6457                         ret = add_tree_backref(extent_cache, ptr, parent,
6458                                         owner, 1);
6459                         if (ret < 0) {
6460                                 error("add_tree_backref failed: %s",
6461                                       strerror(-ret));
6462                                 continue;
6463                         }
6464
6465                         if (level > 1) {
6466                                 add_pending(nodes, seen, ptr, size);
6467                         } else {
6468                                 add_pending(pending, seen, ptr, size);
6469                         }
6470                 }
6471                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6472                                       nritems) * sizeof(struct btrfs_key_ptr);
6473         }
6474         total_btree_bytes += buf->len;
6475         if (fs_root_objectid(btrfs_header_owner(buf)))
6476                 total_fs_tree_bytes += buf->len;
6477         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6478                 total_extent_tree_bytes += buf->len;
6479         if (!found_old_backref &&
6480             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6481             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6482             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6483                 found_old_backref = 1;
6484 out:
6485         free_extent_buffer(buf);
6486         return ret;
6487 }
6488
6489 static int add_root_to_pending(struct extent_buffer *buf,
6490                                struct cache_tree *extent_cache,
6491                                struct cache_tree *pending,
6492                                struct cache_tree *seen,
6493                                struct cache_tree *nodes,
6494                                u64 objectid)
6495 {
6496         struct extent_record tmpl;
6497         int ret;
6498
6499         if (btrfs_header_level(buf) > 0)
6500                 add_pending(nodes, seen, buf->start, buf->len);
6501         else
6502                 add_pending(pending, seen, buf->start, buf->len);
6503
6504         memset(&tmpl, 0, sizeof(tmpl));
6505         tmpl.start = buf->start;
6506         tmpl.nr = buf->len;
6507         tmpl.is_root = 1;
6508         tmpl.refs = 1;
6509         tmpl.metadata = 1;
6510         tmpl.max_size = buf->len;
6511         add_extent_rec(extent_cache, &tmpl);
6512
6513         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6514             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6515                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6516                                 0, 1);
6517         else
6518                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6519                                 1);
6520         return ret;
6521 }
6522
6523 /* as we fix the tree, we might be deleting blocks that
6524  * we're tracking for repair.  This hook makes sure we
6525  * remove any backrefs for blocks as we are fixing them.
6526  */
6527 static int free_extent_hook(struct btrfs_trans_handle *trans,
6528                             struct btrfs_root *root,
6529                             u64 bytenr, u64 num_bytes, u64 parent,
6530                             u64 root_objectid, u64 owner, u64 offset,
6531                             int refs_to_drop)
6532 {
6533         struct extent_record *rec;
6534         struct cache_extent *cache;
6535         int is_data;
6536         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6537
6538         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6539         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6540         if (!cache)
6541                 return 0;
6542
6543         rec = container_of(cache, struct extent_record, cache);
6544         if (is_data) {
6545                 struct data_backref *back;
6546                 back = find_data_backref(rec, parent, root_objectid, owner,
6547                                          offset, 1, bytenr, num_bytes);
6548                 if (!back)
6549                         goto out;
6550                 if (back->node.found_ref) {
6551                         back->found_ref -= refs_to_drop;
6552                         if (rec->refs)
6553                                 rec->refs -= refs_to_drop;
6554                 }
6555                 if (back->node.found_extent_tree) {
6556                         back->num_refs -= refs_to_drop;
6557                         if (rec->extent_item_refs)
6558                                 rec->extent_item_refs -= refs_to_drop;
6559                 }
6560                 if (back->found_ref == 0)
6561                         back->node.found_ref = 0;
6562                 if (back->num_refs == 0)
6563                         back->node.found_extent_tree = 0;
6564
6565                 if (!back->node.found_extent_tree && back->node.found_ref) {
6566                         list_del(&back->node.list);
6567                         free(back);
6568                 }
6569         } else {
6570                 struct tree_backref *back;
6571                 back = find_tree_backref(rec, parent, root_objectid);
6572                 if (!back)
6573                         goto out;
6574                 if (back->node.found_ref) {
6575                         if (rec->refs)
6576                                 rec->refs--;
6577                         back->node.found_ref = 0;
6578                 }
6579                 if (back->node.found_extent_tree) {
6580                         if (rec->extent_item_refs)
6581                                 rec->extent_item_refs--;
6582                         back->node.found_extent_tree = 0;
6583                 }
6584                 if (!back->node.found_extent_tree && back->node.found_ref) {
6585                         list_del(&back->node.list);
6586                         free(back);
6587                 }
6588         }
6589         maybe_free_extent_rec(extent_cache, rec);
6590 out:
6591         return 0;
6592 }
6593
6594 static int delete_extent_records(struct btrfs_trans_handle *trans,
6595                                  struct btrfs_root *root,
6596                                  struct btrfs_path *path,
6597                                  u64 bytenr, u64 new_len)
6598 {
6599         struct btrfs_key key;
6600         struct btrfs_key found_key;
6601         struct extent_buffer *leaf;
6602         int ret;
6603         int slot;
6604
6605
6606         key.objectid = bytenr;
6607         key.type = (u8)-1;
6608         key.offset = (u64)-1;
6609
6610         while(1) {
6611                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6612                                         &key, path, 0, 1);
6613                 if (ret < 0)
6614                         break;
6615
6616                 if (ret > 0) {
6617                         ret = 0;
6618                         if (path->slots[0] == 0)
6619                                 break;
6620                         path->slots[0]--;
6621                 }
6622                 ret = 0;
6623
6624                 leaf = path->nodes[0];
6625                 slot = path->slots[0];
6626
6627                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6628                 if (found_key.objectid != bytenr)
6629                         break;
6630
6631                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6632                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6633                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6634                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6635                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6636                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6637                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6638                         btrfs_release_path(path);
6639                         if (found_key.type == 0) {
6640                                 if (found_key.offset == 0)
6641                                         break;
6642                                 key.offset = found_key.offset - 1;
6643                                 key.type = found_key.type;
6644                         }
6645                         key.type = found_key.type - 1;
6646                         key.offset = (u64)-1;
6647                         continue;
6648                 }
6649
6650                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6651                         found_key.objectid, found_key.type, found_key.offset);
6652
6653                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6654                 if (ret)
6655                         break;
6656                 btrfs_release_path(path);
6657
6658                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6659                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6660                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6661                                 found_key.offset : root->nodesize;
6662
6663                         ret = btrfs_update_block_group(trans, root, bytenr,
6664                                                        bytes, 0, 0);
6665                         if (ret)
6666                                 break;
6667                 }
6668         }
6669
6670         btrfs_release_path(path);
6671         return ret;
6672 }
6673
6674 /*
6675  * for a single backref, this will allocate a new extent
6676  * and add the backref to it.
6677  */
6678 static int record_extent(struct btrfs_trans_handle *trans,
6679                          struct btrfs_fs_info *info,
6680                          struct btrfs_path *path,
6681                          struct extent_record *rec,
6682                          struct extent_backref *back,
6683                          int allocated, u64 flags)
6684 {
6685         int ret;
6686         struct btrfs_root *extent_root = info->extent_root;
6687         struct extent_buffer *leaf;
6688         struct btrfs_key ins_key;
6689         struct btrfs_extent_item *ei;
6690         struct tree_backref *tback;
6691         struct data_backref *dback;
6692         struct btrfs_tree_block_info *bi;
6693
6694         if (!back->is_data)
6695                 rec->max_size = max_t(u64, rec->max_size,
6696                                     info->extent_root->nodesize);
6697
6698         if (!allocated) {
6699                 u32 item_size = sizeof(*ei);
6700
6701                 if (!back->is_data)
6702                         item_size += sizeof(*bi);
6703
6704                 ins_key.objectid = rec->start;
6705                 ins_key.offset = rec->max_size;
6706                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6707
6708                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6709                                         &ins_key, item_size);
6710                 if (ret)
6711                         goto fail;
6712
6713                 leaf = path->nodes[0];
6714                 ei = btrfs_item_ptr(leaf, path->slots[0],
6715                                     struct btrfs_extent_item);
6716
6717                 btrfs_set_extent_refs(leaf, ei, 0);
6718                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6719
6720                 if (back->is_data) {
6721                         btrfs_set_extent_flags(leaf, ei,
6722                                                BTRFS_EXTENT_FLAG_DATA);
6723                 } else {
6724                         struct btrfs_disk_key copy_key;;
6725
6726                         tback = to_tree_backref(back);
6727                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6728                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6729                                              sizeof(*bi));
6730
6731                         btrfs_set_disk_key_objectid(&copy_key,
6732                                                     rec->info_objectid);
6733                         btrfs_set_disk_key_type(&copy_key, 0);
6734                         btrfs_set_disk_key_offset(&copy_key, 0);
6735
6736                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6737                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6738
6739                         btrfs_set_extent_flags(leaf, ei,
6740                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6741                 }
6742
6743                 btrfs_mark_buffer_dirty(leaf);
6744                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6745                                                rec->max_size, 1, 0);
6746                 if (ret)
6747                         goto fail;
6748                 btrfs_release_path(path);
6749         }
6750
6751         if (back->is_data) {
6752                 u64 parent;
6753                 int i;
6754
6755                 dback = to_data_backref(back);
6756                 if (back->full_backref)
6757                         parent = dback->parent;
6758                 else
6759                         parent = 0;
6760
6761                 for (i = 0; i < dback->found_ref; i++) {
6762                         /* if parent != 0, we're doing a full backref
6763                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6764                          * just makes the backref allocator create a data
6765                          * backref
6766                          */
6767                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6768                                                    rec->start, rec->max_size,
6769                                                    parent,
6770                                                    dback->root,
6771                                                    parent ?
6772                                                    BTRFS_FIRST_FREE_OBJECTID :
6773                                                    dback->owner,
6774                                                    dback->offset);
6775                         if (ret)
6776                                 break;
6777                 }
6778                 fprintf(stderr, "adding new data backref"
6779                                 " on %llu %s %llu owner %llu"
6780                                 " offset %llu found %d\n",
6781                                 (unsigned long long)rec->start,
6782                                 back->full_backref ?
6783                                 "parent" : "root",
6784                                 back->full_backref ?
6785                                 (unsigned long long)parent :
6786                                 (unsigned long long)dback->root,
6787                                 (unsigned long long)dback->owner,
6788                                 (unsigned long long)dback->offset,
6789                                 dback->found_ref);
6790         } else {
6791                 u64 parent;
6792
6793                 tback = to_tree_backref(back);
6794                 if (back->full_backref)
6795                         parent = tback->parent;
6796                 else
6797                         parent = 0;
6798
6799                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6800                                            rec->start, rec->max_size,
6801                                            parent, tback->root, 0, 0);
6802                 fprintf(stderr, "adding new tree backref on "
6803                         "start %llu len %llu parent %llu root %llu\n",
6804                         rec->start, rec->max_size, parent, tback->root);
6805         }
6806 fail:
6807         btrfs_release_path(path);
6808         return ret;
6809 }
6810
6811 static struct extent_entry *find_entry(struct list_head *entries,
6812                                        u64 bytenr, u64 bytes)
6813 {
6814         struct extent_entry *entry = NULL;
6815
6816         list_for_each_entry(entry, entries, list) {
6817                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6818                         return entry;
6819         }
6820
6821         return NULL;
6822 }
6823
6824 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6825 {
6826         struct extent_entry *entry, *best = NULL, *prev = NULL;
6827
6828         list_for_each_entry(entry, entries, list) {
6829                 if (!prev) {
6830                         prev = entry;
6831                         continue;
6832                 }
6833
6834                 /*
6835                  * If there are as many broken entries as entries then we know
6836                  * not to trust this particular entry.
6837                  */
6838                 if (entry->broken == entry->count)
6839                         continue;
6840
6841                 /*
6842                  * If our current entry == best then we can't be sure our best
6843                  * is really the best, so we need to keep searching.
6844                  */
6845                 if (best && best->count == entry->count) {
6846                         prev = entry;
6847                         best = NULL;
6848                         continue;
6849                 }
6850
6851                 /* Prev == entry, not good enough, have to keep searching */
6852                 if (!prev->broken && prev->count == entry->count)
6853                         continue;
6854
6855                 if (!best)
6856                         best = (prev->count > entry->count) ? prev : entry;
6857                 else if (best->count < entry->count)
6858                         best = entry;
6859                 prev = entry;
6860         }
6861
6862         return best;
6863 }
6864
6865 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6866                       struct data_backref *dback, struct extent_entry *entry)
6867 {
6868         struct btrfs_trans_handle *trans;
6869         struct btrfs_root *root;
6870         struct btrfs_file_extent_item *fi;
6871         struct extent_buffer *leaf;
6872         struct btrfs_key key;
6873         u64 bytenr, bytes;
6874         int ret, err;
6875
6876         key.objectid = dback->root;
6877         key.type = BTRFS_ROOT_ITEM_KEY;
6878         key.offset = (u64)-1;
6879         root = btrfs_read_fs_root(info, &key);
6880         if (IS_ERR(root)) {
6881                 fprintf(stderr, "Couldn't find root for our ref\n");
6882                 return -EINVAL;
6883         }
6884
6885         /*
6886          * The backref points to the original offset of the extent if it was
6887          * split, so we need to search down to the offset we have and then walk
6888          * forward until we find the backref we're looking for.
6889          */
6890         key.objectid = dback->owner;
6891         key.type = BTRFS_EXTENT_DATA_KEY;
6892         key.offset = dback->offset;
6893         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6894         if (ret < 0) {
6895                 fprintf(stderr, "Error looking up ref %d\n", ret);
6896                 return ret;
6897         }
6898
6899         while (1) {
6900                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6901                         ret = btrfs_next_leaf(root, path);
6902                         if (ret) {
6903                                 fprintf(stderr, "Couldn't find our ref, next\n");
6904                                 return -EINVAL;
6905                         }
6906                 }
6907                 leaf = path->nodes[0];
6908                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6909                 if (key.objectid != dback->owner ||
6910                     key.type != BTRFS_EXTENT_DATA_KEY) {
6911                         fprintf(stderr, "Couldn't find our ref, search\n");
6912                         return -EINVAL;
6913                 }
6914                 fi = btrfs_item_ptr(leaf, path->slots[0],
6915                                     struct btrfs_file_extent_item);
6916                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6917                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6918
6919                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6920                         break;
6921                 path->slots[0]++;
6922         }
6923
6924         btrfs_release_path(path);
6925
6926         trans = btrfs_start_transaction(root, 1);
6927         if (IS_ERR(trans))
6928                 return PTR_ERR(trans);
6929
6930         /*
6931          * Ok we have the key of the file extent we want to fix, now we can cow
6932          * down to the thing and fix it.
6933          */
6934         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6935         if (ret < 0) {
6936                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6937                         key.objectid, key.type, key.offset, ret);
6938                 goto out;
6939         }
6940         if (ret > 0) {
6941                 fprintf(stderr, "Well that's odd, we just found this key "
6942                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6943                         key.offset);
6944                 ret = -EINVAL;
6945                 goto out;
6946         }
6947         leaf = path->nodes[0];
6948         fi = btrfs_item_ptr(leaf, path->slots[0],
6949                             struct btrfs_file_extent_item);
6950
6951         if (btrfs_file_extent_compression(leaf, fi) &&
6952             dback->disk_bytenr != entry->bytenr) {
6953                 fprintf(stderr, "Ref doesn't match the record start and is "
6954                         "compressed, please take a btrfs-image of this file "
6955                         "system and send it to a btrfs developer so they can "
6956                         "complete this functionality for bytenr %Lu\n",
6957                         dback->disk_bytenr);
6958                 ret = -EINVAL;
6959                 goto out;
6960         }
6961
6962         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6963                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6964         } else if (dback->disk_bytenr > entry->bytenr) {
6965                 u64 off_diff, offset;
6966
6967                 off_diff = dback->disk_bytenr - entry->bytenr;
6968                 offset = btrfs_file_extent_offset(leaf, fi);
6969                 if (dback->disk_bytenr + offset +
6970                     btrfs_file_extent_num_bytes(leaf, fi) >
6971                     entry->bytenr + entry->bytes) {
6972                         fprintf(stderr, "Ref is past the entry end, please "
6973                                 "take a btrfs-image of this file system and "
6974                                 "send it to a btrfs developer, ref %Lu\n",
6975                                 dback->disk_bytenr);
6976                         ret = -EINVAL;
6977                         goto out;
6978                 }
6979                 offset += off_diff;
6980                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6981                 btrfs_set_file_extent_offset(leaf, fi, offset);
6982         } else if (dback->disk_bytenr < entry->bytenr) {
6983                 u64 offset;
6984
6985                 offset = btrfs_file_extent_offset(leaf, fi);
6986                 if (dback->disk_bytenr + offset < entry->bytenr) {
6987                         fprintf(stderr, "Ref is before the entry start, please"
6988                                 " take a btrfs-image of this file system and "
6989                                 "send it to a btrfs developer, ref %Lu\n",
6990                                 dback->disk_bytenr);
6991                         ret = -EINVAL;
6992                         goto out;
6993                 }
6994
6995                 offset += dback->disk_bytenr;
6996                 offset -= entry->bytenr;
6997                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6998                 btrfs_set_file_extent_offset(leaf, fi, offset);
6999         }
7000
7001         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7002
7003         /*
7004          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7005          * only do this if we aren't using compression, otherwise it's a
7006          * trickier case.
7007          */
7008         if (!btrfs_file_extent_compression(leaf, fi))
7009                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7010         else
7011                 printf("ram bytes may be wrong?\n");
7012         btrfs_mark_buffer_dirty(leaf);
7013 out:
7014         err = btrfs_commit_transaction(trans, root);
7015         btrfs_release_path(path);
7016         return ret ? ret : err;
7017 }
7018
7019 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7020                            struct extent_record *rec)
7021 {
7022         struct extent_backref *back;
7023         struct data_backref *dback;
7024         struct extent_entry *entry, *best = NULL;
7025         LIST_HEAD(entries);
7026         int nr_entries = 0;
7027         int broken_entries = 0;
7028         int ret = 0;
7029         short mismatch = 0;
7030
7031         /*
7032          * Metadata is easy and the backrefs should always agree on bytenr and
7033          * size, if not we've got bigger issues.
7034          */
7035         if (rec->metadata)
7036                 return 0;
7037
7038         list_for_each_entry(back, &rec->backrefs, list) {
7039                 if (back->full_backref || !back->is_data)
7040                         continue;
7041
7042                 dback = to_data_backref(back);
7043
7044                 /*
7045                  * We only pay attention to backrefs that we found a real
7046                  * backref for.
7047                  */
7048                 if (dback->found_ref == 0)
7049                         continue;
7050
7051                 /*
7052                  * For now we only catch when the bytes don't match, not the
7053                  * bytenr.  We can easily do this at the same time, but I want
7054                  * to have a fs image to test on before we just add repair
7055                  * functionality willy-nilly so we know we won't screw up the
7056                  * repair.
7057                  */
7058
7059                 entry = find_entry(&entries, dback->disk_bytenr,
7060                                    dback->bytes);
7061                 if (!entry) {
7062                         entry = malloc(sizeof(struct extent_entry));
7063                         if (!entry) {
7064                                 ret = -ENOMEM;
7065                                 goto out;
7066                         }
7067                         memset(entry, 0, sizeof(*entry));
7068                         entry->bytenr = dback->disk_bytenr;
7069                         entry->bytes = dback->bytes;
7070                         list_add_tail(&entry->list, &entries);
7071                         nr_entries++;
7072                 }
7073
7074                 /*
7075                  * If we only have on entry we may think the entries agree when
7076                  * in reality they don't so we have to do some extra checking.
7077                  */
7078                 if (dback->disk_bytenr != rec->start ||
7079                     dback->bytes != rec->nr || back->broken)
7080                         mismatch = 1;
7081
7082                 if (back->broken) {
7083                         entry->broken++;
7084                         broken_entries++;
7085                 }
7086
7087                 entry->count++;
7088         }
7089
7090         /* Yay all the backrefs agree, carry on good sir */
7091         if (nr_entries <= 1 && !mismatch)
7092                 goto out;
7093
7094         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7095                 "%Lu\n", rec->start);
7096
7097         /*
7098          * First we want to see if the backrefs can agree amongst themselves who
7099          * is right, so figure out which one of the entries has the highest
7100          * count.
7101          */
7102         best = find_most_right_entry(&entries);
7103
7104         /*
7105          * Ok so we may have an even split between what the backrefs think, so
7106          * this is where we use the extent ref to see what it thinks.
7107          */
7108         if (!best) {
7109                 entry = find_entry(&entries, rec->start, rec->nr);
7110                 if (!entry && (!broken_entries || !rec->found_rec)) {
7111                         fprintf(stderr, "Backrefs don't agree with each other "
7112                                 "and extent record doesn't agree with anybody,"
7113                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7114                                 rec->start, rec->nr);
7115                         ret = -EINVAL;
7116                         goto out;
7117                 } else if (!entry) {
7118                         /*
7119                          * Ok our backrefs were broken, we'll assume this is the
7120                          * correct value and add an entry for this range.
7121                          */
7122                         entry = malloc(sizeof(struct extent_entry));
7123                         if (!entry) {
7124                                 ret = -ENOMEM;
7125                                 goto out;
7126                         }
7127                         memset(entry, 0, sizeof(*entry));
7128                         entry->bytenr = rec->start;
7129                         entry->bytes = rec->nr;
7130                         list_add_tail(&entry->list, &entries);
7131                         nr_entries++;
7132                 }
7133                 entry->count++;
7134                 best = find_most_right_entry(&entries);
7135                 if (!best) {
7136                         fprintf(stderr, "Backrefs and extent record evenly "
7137                                 "split on who is right, this is going to "
7138                                 "require user input to fix bytenr %Lu bytes "
7139                                 "%Lu\n", rec->start, rec->nr);
7140                         ret = -EINVAL;
7141                         goto out;
7142                 }
7143         }
7144
7145         /*
7146          * I don't think this can happen currently as we'll abort() if we catch
7147          * this case higher up, but in case somebody removes that we still can't
7148          * deal with it properly here yet, so just bail out of that's the case.
7149          */
7150         if (best->bytenr != rec->start) {
7151                 fprintf(stderr, "Extent start and backref starts don't match, "
7152                         "please use btrfs-image on this file system and send "
7153                         "it to a btrfs developer so they can make fsck fix "
7154                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7155                         rec->start, rec->nr);
7156                 ret = -EINVAL;
7157                 goto out;
7158         }
7159
7160         /*
7161          * Ok great we all agreed on an extent record, let's go find the real
7162          * references and fix up the ones that don't match.
7163          */
7164         list_for_each_entry(back, &rec->backrefs, list) {
7165                 if (back->full_backref || !back->is_data)
7166                         continue;
7167
7168                 dback = to_data_backref(back);
7169
7170                 /*
7171                  * Still ignoring backrefs that don't have a real ref attached
7172                  * to them.
7173                  */
7174                 if (dback->found_ref == 0)
7175                         continue;
7176
7177                 if (dback->bytes == best->bytes &&
7178                     dback->disk_bytenr == best->bytenr)
7179                         continue;
7180
7181                 ret = repair_ref(info, path, dback, best);
7182                 if (ret)
7183                         goto out;
7184         }
7185
7186         /*
7187          * Ok we messed with the actual refs, which means we need to drop our
7188          * entire cache and go back and rescan.  I know this is a huge pain and
7189          * adds a lot of extra work, but it's the only way to be safe.  Once all
7190          * the backrefs agree we may not need to do anything to the extent
7191          * record itself.
7192          */
7193         ret = -EAGAIN;
7194 out:
7195         while (!list_empty(&entries)) {
7196                 entry = list_entry(entries.next, struct extent_entry, list);
7197                 list_del_init(&entry->list);
7198                 free(entry);
7199         }
7200         return ret;
7201 }
7202
7203 static int process_duplicates(struct btrfs_root *root,
7204                               struct cache_tree *extent_cache,
7205                               struct extent_record *rec)
7206 {
7207         struct extent_record *good, *tmp;
7208         struct cache_extent *cache;
7209         int ret;
7210
7211         /*
7212          * If we found a extent record for this extent then return, or if we
7213          * have more than one duplicate we are likely going to need to delete
7214          * something.
7215          */
7216         if (rec->found_rec || rec->num_duplicates > 1)
7217                 return 0;
7218
7219         /* Shouldn't happen but just in case */
7220         BUG_ON(!rec->num_duplicates);
7221
7222         /*
7223          * So this happens if we end up with a backref that doesn't match the
7224          * actual extent entry.  So either the backref is bad or the extent
7225          * entry is bad.  Either way we want to have the extent_record actually
7226          * reflect what we found in the extent_tree, so we need to take the
7227          * duplicate out and use that as the extent_record since the only way we
7228          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7229          */
7230         remove_cache_extent(extent_cache, &rec->cache);
7231
7232         good = to_extent_record(rec->dups.next);
7233         list_del_init(&good->list);
7234         INIT_LIST_HEAD(&good->backrefs);
7235         INIT_LIST_HEAD(&good->dups);
7236         good->cache.start = good->start;
7237         good->cache.size = good->nr;
7238         good->content_checked = 0;
7239         good->owner_ref_checked = 0;
7240         good->num_duplicates = 0;
7241         good->refs = rec->refs;
7242         list_splice_init(&rec->backrefs, &good->backrefs);
7243         while (1) {
7244                 cache = lookup_cache_extent(extent_cache, good->start,
7245                                             good->nr);
7246                 if (!cache)
7247                         break;
7248                 tmp = container_of(cache, struct extent_record, cache);
7249
7250                 /*
7251                  * If we find another overlapping extent and it's found_rec is
7252                  * set then it's a duplicate and we need to try and delete
7253                  * something.
7254                  */
7255                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7256                         if (list_empty(&good->list))
7257                                 list_add_tail(&good->list,
7258                                               &duplicate_extents);
7259                         good->num_duplicates += tmp->num_duplicates + 1;
7260                         list_splice_init(&tmp->dups, &good->dups);
7261                         list_del_init(&tmp->list);
7262                         list_add_tail(&tmp->list, &good->dups);
7263                         remove_cache_extent(extent_cache, &tmp->cache);
7264                         continue;
7265                 }
7266
7267                 /*
7268                  * Ok we have another non extent item backed extent rec, so lets
7269                  * just add it to this extent and carry on like we did above.
7270                  */
7271                 good->refs += tmp->refs;
7272                 list_splice_init(&tmp->backrefs, &good->backrefs);
7273                 remove_cache_extent(extent_cache, &tmp->cache);
7274                 free(tmp);
7275         }
7276         ret = insert_cache_extent(extent_cache, &good->cache);
7277         BUG_ON(ret);
7278         free(rec);
7279         return good->num_duplicates ? 0 : 1;
7280 }
7281
7282 static int delete_duplicate_records(struct btrfs_root *root,
7283                                     struct extent_record *rec)
7284 {
7285         struct btrfs_trans_handle *trans;
7286         LIST_HEAD(delete_list);
7287         struct btrfs_path *path;
7288         struct extent_record *tmp, *good, *n;
7289         int nr_del = 0;
7290         int ret = 0, err;
7291         struct btrfs_key key;
7292
7293         path = btrfs_alloc_path();
7294         if (!path) {
7295                 ret = -ENOMEM;
7296                 goto out;
7297         }
7298
7299         good = rec;
7300         /* Find the record that covers all of the duplicates. */
7301         list_for_each_entry(tmp, &rec->dups, list) {
7302                 if (good->start < tmp->start)
7303                         continue;
7304                 if (good->nr > tmp->nr)
7305                         continue;
7306
7307                 if (tmp->start + tmp->nr < good->start + good->nr) {
7308                         fprintf(stderr, "Ok we have overlapping extents that "
7309                                 "aren't completely covered by each other, this "
7310                                 "is going to require more careful thought.  "
7311                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7312                                 tmp->start, tmp->nr, good->start, good->nr);
7313                         abort();
7314                 }
7315                 good = tmp;
7316         }
7317
7318         if (good != rec)
7319                 list_add_tail(&rec->list, &delete_list);
7320
7321         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7322                 if (tmp == good)
7323                         continue;
7324                 list_move_tail(&tmp->list, &delete_list);
7325         }
7326
7327         root = root->fs_info->extent_root;
7328         trans = btrfs_start_transaction(root, 1);
7329         if (IS_ERR(trans)) {
7330                 ret = PTR_ERR(trans);
7331                 goto out;
7332         }
7333
7334         list_for_each_entry(tmp, &delete_list, list) {
7335                 if (tmp->found_rec == 0)
7336                         continue;
7337                 key.objectid = tmp->start;
7338                 key.type = BTRFS_EXTENT_ITEM_KEY;
7339                 key.offset = tmp->nr;
7340
7341                 /* Shouldn't happen but just in case */
7342                 if (tmp->metadata) {
7343                         fprintf(stderr, "Well this shouldn't happen, extent "
7344                                 "record overlaps but is metadata? "
7345                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7346                         abort();
7347                 }
7348
7349                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7350                 if (ret) {
7351                         if (ret > 0)
7352                                 ret = -EINVAL;
7353                         break;
7354                 }
7355                 ret = btrfs_del_item(trans, root, path);
7356                 if (ret)
7357                         break;
7358                 btrfs_release_path(path);
7359                 nr_del++;
7360         }
7361         err = btrfs_commit_transaction(trans, root);
7362         if (err && !ret)
7363                 ret = err;
7364 out:
7365         while (!list_empty(&delete_list)) {
7366                 tmp = to_extent_record(delete_list.next);
7367                 list_del_init(&tmp->list);
7368                 if (tmp == rec)
7369                         continue;
7370                 free(tmp);
7371         }
7372
7373         while (!list_empty(&rec->dups)) {
7374                 tmp = to_extent_record(rec->dups.next);
7375                 list_del_init(&tmp->list);
7376                 free(tmp);
7377         }
7378
7379         btrfs_free_path(path);
7380
7381         if (!ret && !nr_del)
7382                 rec->num_duplicates = 0;
7383
7384         return ret ? ret : nr_del;
7385 }
7386
7387 static int find_possible_backrefs(struct btrfs_fs_info *info,
7388                                   struct btrfs_path *path,
7389                                   struct cache_tree *extent_cache,
7390                                   struct extent_record *rec)
7391 {
7392         struct btrfs_root *root;
7393         struct extent_backref *back;
7394         struct data_backref *dback;
7395         struct cache_extent *cache;
7396         struct btrfs_file_extent_item *fi;
7397         struct btrfs_key key;
7398         u64 bytenr, bytes;
7399         int ret;
7400
7401         list_for_each_entry(back, &rec->backrefs, list) {
7402                 /* Don't care about full backrefs (poor unloved backrefs) */
7403                 if (back->full_backref || !back->is_data)
7404                         continue;
7405
7406                 dback = to_data_backref(back);
7407
7408                 /* We found this one, we don't need to do a lookup */
7409                 if (dback->found_ref)
7410                         continue;
7411
7412                 key.objectid = dback->root;
7413                 key.type = BTRFS_ROOT_ITEM_KEY;
7414                 key.offset = (u64)-1;
7415
7416                 root = btrfs_read_fs_root(info, &key);
7417
7418                 /* No root, definitely a bad ref, skip */
7419                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7420                         continue;
7421                 /* Other err, exit */
7422                 if (IS_ERR(root))
7423                         return PTR_ERR(root);
7424
7425                 key.objectid = dback->owner;
7426                 key.type = BTRFS_EXTENT_DATA_KEY;
7427                 key.offset = dback->offset;
7428                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7429                 if (ret) {
7430                         btrfs_release_path(path);
7431                         if (ret < 0)
7432                                 return ret;
7433                         /* Didn't find it, we can carry on */
7434                         ret = 0;
7435                         continue;
7436                 }
7437
7438                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7439                                     struct btrfs_file_extent_item);
7440                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7441                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7442                 btrfs_release_path(path);
7443                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7444                 if (cache) {
7445                         struct extent_record *tmp;
7446                         tmp = container_of(cache, struct extent_record, cache);
7447
7448                         /*
7449                          * If we found an extent record for the bytenr for this
7450                          * particular backref then we can't add it to our
7451                          * current extent record.  We only want to add backrefs
7452                          * that don't have a corresponding extent item in the
7453                          * extent tree since they likely belong to this record
7454                          * and we need to fix it if it doesn't match bytenrs.
7455                          */
7456                         if  (tmp->found_rec)
7457                                 continue;
7458                 }
7459
7460                 dback->found_ref += 1;
7461                 dback->disk_bytenr = bytenr;
7462                 dback->bytes = bytes;
7463
7464                 /*
7465                  * Set this so the verify backref code knows not to trust the
7466                  * values in this backref.
7467                  */
7468                 back->broken = 1;
7469         }
7470
7471         return 0;
7472 }
7473
7474 /*
7475  * Record orphan data ref into corresponding root.
7476  *
7477  * Return 0 if the extent item contains data ref and recorded.
7478  * Return 1 if the extent item contains no useful data ref
7479  *   On that case, it may contains only shared_dataref or metadata backref
7480  *   or the file extent exists(this should be handled by the extent bytenr
7481  *   recovery routine)
7482  * Return <0 if something goes wrong.
7483  */
7484 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7485                                       struct extent_record *rec)
7486 {
7487         struct btrfs_key key;
7488         struct btrfs_root *dest_root;
7489         struct extent_backref *back;
7490         struct data_backref *dback;
7491         struct orphan_data_extent *orphan;
7492         struct btrfs_path *path;
7493         int recorded_data_ref = 0;
7494         int ret = 0;
7495
7496         if (rec->metadata)
7497                 return 1;
7498         path = btrfs_alloc_path();
7499         if (!path)
7500                 return -ENOMEM;
7501         list_for_each_entry(back, &rec->backrefs, list) {
7502                 if (back->full_backref || !back->is_data ||
7503                     !back->found_extent_tree)
7504                         continue;
7505                 dback = to_data_backref(back);
7506                 if (dback->found_ref)
7507                         continue;
7508                 key.objectid = dback->root;
7509                 key.type = BTRFS_ROOT_ITEM_KEY;
7510                 key.offset = (u64)-1;
7511
7512                 dest_root = btrfs_read_fs_root(fs_info, &key);
7513
7514                 /* For non-exist root we just skip it */
7515                 if (IS_ERR(dest_root) || !dest_root)
7516                         continue;
7517
7518                 key.objectid = dback->owner;
7519                 key.type = BTRFS_EXTENT_DATA_KEY;
7520                 key.offset = dback->offset;
7521
7522                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7523                 /*
7524                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7525                  * we need to record it for inode/file extent rebuild.
7526                  * For ret > 0, we record it only for file extent rebuild.
7527                  * For ret == 0, the file extent exists but only bytenr
7528                  * mismatch, let the original bytenr fix routine to handle,
7529                  * don't record it.
7530                  */
7531                 if (ret == 0)
7532                         continue;
7533                 ret = 0;
7534                 orphan = malloc(sizeof(*orphan));
7535                 if (!orphan) {
7536                         ret = -ENOMEM;
7537                         goto out;
7538                 }
7539                 INIT_LIST_HEAD(&orphan->list);
7540                 orphan->root = dback->root;
7541                 orphan->objectid = dback->owner;
7542                 orphan->offset = dback->offset;
7543                 orphan->disk_bytenr = rec->cache.start;
7544                 orphan->disk_len = rec->cache.size;
7545                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7546                 recorded_data_ref = 1;
7547         }
7548 out:
7549         btrfs_free_path(path);
7550         if (!ret)
7551                 return !recorded_data_ref;
7552         else
7553                 return ret;
7554 }
7555
7556 /*
7557  * when an incorrect extent item is found, this will delete
7558  * all of the existing entries for it and recreate them
7559  * based on what the tree scan found.
7560  */
7561 static int fixup_extent_refs(struct btrfs_fs_info *info,
7562                              struct cache_tree *extent_cache,
7563                              struct extent_record *rec)
7564 {
7565         struct btrfs_trans_handle *trans = NULL;
7566         int ret;
7567         struct btrfs_path *path;
7568         struct list_head *cur = rec->backrefs.next;
7569         struct cache_extent *cache;
7570         struct extent_backref *back;
7571         int allocated = 0;
7572         u64 flags = 0;
7573
7574         if (rec->flag_block_full_backref)
7575                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7576
7577         path = btrfs_alloc_path();
7578         if (!path)
7579                 return -ENOMEM;
7580
7581         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7582                 /*
7583                  * Sometimes the backrefs themselves are so broken they don't
7584                  * get attached to any meaningful rec, so first go back and
7585                  * check any of our backrefs that we couldn't find and throw
7586                  * them into the list if we find the backref so that
7587                  * verify_backrefs can figure out what to do.
7588                  */
7589                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7590                 if (ret < 0)
7591                         goto out;
7592         }
7593
7594         /* step one, make sure all of the backrefs agree */
7595         ret = verify_backrefs(info, path, rec);
7596         if (ret < 0)
7597                 goto out;
7598
7599         trans = btrfs_start_transaction(info->extent_root, 1);
7600         if (IS_ERR(trans)) {
7601                 ret = PTR_ERR(trans);
7602                 goto out;
7603         }
7604
7605         /* step two, delete all the existing records */
7606         ret = delete_extent_records(trans, info->extent_root, path,
7607                                     rec->start, rec->max_size);
7608
7609         if (ret < 0)
7610                 goto out;
7611
7612         /* was this block corrupt?  If so, don't add references to it */
7613         cache = lookup_cache_extent(info->corrupt_blocks,
7614                                     rec->start, rec->max_size);
7615         if (cache) {
7616                 ret = 0;
7617                 goto out;
7618         }
7619
7620         /* step three, recreate all the refs we did find */
7621         while(cur != &rec->backrefs) {
7622                 back = to_extent_backref(cur);
7623                 cur = cur->next;
7624
7625                 /*
7626                  * if we didn't find any references, don't create a
7627                  * new extent record
7628                  */
7629                 if (!back->found_ref)
7630                         continue;
7631
7632                 rec->bad_full_backref = 0;
7633                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7634                 allocated = 1;
7635
7636                 if (ret)
7637                         goto out;
7638         }
7639 out:
7640         if (trans) {
7641                 int err = btrfs_commit_transaction(trans, info->extent_root);
7642                 if (!ret)
7643                         ret = err;
7644         }
7645
7646         btrfs_free_path(path);
7647         return ret;
7648 }
7649
7650 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7651                               struct extent_record *rec)
7652 {
7653         struct btrfs_trans_handle *trans;
7654         struct btrfs_root *root = fs_info->extent_root;
7655         struct btrfs_path *path;
7656         struct btrfs_extent_item *ei;
7657         struct btrfs_key key;
7658         u64 flags;
7659         int ret = 0;
7660
7661         key.objectid = rec->start;
7662         if (rec->metadata) {
7663                 key.type = BTRFS_METADATA_ITEM_KEY;
7664                 key.offset = rec->info_level;
7665         } else {
7666                 key.type = BTRFS_EXTENT_ITEM_KEY;
7667                 key.offset = rec->max_size;
7668         }
7669
7670         path = btrfs_alloc_path();
7671         if (!path)
7672                 return -ENOMEM;
7673
7674         trans = btrfs_start_transaction(root, 0);
7675         if (IS_ERR(trans)) {
7676                 btrfs_free_path(path);
7677                 return PTR_ERR(trans);
7678         }
7679
7680         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7681         if (ret < 0) {
7682                 btrfs_free_path(path);
7683                 btrfs_commit_transaction(trans, root);
7684                 return ret;
7685         } else if (ret) {
7686                 fprintf(stderr, "Didn't find extent for %llu\n",
7687                         (unsigned long long)rec->start);
7688                 btrfs_free_path(path);
7689                 btrfs_commit_transaction(trans, root);
7690                 return -ENOENT;
7691         }
7692
7693         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7694                             struct btrfs_extent_item);
7695         flags = btrfs_extent_flags(path->nodes[0], ei);
7696         if (rec->flag_block_full_backref) {
7697                 fprintf(stderr, "setting full backref on %llu\n",
7698                         (unsigned long long)key.objectid);
7699                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7700         } else {
7701                 fprintf(stderr, "clearing full backref on %llu\n",
7702                         (unsigned long long)key.objectid);
7703                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7704         }
7705         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7706         btrfs_mark_buffer_dirty(path->nodes[0]);
7707         btrfs_free_path(path);
7708         return btrfs_commit_transaction(trans, root);
7709 }
7710
7711 /* right now we only prune from the extent allocation tree */
7712 static int prune_one_block(struct btrfs_trans_handle *trans,
7713                            struct btrfs_fs_info *info,
7714                            struct btrfs_corrupt_block *corrupt)
7715 {
7716         int ret;
7717         struct btrfs_path path;
7718         struct extent_buffer *eb;
7719         u64 found;
7720         int slot;
7721         int nritems;
7722         int level = corrupt->level + 1;
7723
7724         btrfs_init_path(&path);
7725 again:
7726         /* we want to stop at the parent to our busted block */
7727         path.lowest_level = level;
7728
7729         ret = btrfs_search_slot(trans, info->extent_root,
7730                                 &corrupt->key, &path, -1, 1);
7731
7732         if (ret < 0)
7733                 goto out;
7734
7735         eb = path.nodes[level];
7736         if (!eb) {
7737                 ret = -ENOENT;
7738                 goto out;
7739         }
7740
7741         /*
7742          * hopefully the search gave us the block we want to prune,
7743          * lets try that first
7744          */
7745         slot = path.slots[level];
7746         found =  btrfs_node_blockptr(eb, slot);
7747         if (found == corrupt->cache.start)
7748                 goto del_ptr;
7749
7750         nritems = btrfs_header_nritems(eb);
7751
7752         /* the search failed, lets scan this node and hope we find it */
7753         for (slot = 0; slot < nritems; slot++) {
7754                 found =  btrfs_node_blockptr(eb, slot);
7755                 if (found == corrupt->cache.start)
7756                         goto del_ptr;
7757         }
7758         /*
7759          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7760          * to this block
7761          */
7762         if (eb == info->extent_root->node) {
7763                 ret = -ENOENT;
7764                 goto out;
7765         } else {
7766                 level++;
7767                 btrfs_release_path(&path);
7768                 goto again;
7769         }
7770
7771 del_ptr:
7772         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7773         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7774
7775 out:
7776         btrfs_release_path(&path);
7777         return ret;
7778 }
7779
7780 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7781 {
7782         struct btrfs_trans_handle *trans = NULL;
7783         struct cache_extent *cache;
7784         struct btrfs_corrupt_block *corrupt;
7785
7786         while (1) {
7787                 cache = search_cache_extent(info->corrupt_blocks, 0);
7788                 if (!cache)
7789                         break;
7790                 if (!trans) {
7791                         trans = btrfs_start_transaction(info->extent_root, 1);
7792                         if (IS_ERR(trans))
7793                                 return PTR_ERR(trans);
7794                 }
7795                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7796                 prune_one_block(trans, info, corrupt);
7797                 remove_cache_extent(info->corrupt_blocks, cache);
7798         }
7799         if (trans)
7800                 return btrfs_commit_transaction(trans, info->extent_root);
7801         return 0;
7802 }
7803
7804 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7805 {
7806         struct btrfs_block_group_cache *cache;
7807         u64 start, end;
7808         int ret;
7809
7810         while (1) {
7811                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7812                                             &start, &end, EXTENT_DIRTY);
7813                 if (ret)
7814                         break;
7815                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7816                                    GFP_NOFS);
7817         }
7818
7819         start = 0;
7820         while (1) {
7821                 cache = btrfs_lookup_first_block_group(fs_info, start);
7822                 if (!cache)
7823                         break;
7824                 if (cache->cached)
7825                         cache->cached = 0;
7826                 start = cache->key.objectid + cache->key.offset;
7827         }
7828 }
7829
7830 static int check_extent_refs(struct btrfs_root *root,
7831                              struct cache_tree *extent_cache)
7832 {
7833         struct extent_record *rec;
7834         struct cache_extent *cache;
7835         int err = 0;
7836         int ret = 0;
7837         int fixed = 0;
7838         int had_dups = 0;
7839         int recorded = 0;
7840
7841         if (repair) {
7842                 /*
7843                  * if we're doing a repair, we have to make sure
7844                  * we don't allocate from the problem extents.
7845                  * In the worst case, this will be all the
7846                  * extents in the FS
7847                  */
7848                 cache = search_cache_extent(extent_cache, 0);
7849                 while(cache) {
7850                         rec = container_of(cache, struct extent_record, cache);
7851                         set_extent_dirty(root->fs_info->excluded_extents,
7852                                          rec->start,
7853                                          rec->start + rec->max_size - 1,
7854                                          GFP_NOFS);
7855                         cache = next_cache_extent(cache);
7856                 }
7857
7858                 /* pin down all the corrupted blocks too */
7859                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7860                 while(cache) {
7861                         set_extent_dirty(root->fs_info->excluded_extents,
7862                                          cache->start,
7863                                          cache->start + cache->size - 1,
7864                                          GFP_NOFS);
7865                         cache = next_cache_extent(cache);
7866                 }
7867                 prune_corrupt_blocks(root->fs_info);
7868                 reset_cached_block_groups(root->fs_info);
7869         }
7870
7871         reset_cached_block_groups(root->fs_info);
7872
7873         /*
7874          * We need to delete any duplicate entries we find first otherwise we
7875          * could mess up the extent tree when we have backrefs that actually
7876          * belong to a different extent item and not the weird duplicate one.
7877          */
7878         while (repair && !list_empty(&duplicate_extents)) {
7879                 rec = to_extent_record(duplicate_extents.next);
7880                 list_del_init(&rec->list);
7881
7882                 /* Sometimes we can find a backref before we find an actual
7883                  * extent, so we need to process it a little bit to see if there
7884                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7885                  * if this is a backref screwup.  If we need to delete stuff
7886                  * process_duplicates() will return 0, otherwise it will return
7887                  * 1 and we
7888                  */
7889                 if (process_duplicates(root, extent_cache, rec))
7890                         continue;
7891                 ret = delete_duplicate_records(root, rec);
7892                 if (ret < 0)
7893                         return ret;
7894                 /*
7895                  * delete_duplicate_records will return the number of entries
7896                  * deleted, so if it's greater than 0 then we know we actually
7897                  * did something and we need to remove.
7898                  */
7899                 if (ret)
7900                         had_dups = 1;
7901         }
7902
7903         if (had_dups)
7904                 return -EAGAIN;
7905
7906         while(1) {
7907                 int cur_err = 0;
7908
7909                 fixed = 0;
7910                 recorded = 0;
7911                 cache = search_cache_extent(extent_cache, 0);
7912                 if (!cache)
7913                         break;
7914                 rec = container_of(cache, struct extent_record, cache);
7915                 if (rec->num_duplicates) {
7916                         fprintf(stderr, "extent item %llu has multiple extent "
7917                                 "items\n", (unsigned long long)rec->start);
7918                         err = 1;
7919                         cur_err = 1;
7920                 }
7921
7922                 if (rec->refs != rec->extent_item_refs) {
7923                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7924                                 (unsigned long long)rec->start,
7925                                 (unsigned long long)rec->nr);
7926                         fprintf(stderr, "extent item %llu, found %llu\n",
7927                                 (unsigned long long)rec->extent_item_refs,
7928                                 (unsigned long long)rec->refs);
7929                         ret = record_orphan_data_extents(root->fs_info, rec);
7930                         if (ret < 0)
7931                                 goto repair_abort;
7932                         if (ret == 0) {
7933                                 recorded = 1;
7934                         } else {
7935                                 /*
7936                                  * we can't use the extent to repair file
7937                                  * extent, let the fallback method handle it.
7938                                  */
7939                                 if (!fixed && repair) {
7940                                         ret = fixup_extent_refs(
7941                                                         root->fs_info,
7942                                                         extent_cache, rec);
7943                                         if (ret)
7944                                                 goto repair_abort;
7945                                         fixed = 1;
7946                                 }
7947                         }
7948                         err = 1;
7949                         cur_err = 1;
7950                 }
7951                 if (all_backpointers_checked(rec, 1)) {
7952                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7953                                 (unsigned long long)rec->start,
7954                                 (unsigned long long)rec->nr);
7955
7956                         if (!fixed && !recorded && repair) {
7957                                 ret = fixup_extent_refs(root->fs_info,
7958                                                         extent_cache, rec);
7959                                 if (ret)
7960                                         goto repair_abort;
7961                                 fixed = 1;
7962                         }
7963                         cur_err = 1;
7964                         err = 1;
7965                 }
7966                 if (!rec->owner_ref_checked) {
7967                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7968                                 (unsigned long long)rec->start,
7969                                 (unsigned long long)rec->nr);
7970                         if (!fixed && !recorded && repair) {
7971                                 ret = fixup_extent_refs(root->fs_info,
7972                                                         extent_cache, rec);
7973                                 if (ret)
7974                                         goto repair_abort;
7975                                 fixed = 1;
7976                         }
7977                         err = 1;
7978                         cur_err = 1;
7979                 }
7980                 if (rec->bad_full_backref) {
7981                         fprintf(stderr, "bad full backref, on [%llu]\n",
7982                                 (unsigned long long)rec->start);
7983                         if (repair) {
7984                                 ret = fixup_extent_flags(root->fs_info, rec);
7985                                 if (ret)
7986                                         goto repair_abort;
7987                                 fixed = 1;
7988                         }
7989                         err = 1;
7990                         cur_err = 1;
7991                 }
7992                 /*
7993                  * Although it's not a extent ref's problem, we reuse this
7994                  * routine for error reporting.
7995                  * No repair function yet.
7996                  */
7997                 if (rec->crossing_stripes) {
7998                         fprintf(stderr,
7999                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8000                                 rec->start, rec->start + rec->max_size);
8001                         err = 1;
8002                         cur_err = 1;
8003                 }
8004
8005                 if (rec->wrong_chunk_type) {
8006                         fprintf(stderr,
8007                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8008                                 rec->start, rec->start + rec->max_size);
8009                         err = 1;
8010                         cur_err = 1;
8011                 }
8012
8013                 remove_cache_extent(extent_cache, cache);
8014                 free_all_extent_backrefs(rec);
8015                 if (!init_extent_tree && repair && (!cur_err || fixed))
8016                         clear_extent_dirty(root->fs_info->excluded_extents,
8017                                            rec->start,
8018                                            rec->start + rec->max_size - 1,
8019                                            GFP_NOFS);
8020                 free(rec);
8021         }
8022 repair_abort:
8023         if (repair) {
8024                 if (ret && ret != -EAGAIN) {
8025                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8026                         exit(1);
8027                 } else if (!ret) {
8028                         struct btrfs_trans_handle *trans;
8029
8030                         root = root->fs_info->extent_root;
8031                         trans = btrfs_start_transaction(root, 1);
8032                         if (IS_ERR(trans)) {
8033                                 ret = PTR_ERR(trans);
8034                                 goto repair_abort;
8035                         }
8036
8037                         btrfs_fix_block_accounting(trans, root);
8038                         ret = btrfs_commit_transaction(trans, root);
8039                         if (ret)
8040                                 goto repair_abort;
8041                 }
8042                 if (err)
8043                         fprintf(stderr, "repaired damaged extent references\n");
8044                 return ret;
8045         }
8046         return err;
8047 }
8048
8049 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8050 {
8051         u64 stripe_size;
8052
8053         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8054                 stripe_size = length;
8055                 stripe_size /= num_stripes;
8056         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8057                 stripe_size = length * 2;
8058                 stripe_size /= num_stripes;
8059         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8060                 stripe_size = length;
8061                 stripe_size /= (num_stripes - 1);
8062         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8063                 stripe_size = length;
8064                 stripe_size /= (num_stripes - 2);
8065         } else {
8066                 stripe_size = length;
8067         }
8068         return stripe_size;
8069 }
8070
8071 /*
8072  * Check the chunk with its block group/dev list ref:
8073  * Return 0 if all refs seems valid.
8074  * Return 1 if part of refs seems valid, need later check for rebuild ref
8075  * like missing block group and needs to search extent tree to rebuild them.
8076  * Return -1 if essential refs are missing and unable to rebuild.
8077  */
8078 static int check_chunk_refs(struct chunk_record *chunk_rec,
8079                             struct block_group_tree *block_group_cache,
8080                             struct device_extent_tree *dev_extent_cache,
8081                             int silent)
8082 {
8083         struct cache_extent *block_group_item;
8084         struct block_group_record *block_group_rec;
8085         struct cache_extent *dev_extent_item;
8086         struct device_extent_record *dev_extent_rec;
8087         u64 devid;
8088         u64 offset;
8089         u64 length;
8090         int metadump_v2 = 0;
8091         int i;
8092         int ret = 0;
8093
8094         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8095                                                chunk_rec->offset,
8096                                                chunk_rec->length);
8097         if (block_group_item) {
8098                 block_group_rec = container_of(block_group_item,
8099                                                struct block_group_record,
8100                                                cache);
8101                 if (chunk_rec->length != block_group_rec->offset ||
8102                     chunk_rec->offset != block_group_rec->objectid ||
8103                     (!metadump_v2 &&
8104                      chunk_rec->type_flags != block_group_rec->flags)) {
8105                         if (!silent)
8106                                 fprintf(stderr,
8107                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8108                                         chunk_rec->objectid,
8109                                         chunk_rec->type,
8110                                         chunk_rec->offset,
8111                                         chunk_rec->length,
8112                                         chunk_rec->offset,
8113                                         chunk_rec->type_flags,
8114                                         block_group_rec->objectid,
8115                                         block_group_rec->type,
8116                                         block_group_rec->offset,
8117                                         block_group_rec->offset,
8118                                         block_group_rec->objectid,
8119                                         block_group_rec->flags);
8120                         ret = -1;
8121                 } else {
8122                         list_del_init(&block_group_rec->list);
8123                         chunk_rec->bg_rec = block_group_rec;
8124                 }
8125         } else {
8126                 if (!silent)
8127                         fprintf(stderr,
8128                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8129                                 chunk_rec->objectid,
8130                                 chunk_rec->type,
8131                                 chunk_rec->offset,
8132                                 chunk_rec->length,
8133                                 chunk_rec->offset,
8134                                 chunk_rec->type_flags);
8135                 ret = 1;
8136         }
8137
8138         if (metadump_v2)
8139                 return ret;
8140
8141         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8142                                     chunk_rec->num_stripes);
8143         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8144                 devid = chunk_rec->stripes[i].devid;
8145                 offset = chunk_rec->stripes[i].offset;
8146                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8147                                                        devid, offset, length);
8148                 if (dev_extent_item) {
8149                         dev_extent_rec = container_of(dev_extent_item,
8150                                                 struct device_extent_record,
8151                                                 cache);
8152                         if (dev_extent_rec->objectid != devid ||
8153                             dev_extent_rec->offset != offset ||
8154                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8155                             dev_extent_rec->length != length) {
8156                                 if (!silent)
8157                                         fprintf(stderr,
8158                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8159                                                 chunk_rec->objectid,
8160                                                 chunk_rec->type,
8161                                                 chunk_rec->offset,
8162                                                 chunk_rec->stripes[i].devid,
8163                                                 chunk_rec->stripes[i].offset,
8164                                                 dev_extent_rec->objectid,
8165                                                 dev_extent_rec->offset,
8166                                                 dev_extent_rec->length);
8167                                 ret = -1;
8168                         } else {
8169                                 list_move(&dev_extent_rec->chunk_list,
8170                                           &chunk_rec->dextents);
8171                         }
8172                 } else {
8173                         if (!silent)
8174                                 fprintf(stderr,
8175                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8176                                         chunk_rec->objectid,
8177                                         chunk_rec->type,
8178                                         chunk_rec->offset,
8179                                         chunk_rec->stripes[i].devid,
8180                                         chunk_rec->stripes[i].offset);
8181                         ret = -1;
8182                 }
8183         }
8184         return ret;
8185 }
8186
8187 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8188 int check_chunks(struct cache_tree *chunk_cache,
8189                  struct block_group_tree *block_group_cache,
8190                  struct device_extent_tree *dev_extent_cache,
8191                  struct list_head *good, struct list_head *bad,
8192                  struct list_head *rebuild, int silent)
8193 {
8194         struct cache_extent *chunk_item;
8195         struct chunk_record *chunk_rec;
8196         struct block_group_record *bg_rec;
8197         struct device_extent_record *dext_rec;
8198         int err;
8199         int ret = 0;
8200
8201         chunk_item = first_cache_extent(chunk_cache);
8202         while (chunk_item) {
8203                 chunk_rec = container_of(chunk_item, struct chunk_record,
8204                                          cache);
8205                 err = check_chunk_refs(chunk_rec, block_group_cache,
8206                                        dev_extent_cache, silent);
8207                 if (err < 0)
8208                         ret = err;
8209                 if (err == 0 && good)
8210                         list_add_tail(&chunk_rec->list, good);
8211                 if (err > 0 && rebuild)
8212                         list_add_tail(&chunk_rec->list, rebuild);
8213                 if (err < 0 && bad)
8214                         list_add_tail(&chunk_rec->list, bad);
8215                 chunk_item = next_cache_extent(chunk_item);
8216         }
8217
8218         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8219                 if (!silent)
8220                         fprintf(stderr,
8221                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8222                                 bg_rec->objectid,
8223                                 bg_rec->offset,
8224                                 bg_rec->flags);
8225                 if (!ret)
8226                         ret = 1;
8227         }
8228
8229         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8230                             chunk_list) {
8231                 if (!silent)
8232                         fprintf(stderr,
8233                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8234                                 dext_rec->objectid,
8235                                 dext_rec->offset,
8236                                 dext_rec->length);
8237                 if (!ret)
8238                         ret = 1;
8239         }
8240         return ret;
8241 }
8242
8243
8244 static int check_device_used(struct device_record *dev_rec,
8245                              struct device_extent_tree *dext_cache)
8246 {
8247         struct cache_extent *cache;
8248         struct device_extent_record *dev_extent_rec;
8249         u64 total_byte = 0;
8250
8251         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8252         while (cache) {
8253                 dev_extent_rec = container_of(cache,
8254                                               struct device_extent_record,
8255                                               cache);
8256                 if (dev_extent_rec->objectid != dev_rec->devid)
8257                         break;
8258
8259                 list_del_init(&dev_extent_rec->device_list);
8260                 total_byte += dev_extent_rec->length;
8261                 cache = next_cache_extent(cache);
8262         }
8263
8264         if (total_byte != dev_rec->byte_used) {
8265                 fprintf(stderr,
8266                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8267                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8268                         dev_rec->type, dev_rec->offset);
8269                 return -1;
8270         } else {
8271                 return 0;
8272         }
8273 }
8274
8275 /* check btrfs_dev_item -> btrfs_dev_extent */
8276 static int check_devices(struct rb_root *dev_cache,
8277                          struct device_extent_tree *dev_extent_cache)
8278 {
8279         struct rb_node *dev_node;
8280         struct device_record *dev_rec;
8281         struct device_extent_record *dext_rec;
8282         int err;
8283         int ret = 0;
8284
8285         dev_node = rb_first(dev_cache);
8286         while (dev_node) {
8287                 dev_rec = container_of(dev_node, struct device_record, node);
8288                 err = check_device_used(dev_rec, dev_extent_cache);
8289                 if (err)
8290                         ret = err;
8291
8292                 dev_node = rb_next(dev_node);
8293         }
8294         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8295                             device_list) {
8296                 fprintf(stderr,
8297                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8298                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8299                 if (!ret)
8300                         ret = 1;
8301         }
8302         return ret;
8303 }
8304
8305 static int add_root_item_to_list(struct list_head *head,
8306                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8307                                   u8 level, u8 drop_level,
8308                                   int level_size, struct btrfs_key *drop_key)
8309 {
8310
8311         struct root_item_record *ri_rec;
8312         ri_rec = malloc(sizeof(*ri_rec));
8313         if (!ri_rec)
8314                 return -ENOMEM;
8315         ri_rec->bytenr = bytenr;
8316         ri_rec->objectid = objectid;
8317         ri_rec->level = level;
8318         ri_rec->level_size = level_size;
8319         ri_rec->drop_level = drop_level;
8320         ri_rec->last_snapshot = last_snapshot;
8321         if (drop_key)
8322                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8323         list_add_tail(&ri_rec->list, head);
8324
8325         return 0;
8326 }
8327
8328 static void free_root_item_list(struct list_head *list)
8329 {
8330         struct root_item_record *ri_rec;
8331
8332         while (!list_empty(list)) {
8333                 ri_rec = list_first_entry(list, struct root_item_record,
8334                                           list);
8335                 list_del_init(&ri_rec->list);
8336                 free(ri_rec);
8337         }
8338 }
8339
8340 static int deal_root_from_list(struct list_head *list,
8341                                struct btrfs_root *root,
8342                                struct block_info *bits,
8343                                int bits_nr,
8344                                struct cache_tree *pending,
8345                                struct cache_tree *seen,
8346                                struct cache_tree *reada,
8347                                struct cache_tree *nodes,
8348                                struct cache_tree *extent_cache,
8349                                struct cache_tree *chunk_cache,
8350                                struct rb_root *dev_cache,
8351                                struct block_group_tree *block_group_cache,
8352                                struct device_extent_tree *dev_extent_cache)
8353 {
8354         int ret = 0;
8355         u64 last;
8356
8357         while (!list_empty(list)) {
8358                 struct root_item_record *rec;
8359                 struct extent_buffer *buf;
8360                 rec = list_entry(list->next,
8361                                  struct root_item_record, list);
8362                 last = 0;
8363                 buf = read_tree_block(root->fs_info->tree_root,
8364                                       rec->bytenr, rec->level_size, 0);
8365                 if (!extent_buffer_uptodate(buf)) {
8366                         free_extent_buffer(buf);
8367                         ret = -EIO;
8368                         break;
8369                 }
8370                 ret = add_root_to_pending(buf, extent_cache, pending,
8371                                     seen, nodes, rec->objectid);
8372                 if (ret < 0)
8373                         break;
8374                 /*
8375                  * To rebuild extent tree, we need deal with snapshot
8376                  * one by one, otherwise we deal with node firstly which
8377                  * can maximize readahead.
8378                  */
8379                 while (1) {
8380                         ret = run_next_block(root, bits, bits_nr, &last,
8381                                              pending, seen, reada, nodes,
8382                                              extent_cache, chunk_cache,
8383                                              dev_cache, block_group_cache,
8384                                              dev_extent_cache, rec);
8385                         if (ret != 0)
8386                                 break;
8387                 }
8388                 free_extent_buffer(buf);
8389                 list_del(&rec->list);
8390                 free(rec);
8391                 if (ret < 0)
8392                         break;
8393         }
8394         while (ret >= 0) {
8395                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8396                                      reada, nodes, extent_cache, chunk_cache,
8397                                      dev_cache, block_group_cache,
8398                                      dev_extent_cache, NULL);
8399                 if (ret != 0) {
8400                         if (ret > 0)
8401                                 ret = 0;
8402                         break;
8403                 }
8404         }
8405         return ret;
8406 }
8407
8408 static int check_chunks_and_extents(struct btrfs_root *root)
8409 {
8410         struct rb_root dev_cache;
8411         struct cache_tree chunk_cache;
8412         struct block_group_tree block_group_cache;
8413         struct device_extent_tree dev_extent_cache;
8414         struct cache_tree extent_cache;
8415         struct cache_tree seen;
8416         struct cache_tree pending;
8417         struct cache_tree reada;
8418         struct cache_tree nodes;
8419         struct extent_io_tree excluded_extents;
8420         struct cache_tree corrupt_blocks;
8421         struct btrfs_path path;
8422         struct btrfs_key key;
8423         struct btrfs_key found_key;
8424         int ret, err = 0;
8425         struct block_info *bits;
8426         int bits_nr;
8427         struct extent_buffer *leaf;
8428         int slot;
8429         struct btrfs_root_item ri;
8430         struct list_head dropping_trees;
8431         struct list_head normal_trees;
8432         struct btrfs_root *root1;
8433         u64 objectid;
8434         u32 level_size;
8435         u8 level;
8436
8437         dev_cache = RB_ROOT;
8438         cache_tree_init(&chunk_cache);
8439         block_group_tree_init(&block_group_cache);
8440         device_extent_tree_init(&dev_extent_cache);
8441
8442         cache_tree_init(&extent_cache);
8443         cache_tree_init(&seen);
8444         cache_tree_init(&pending);
8445         cache_tree_init(&nodes);
8446         cache_tree_init(&reada);
8447         cache_tree_init(&corrupt_blocks);
8448         extent_io_tree_init(&excluded_extents);
8449         INIT_LIST_HEAD(&dropping_trees);
8450         INIT_LIST_HEAD(&normal_trees);
8451
8452         if (repair) {
8453                 root->fs_info->excluded_extents = &excluded_extents;
8454                 root->fs_info->fsck_extent_cache = &extent_cache;
8455                 root->fs_info->free_extent_hook = free_extent_hook;
8456                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8457         }
8458
8459         bits_nr = 1024;
8460         bits = malloc(bits_nr * sizeof(struct block_info));
8461         if (!bits) {
8462                 perror("malloc");
8463                 exit(1);
8464         }
8465
8466         if (ctx.progress_enabled) {
8467                 ctx.tp = TASK_EXTENTS;
8468                 task_start(ctx.info);
8469         }
8470
8471 again:
8472         root1 = root->fs_info->tree_root;
8473         level = btrfs_header_level(root1->node);
8474         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8475                                     root1->node->start, 0, level, 0,
8476                                     root1->nodesize, NULL);
8477         if (ret < 0)
8478                 goto out;
8479         root1 = root->fs_info->chunk_root;
8480         level = btrfs_header_level(root1->node);
8481         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8482                                     root1->node->start, 0, level, 0,
8483                                     root1->nodesize, NULL);
8484         if (ret < 0)
8485                 goto out;
8486         btrfs_init_path(&path);
8487         key.offset = 0;
8488         key.objectid = 0;
8489         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8490         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8491                                         &key, &path, 0, 0);
8492         if (ret < 0)
8493                 goto out;
8494         while(1) {
8495                 leaf = path.nodes[0];
8496                 slot = path.slots[0];
8497                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8498                         ret = btrfs_next_leaf(root, &path);
8499                         if (ret != 0)
8500                                 break;
8501                         leaf = path.nodes[0];
8502                         slot = path.slots[0];
8503                 }
8504                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8505                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8506                         unsigned long offset;
8507                         u64 last_snapshot;
8508
8509                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8510                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8511                         last_snapshot = btrfs_root_last_snapshot(&ri);
8512                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8513                                 level = btrfs_root_level(&ri);
8514                                 level_size = root->nodesize;
8515                                 ret = add_root_item_to_list(&normal_trees,
8516                                                 found_key.objectid,
8517                                                 btrfs_root_bytenr(&ri),
8518                                                 last_snapshot, level,
8519                                                 0, level_size, NULL);
8520                                 if (ret < 0)
8521                                         goto out;
8522                         } else {
8523                                 level = btrfs_root_level(&ri);
8524                                 level_size = root->nodesize;
8525                                 objectid = found_key.objectid;
8526                                 btrfs_disk_key_to_cpu(&found_key,
8527                                                       &ri.drop_progress);
8528                                 ret = add_root_item_to_list(&dropping_trees,
8529                                                 objectid,
8530                                                 btrfs_root_bytenr(&ri),
8531                                                 last_snapshot, level,
8532                                                 ri.drop_level,
8533                                                 level_size, &found_key);
8534                                 if (ret < 0)
8535                                         goto out;
8536                         }
8537                 }
8538                 path.slots[0]++;
8539         }
8540         btrfs_release_path(&path);
8541
8542         /*
8543          * check_block can return -EAGAIN if it fixes something, please keep
8544          * this in mind when dealing with return values from these functions, if
8545          * we get -EAGAIN we want to fall through and restart the loop.
8546          */
8547         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8548                                   &seen, &reada, &nodes, &extent_cache,
8549                                   &chunk_cache, &dev_cache, &block_group_cache,
8550                                   &dev_extent_cache);
8551         if (ret < 0) {
8552                 if (ret == -EAGAIN)
8553                         goto loop;
8554                 goto out;
8555         }
8556         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8557                                   &pending, &seen, &reada, &nodes,
8558                                   &extent_cache, &chunk_cache, &dev_cache,
8559                                   &block_group_cache, &dev_extent_cache);
8560         if (ret < 0) {
8561                 if (ret == -EAGAIN)
8562                         goto loop;
8563                 goto out;
8564         }
8565
8566         ret = check_chunks(&chunk_cache, &block_group_cache,
8567                            &dev_extent_cache, NULL, NULL, NULL, 0);
8568         if (ret) {
8569                 if (ret == -EAGAIN)
8570                         goto loop;
8571                 err = ret;
8572         }
8573
8574         ret = check_extent_refs(root, &extent_cache);
8575         if (ret < 0) {
8576                 if (ret == -EAGAIN)
8577                         goto loop;
8578                 goto out;
8579         }
8580
8581         ret = check_devices(&dev_cache, &dev_extent_cache);
8582         if (ret && err)
8583                 ret = err;
8584
8585 out:
8586         task_stop(ctx.info);
8587         if (repair) {
8588                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8589                 extent_io_tree_cleanup(&excluded_extents);
8590                 root->fs_info->fsck_extent_cache = NULL;
8591                 root->fs_info->free_extent_hook = NULL;
8592                 root->fs_info->corrupt_blocks = NULL;
8593                 root->fs_info->excluded_extents = NULL;
8594         }
8595         free(bits);
8596         free_chunk_cache_tree(&chunk_cache);
8597         free_device_cache_tree(&dev_cache);
8598         free_block_group_tree(&block_group_cache);
8599         free_device_extent_tree(&dev_extent_cache);
8600         free_extent_cache_tree(&seen);
8601         free_extent_cache_tree(&pending);
8602         free_extent_cache_tree(&reada);
8603         free_extent_cache_tree(&nodes);
8604         return ret;
8605 loop:
8606         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8607         free_extent_cache_tree(&seen);
8608         free_extent_cache_tree(&pending);
8609         free_extent_cache_tree(&reada);
8610         free_extent_cache_tree(&nodes);
8611         free_chunk_cache_tree(&chunk_cache);
8612         free_block_group_tree(&block_group_cache);
8613         free_device_cache_tree(&dev_cache);
8614         free_device_extent_tree(&dev_extent_cache);
8615         free_extent_record_cache(root->fs_info, &extent_cache);
8616         free_root_item_list(&normal_trees);
8617         free_root_item_list(&dropping_trees);
8618         extent_io_tree_cleanup(&excluded_extents);
8619         goto again;
8620 }
8621
8622 /*
8623  * Check backrefs of a tree block given by @bytenr or @eb.
8624  *
8625  * @root:       the root containing the @bytenr or @eb
8626  * @eb:         tree block extent buffer, can be NULL
8627  * @bytenr:     bytenr of the tree block to search
8628  * @level:      tree level of the tree block
8629  * @owner:      owner of the tree block
8630  *
8631  * Return >0 for any error found and output error message
8632  * Return 0 for no error found
8633  */
8634 static int check_tree_block_ref(struct btrfs_root *root,
8635                                 struct extent_buffer *eb, u64 bytenr,
8636                                 int level, u64 owner)
8637 {
8638         struct btrfs_key key;
8639         struct btrfs_root *extent_root = root->fs_info->extent_root;
8640         struct btrfs_path path;
8641         struct btrfs_extent_item *ei;
8642         struct btrfs_extent_inline_ref *iref;
8643         struct extent_buffer *leaf;
8644         unsigned long end;
8645         unsigned long ptr;
8646         int slot;
8647         int skinny_level;
8648         int type;
8649         u32 nodesize = root->nodesize;
8650         u32 item_size;
8651         u64 offset;
8652         int found_ref = 0;
8653         int err = 0;
8654         int ret;
8655
8656         btrfs_init_path(&path);
8657         key.objectid = bytenr;
8658         if (btrfs_fs_incompat(root->fs_info,
8659                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8660                 key.type = BTRFS_METADATA_ITEM_KEY;
8661         else
8662                 key.type = BTRFS_EXTENT_ITEM_KEY;
8663         key.offset = (u64)-1;
8664
8665         /* Search for the backref in extent tree */
8666         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8667         if (ret < 0) {
8668                 err |= BACKREF_MISSING;
8669                 goto out;
8670         }
8671         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8672         if (ret) {
8673                 err |= BACKREF_MISSING;
8674                 goto out;
8675         }
8676
8677         leaf = path.nodes[0];
8678         slot = path.slots[0];
8679         btrfs_item_key_to_cpu(leaf, &key, slot);
8680
8681         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8682
8683         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8684                 skinny_level = (int)key.offset;
8685                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8686         } else {
8687                 struct btrfs_tree_block_info *info;
8688
8689                 info = (struct btrfs_tree_block_info *)(ei + 1);
8690                 skinny_level = btrfs_tree_block_level(leaf, info);
8691                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8692         }
8693
8694         if (eb) {
8695                 u64 header_gen;
8696                 u64 extent_gen;
8697
8698                 if (!(btrfs_extent_flags(leaf, ei) &
8699                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8700                         error(
8701                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8702                                 key.objectid, nodesize,
8703                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8704                         err = BACKREF_MISMATCH;
8705                 }
8706                 header_gen = btrfs_header_generation(eb);
8707                 extent_gen = btrfs_extent_generation(leaf, ei);
8708                 if (header_gen != extent_gen) {
8709                         error(
8710         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8711                                 key.objectid, nodesize, header_gen,
8712                                 extent_gen);
8713                         err = BACKREF_MISMATCH;
8714                 }
8715                 if (level != skinny_level) {
8716                         error(
8717                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8718                                 key.objectid, nodesize, level, skinny_level);
8719                         err = BACKREF_MISMATCH;
8720                 }
8721                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8722                         error(
8723                         "extent[%llu %u] is referred by other roots than %llu",
8724                                 key.objectid, nodesize, root->objectid);
8725                         err = BACKREF_MISMATCH;
8726                 }
8727         }
8728
8729         /*
8730          * Iterate the extent/metadata item to find the exact backref
8731          */
8732         item_size = btrfs_item_size_nr(leaf, slot);
8733         ptr = (unsigned long)iref;
8734         end = (unsigned long)ei + item_size;
8735         while (ptr < end) {
8736                 iref = (struct btrfs_extent_inline_ref *)ptr;
8737                 type = btrfs_extent_inline_ref_type(leaf, iref);
8738                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8739
8740                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8741                         (offset == root->objectid || offset == owner)) {
8742                         found_ref = 1;
8743                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8744                         /* Check if the backref points to valid referencer */
8745                         found_ref = !check_tree_block_ref(root, NULL, offset,
8746                                                           level + 1, owner);
8747                 }
8748
8749                 if (found_ref)
8750                         break;
8751                 ptr += btrfs_extent_inline_ref_size(type);
8752         }
8753
8754         /*
8755          * Inlined extent item doesn't have what we need, check
8756          * TREE_BLOCK_REF_KEY
8757          */
8758         if (!found_ref) {
8759                 btrfs_release_path(&path);
8760                 key.objectid = bytenr;
8761                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8762                 key.offset = root->objectid;
8763
8764                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8765                 if (!ret)
8766                         found_ref = 1;
8767         }
8768         if (!found_ref)
8769                 err |= BACKREF_MISSING;
8770 out:
8771         btrfs_release_path(&path);
8772         if (eb && (err & BACKREF_MISSING))
8773                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8774                         bytenr, nodesize, owner, level);
8775         return err;
8776 }
8777
8778 /*
8779  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8780  *
8781  * Return >0 any error found and output error message
8782  * Return 0 for no error found
8783  */
8784 static int check_extent_data_item(struct btrfs_root *root,
8785                                   struct extent_buffer *eb, int slot)
8786 {
8787         struct btrfs_file_extent_item *fi;
8788         struct btrfs_path path;
8789         struct btrfs_root *extent_root = root->fs_info->extent_root;
8790         struct btrfs_key fi_key;
8791         struct btrfs_key dbref_key;
8792         struct extent_buffer *leaf;
8793         struct btrfs_extent_item *ei;
8794         struct btrfs_extent_inline_ref *iref;
8795         struct btrfs_extent_data_ref *dref;
8796         u64 owner;
8797         u64 file_extent_gen;
8798         u64 disk_bytenr;
8799         u64 disk_num_bytes;
8800         u64 extent_num_bytes;
8801         u64 extent_flags;
8802         u64 extent_gen;
8803         u32 item_size;
8804         unsigned long end;
8805         unsigned long ptr;
8806         int type;
8807         u64 ref_root;
8808         int found_dbackref = 0;
8809         int err = 0;
8810         int ret;
8811
8812         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8813         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8814         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8815
8816         /* Nothing to check for hole and inline data extents */
8817         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8818             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8819                 return 0;
8820
8821         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8822         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8823         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8824
8825         /* Check unaligned disk_num_bytes and num_bytes */
8826         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8827                 error(
8828 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8829                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8830                         root->sectorsize);
8831                 err |= BYTES_UNALIGNED;
8832         } else {
8833                 data_bytes_allocated += disk_num_bytes;
8834         }
8835         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8836                 error(
8837 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8838                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8839                         root->sectorsize);
8840                 err |= BYTES_UNALIGNED;
8841         } else {
8842                 data_bytes_referenced += extent_num_bytes;
8843         }
8844         owner = btrfs_header_owner(eb);
8845
8846         /* Check the extent item of the file extent in extent tree */
8847         btrfs_init_path(&path);
8848         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8849         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8850         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8851
8852         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8853         if (ret) {
8854                 err |= BACKREF_MISSING;
8855                 goto error;
8856         }
8857
8858         leaf = path.nodes[0];
8859         slot = path.slots[0];
8860         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8861
8862         extent_flags = btrfs_extent_flags(leaf, ei);
8863         extent_gen = btrfs_extent_generation(leaf, ei);
8864
8865         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8866                 error(
8867                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8868                     disk_bytenr, disk_num_bytes,
8869                     BTRFS_EXTENT_FLAG_DATA);
8870                 err |= BACKREF_MISMATCH;
8871         }
8872
8873         if (file_extent_gen < extent_gen) {
8874                 error(
8875 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8876                         disk_bytenr, disk_num_bytes, file_extent_gen,
8877                         extent_gen);
8878                 err |= BACKREF_MISMATCH;
8879         }
8880
8881         /* Check data backref inside that extent item */
8882         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8883         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8884         ptr = (unsigned long)iref;
8885         end = (unsigned long)ei + item_size;
8886         while (ptr < end) {
8887                 iref = (struct btrfs_extent_inline_ref *)ptr;
8888                 type = btrfs_extent_inline_ref_type(leaf, iref);
8889                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8890
8891                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8892                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8893                         if (ref_root == owner || ref_root == root->objectid)
8894                                 found_dbackref = 1;
8895                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8896                         found_dbackref = !check_tree_block_ref(root, NULL,
8897                                 btrfs_extent_inline_ref_offset(leaf, iref),
8898                                 0, owner);
8899                 }
8900
8901                 if (found_dbackref)
8902                         break;
8903                 ptr += btrfs_extent_inline_ref_size(type);
8904         }
8905
8906         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8907         if (!found_dbackref) {
8908                 btrfs_release_path(&path);
8909
8910                 btrfs_init_path(&path);
8911                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8912                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8913                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8914                                 fi_key.objectid, fi_key.offset);
8915
8916                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8917                                         &dbref_key, &path, 0, 0);
8918                 if (!ret)
8919                         found_dbackref = 1;
8920         }
8921
8922         if (!found_dbackref)
8923                 err |= BACKREF_MISSING;
8924 error:
8925         btrfs_release_path(&path);
8926         if (err & BACKREF_MISSING) {
8927                 error("data extent[%llu %llu] backref lost",
8928                       disk_bytenr, disk_num_bytes);
8929         }
8930         return err;
8931 }
8932
8933 /*
8934  * Get real tree block level for the case like shared block
8935  * Return >= 0 as tree level
8936  * Return <0 for error
8937  */
8938 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8939 {
8940         struct extent_buffer *eb;
8941         struct btrfs_path path;
8942         struct btrfs_key key;
8943         struct btrfs_extent_item *ei;
8944         u64 flags;
8945         u64 transid;
8946         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8947         u8 backref_level;
8948         u8 header_level;
8949         int ret;
8950
8951         /* Search extent tree for extent generation and level */
8952         key.objectid = bytenr;
8953         key.type = BTRFS_METADATA_ITEM_KEY;
8954         key.offset = (u64)-1;
8955
8956         btrfs_init_path(&path);
8957         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8958         if (ret < 0)
8959                 goto release_out;
8960         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8961         if (ret < 0)
8962                 goto release_out;
8963         if (ret > 0) {
8964                 ret = -ENOENT;
8965                 goto release_out;
8966         }
8967
8968         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8969         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8970                             struct btrfs_extent_item);
8971         flags = btrfs_extent_flags(path.nodes[0], ei);
8972         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8973                 ret = -ENOENT;
8974                 goto release_out;
8975         }
8976
8977         /* Get transid for later read_tree_block() check */
8978         transid = btrfs_extent_generation(path.nodes[0], ei);
8979
8980         /* Get backref level as one source */
8981         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8982                 backref_level = key.offset;
8983         } else {
8984                 struct btrfs_tree_block_info *info;
8985
8986                 info = (struct btrfs_tree_block_info *)(ei + 1);
8987                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8988         }
8989         btrfs_release_path(&path);
8990
8991         /* Get level from tree block as an alternative source */
8992         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8993         if (!extent_buffer_uptodate(eb)) {
8994                 free_extent_buffer(eb);
8995                 return -EIO;
8996         }
8997         header_level = btrfs_header_level(eb);
8998         free_extent_buffer(eb);
8999
9000         if (header_level != backref_level)
9001                 return -EIO;
9002         return header_level;
9003
9004 release_out:
9005         btrfs_release_path(&path);
9006         return ret;
9007 }
9008
9009 /*
9010  * Check if a tree block backref is valid (points to a valid tree block)
9011  * if level == -1, level will be resolved
9012  * Return >0 for any error found and print error message
9013  */
9014 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9015                                     u64 bytenr, int level)
9016 {
9017         struct btrfs_root *root;
9018         struct btrfs_key key;
9019         struct btrfs_path path;
9020         struct extent_buffer *eb;
9021         struct extent_buffer *node;
9022         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9023         int err = 0;
9024         int ret;
9025
9026         /* Query level for level == -1 special case */
9027         if (level == -1)
9028                 level = query_tree_block_level(fs_info, bytenr);
9029         if (level < 0) {
9030                 err |= REFERENCER_MISSING;
9031                 goto out;
9032         }
9033
9034         key.objectid = root_id;
9035         key.type = BTRFS_ROOT_ITEM_KEY;
9036         key.offset = (u64)-1;
9037
9038         root = btrfs_read_fs_root(fs_info, &key);
9039         if (IS_ERR(root)) {
9040                 err |= REFERENCER_MISSING;
9041                 goto out;
9042         }
9043
9044         /* Read out the tree block to get item/node key */
9045         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9046         if (!extent_buffer_uptodate(eb)) {
9047                 err |= REFERENCER_MISSING;
9048                 free_extent_buffer(eb);
9049                 goto out;
9050         }
9051
9052         /* Empty tree, no need to check key */
9053         if (!btrfs_header_nritems(eb) && !level) {
9054                 free_extent_buffer(eb);
9055                 goto out;
9056         }
9057
9058         if (level)
9059                 btrfs_node_key_to_cpu(eb, &key, 0);
9060         else
9061                 btrfs_item_key_to_cpu(eb, &key, 0);
9062
9063         free_extent_buffer(eb);
9064
9065         btrfs_init_path(&path);
9066         path.lowest_level = level;
9067         /* Search with the first key, to ensure we can reach it */
9068         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9069         if (ret < 0) {
9070                 err |= REFERENCER_MISSING;
9071                 goto release_out;
9072         }
9073
9074         node = path.nodes[level];
9075         if (btrfs_header_bytenr(node) != bytenr) {
9076                 error(
9077         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9078                         bytenr, nodesize, bytenr,
9079                         btrfs_header_bytenr(node));
9080                 err |= REFERENCER_MISMATCH;
9081         }
9082         if (btrfs_header_level(node) != level) {
9083                 error(
9084         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9085                         bytenr, nodesize, level,
9086                         btrfs_header_level(node));
9087                 err |= REFERENCER_MISMATCH;
9088         }
9089
9090 release_out:
9091         btrfs_release_path(&path);
9092 out:
9093         if (err & REFERENCER_MISSING) {
9094                 if (level < 0)
9095                         error("extent [%llu %d] lost referencer (owner: %llu)",
9096                                 bytenr, nodesize, root_id);
9097                 else
9098                         error(
9099                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9100                                 bytenr, nodesize, root_id, level);
9101         }
9102
9103         return err;
9104 }
9105
9106 /*
9107  * Check referencer for shared block backref
9108  * If level == -1, this function will resolve the level.
9109  */
9110 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9111                                      u64 parent, u64 bytenr, int level)
9112 {
9113         struct extent_buffer *eb;
9114         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9115         u32 nr;
9116         int found_parent = 0;
9117         int i;
9118
9119         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9120         if (!extent_buffer_uptodate(eb))
9121                 goto out;
9122
9123         if (level == -1)
9124                 level = query_tree_block_level(fs_info, bytenr);
9125         if (level < 0)
9126                 goto out;
9127
9128         if (level + 1 != btrfs_header_level(eb))
9129                 goto out;
9130
9131         nr = btrfs_header_nritems(eb);
9132         for (i = 0; i < nr; i++) {
9133                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9134                         found_parent = 1;
9135                         break;
9136                 }
9137         }
9138 out:
9139         free_extent_buffer(eb);
9140         if (!found_parent) {
9141                 error(
9142         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9143                         bytenr, nodesize, parent, level);
9144                 return REFERENCER_MISSING;
9145         }
9146         return 0;
9147 }
9148
9149 /*
9150  * Check referencer for normal (inlined) data ref
9151  * If len == 0, it will be resolved by searching in extent tree
9152  */
9153 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9154                                      u64 root_id, u64 objectid, u64 offset,
9155                                      u64 bytenr, u64 len, u32 count)
9156 {
9157         struct btrfs_root *root;
9158         struct btrfs_root *extent_root = fs_info->extent_root;
9159         struct btrfs_key key;
9160         struct btrfs_path path;
9161         struct extent_buffer *leaf;
9162         struct btrfs_file_extent_item *fi;
9163         u32 found_count = 0;
9164         int slot;
9165         int ret = 0;
9166
9167         if (!len) {
9168                 key.objectid = bytenr;
9169                 key.type = BTRFS_EXTENT_ITEM_KEY;
9170                 key.offset = (u64)-1;
9171
9172                 btrfs_init_path(&path);
9173                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9174                 if (ret < 0)
9175                         goto out;
9176                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9177                 if (ret)
9178                         goto out;
9179                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9180                 if (key.objectid != bytenr ||
9181                     key.type != BTRFS_EXTENT_ITEM_KEY)
9182                         goto out;
9183                 len = key.offset;
9184                 btrfs_release_path(&path);
9185         }
9186         key.objectid = root_id;
9187         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9188         key.offset = (u64)-1;
9189         btrfs_init_path(&path);
9190
9191         root = btrfs_read_fs_root(fs_info, &key);
9192         if (IS_ERR(root))
9193                 goto out;
9194
9195         key.objectid = objectid;
9196         key.type = BTRFS_EXTENT_DATA_KEY;
9197         /*
9198          * It can be nasty as data backref offset is
9199          * file offset - file extent offset, which is smaller or
9200          * equal to original backref offset.  The only special case is
9201          * overflow.  So we need to special check and do further search.
9202          */
9203         key.offset = offset & (1ULL << 63) ? 0 : offset;
9204
9205         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9206         if (ret < 0)
9207                 goto out;
9208
9209         /*
9210          * Search afterwards to get correct one
9211          * NOTE: As we must do a comprehensive check on the data backref to
9212          * make sure the dref count also matches, we must iterate all file
9213          * extents for that inode.
9214          */
9215         while (1) {
9216                 leaf = path.nodes[0];
9217                 slot = path.slots[0];
9218
9219                 btrfs_item_key_to_cpu(leaf, &key, slot);
9220                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9221                         break;
9222                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9223                 /*
9224                  * Except normal disk bytenr and disk num bytes, we still
9225                  * need to do extra check on dbackref offset as
9226                  * dbackref offset = file_offset - file_extent_offset
9227                  */
9228                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9229                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9230                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9231                     offset)
9232                         found_count++;
9233
9234                 ret = btrfs_next_item(root, &path);
9235                 if (ret)
9236                         break;
9237         }
9238 out:
9239         btrfs_release_path(&path);
9240         if (found_count != count) {
9241                 error(
9242 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9243                         bytenr, len, root_id, objectid, offset, count, found_count);
9244                 return REFERENCER_MISSING;
9245         }
9246         return 0;
9247 }
9248
9249 /*
9250  * Check if the referencer of a shared data backref exists
9251  */
9252 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9253                                      u64 parent, u64 bytenr)
9254 {
9255         struct extent_buffer *eb;
9256         struct btrfs_key key;
9257         struct btrfs_file_extent_item *fi;
9258         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9259         u32 nr;
9260         int found_parent = 0;
9261         int i;
9262
9263         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9264         if (!extent_buffer_uptodate(eb))
9265                 goto out;
9266
9267         nr = btrfs_header_nritems(eb);
9268         for (i = 0; i < nr; i++) {
9269                 btrfs_item_key_to_cpu(eb, &key, i);
9270                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9271                         continue;
9272
9273                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9274                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9275                         continue;
9276
9277                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9278                         found_parent = 1;
9279                         break;
9280                 }
9281         }
9282
9283 out:
9284         free_extent_buffer(eb);
9285         if (!found_parent) {
9286                 error("shared extent %llu referencer lost (parent: %llu)",
9287                         bytenr, parent);
9288                 return REFERENCER_MISSING;
9289         }
9290         return 0;
9291 }
9292
9293 /*
9294  * This function will check a given extent item, including its backref and
9295  * itself (like crossing stripe boundary and type)
9296  *
9297  * Since we don't use extent_record anymore, introduce new error bit
9298  */
9299 static int check_extent_item(struct btrfs_fs_info *fs_info,
9300                              struct extent_buffer *eb, int slot)
9301 {
9302         struct btrfs_extent_item *ei;
9303         struct btrfs_extent_inline_ref *iref;
9304         struct btrfs_extent_data_ref *dref;
9305         unsigned long end;
9306         unsigned long ptr;
9307         int type;
9308         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9309         u32 item_size = btrfs_item_size_nr(eb, slot);
9310         u64 flags;
9311         u64 offset;
9312         int metadata = 0;
9313         int level;
9314         struct btrfs_key key;
9315         int ret;
9316         int err = 0;
9317
9318         btrfs_item_key_to_cpu(eb, &key, slot);
9319         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9320                 bytes_used += key.offset;
9321         else
9322                 bytes_used += nodesize;
9323
9324         if (item_size < sizeof(*ei)) {
9325                 /*
9326                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9327                  * old thing when on disk format is still un-determined.
9328                  * No need to care about it anymore
9329                  */
9330                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9331                 return -ENOTTY;
9332         }
9333
9334         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9335         flags = btrfs_extent_flags(eb, ei);
9336
9337         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9338                 metadata = 1;
9339         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9340                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9341                       key.objectid, key.objectid + nodesize);
9342                 err |= CROSSING_STRIPE_BOUNDARY;
9343         }
9344
9345         ptr = (unsigned long)(ei + 1);
9346
9347         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9348                 /* Old EXTENT_ITEM metadata */
9349                 struct btrfs_tree_block_info *info;
9350
9351                 info = (struct btrfs_tree_block_info *)ptr;
9352                 level = btrfs_tree_block_level(eb, info);
9353                 ptr += sizeof(struct btrfs_tree_block_info);
9354         } else {
9355                 /* New METADATA_ITEM */
9356                 level = key.offset;
9357         }
9358         end = (unsigned long)ei + item_size;
9359
9360         if (ptr >= end) {
9361                 err |= ITEM_SIZE_MISMATCH;
9362                 goto out;
9363         }
9364
9365         /* Now check every backref in this extent item */
9366 next:
9367         iref = (struct btrfs_extent_inline_ref *)ptr;
9368         type = btrfs_extent_inline_ref_type(eb, iref);
9369         offset = btrfs_extent_inline_ref_offset(eb, iref);
9370         switch (type) {
9371         case BTRFS_TREE_BLOCK_REF_KEY:
9372                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9373                                                level);
9374                 err |= ret;
9375                 break;
9376         case BTRFS_SHARED_BLOCK_REF_KEY:
9377                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9378                                                  level);
9379                 err |= ret;
9380                 break;
9381         case BTRFS_EXTENT_DATA_REF_KEY:
9382                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9383                 ret = check_extent_data_backref(fs_info,
9384                                 btrfs_extent_data_ref_root(eb, dref),
9385                                 btrfs_extent_data_ref_objectid(eb, dref),
9386                                 btrfs_extent_data_ref_offset(eb, dref),
9387                                 key.objectid, key.offset,
9388                                 btrfs_extent_data_ref_count(eb, dref));
9389                 err |= ret;
9390                 break;
9391         case BTRFS_SHARED_DATA_REF_KEY:
9392                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9393                 err |= ret;
9394                 break;
9395         default:
9396                 error("extent[%llu %d %llu] has unknown ref type: %d",
9397                         key.objectid, key.type, key.offset, type);
9398                 err |= UNKNOWN_TYPE;
9399                 goto out;
9400         }
9401
9402         ptr += btrfs_extent_inline_ref_size(type);
9403         if (ptr < end)
9404                 goto next;
9405
9406 out:
9407         return err;
9408 }
9409
9410 /*
9411  * Check if a dev extent item is referred correctly by its chunk
9412  */
9413 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9414                                  struct extent_buffer *eb, int slot)
9415 {
9416         struct btrfs_root *chunk_root = fs_info->chunk_root;
9417         struct btrfs_dev_extent *ptr;
9418         struct btrfs_path path;
9419         struct btrfs_key chunk_key;
9420         struct btrfs_key devext_key;
9421         struct btrfs_chunk *chunk;
9422         struct extent_buffer *l;
9423         int num_stripes;
9424         u64 length;
9425         int i;
9426         int found_chunk = 0;
9427         int ret;
9428
9429         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9430         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9431         length = btrfs_dev_extent_length(eb, ptr);
9432
9433         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9434         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9435         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9436
9437         btrfs_init_path(&path);
9438         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9439         if (ret)
9440                 goto out;
9441
9442         l = path.nodes[0];
9443         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9444         if (btrfs_chunk_length(l, chunk) != length)
9445                 goto out;
9446
9447         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9448         for (i = 0; i < num_stripes; i++) {
9449                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9450                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9451
9452                 if (devid == devext_key.objectid &&
9453                     offset == devext_key.offset) {
9454                         found_chunk = 1;
9455                         break;
9456                 }
9457         }
9458 out:
9459         btrfs_release_path(&path);
9460         if (!found_chunk) {
9461                 error(
9462                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9463                         devext_key.objectid, devext_key.offset, length);
9464                 return REFERENCER_MISSING;
9465         }
9466         return 0;
9467 }
9468
9469 /*
9470  * Check if the used space is correct with the dev item
9471  */
9472 static int check_dev_item(struct btrfs_fs_info *fs_info,
9473                           struct extent_buffer *eb, int slot)
9474 {
9475         struct btrfs_root *dev_root = fs_info->dev_root;
9476         struct btrfs_dev_item *dev_item;
9477         struct btrfs_path path;
9478         struct btrfs_key key;
9479         struct btrfs_dev_extent *ptr;
9480         u64 dev_id;
9481         u64 used;
9482         u64 total = 0;
9483         int ret;
9484
9485         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9486         dev_id = btrfs_device_id(eb, dev_item);
9487         used = btrfs_device_bytes_used(eb, dev_item);
9488
9489         key.objectid = dev_id;
9490         key.type = BTRFS_DEV_EXTENT_KEY;
9491         key.offset = 0;
9492
9493         btrfs_init_path(&path);
9494         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9495         if (ret < 0) {
9496                 btrfs_item_key_to_cpu(eb, &key, slot);
9497                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9498                         key.objectid, key.type, key.offset);
9499                 btrfs_release_path(&path);
9500                 return REFERENCER_MISSING;
9501         }
9502
9503         /* Iterate dev_extents to calculate the used space of a device */
9504         while (1) {
9505                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9506
9507                 if (key.objectid > dev_id)
9508                         break;
9509                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9510                         goto next;
9511
9512                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9513                                      struct btrfs_dev_extent);
9514                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9515 next:
9516                 ret = btrfs_next_item(dev_root, &path);
9517                 if (ret)
9518                         break;
9519         }
9520         btrfs_release_path(&path);
9521
9522         if (used != total) {
9523                 btrfs_item_key_to_cpu(eb, &key, slot);
9524                 error(
9525 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9526                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9527                         BTRFS_DEV_EXTENT_KEY, dev_id);
9528                 return ACCOUNTING_MISMATCH;
9529         }
9530         return 0;
9531 }
9532
9533 /*
9534  * Check a block group item with its referener (chunk) and its used space
9535  * with extent/metadata item
9536  */
9537 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9538                                   struct extent_buffer *eb, int slot)
9539 {
9540         struct btrfs_root *extent_root = fs_info->extent_root;
9541         struct btrfs_root *chunk_root = fs_info->chunk_root;
9542         struct btrfs_block_group_item *bi;
9543         struct btrfs_block_group_item bg_item;
9544         struct btrfs_path path;
9545         struct btrfs_key bg_key;
9546         struct btrfs_key chunk_key;
9547         struct btrfs_key extent_key;
9548         struct btrfs_chunk *chunk;
9549         struct extent_buffer *leaf;
9550         struct btrfs_extent_item *ei;
9551         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9552         u64 flags;
9553         u64 bg_flags;
9554         u64 used;
9555         u64 total = 0;
9556         int ret;
9557         int err = 0;
9558
9559         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9560         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9561         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9562         used = btrfs_block_group_used(&bg_item);
9563         bg_flags = btrfs_block_group_flags(&bg_item);
9564
9565         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9566         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9567         chunk_key.offset = bg_key.objectid;
9568
9569         btrfs_init_path(&path);
9570         /* Search for the referencer chunk */
9571         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9572         if (ret) {
9573                 error(
9574                 "block group[%llu %llu] did not find the related chunk item",
9575                         bg_key.objectid, bg_key.offset);
9576                 err |= REFERENCER_MISSING;
9577         } else {
9578                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9579                                         struct btrfs_chunk);
9580                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9581                                                 bg_key.offset) {
9582                         error(
9583         "block group[%llu %llu] related chunk item length does not match",
9584                                 bg_key.objectid, bg_key.offset);
9585                         err |= REFERENCER_MISMATCH;
9586                 }
9587         }
9588         btrfs_release_path(&path);
9589
9590         /* Search from the block group bytenr */
9591         extent_key.objectid = bg_key.objectid;
9592         extent_key.type = 0;
9593         extent_key.offset = 0;
9594
9595         btrfs_init_path(&path);
9596         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9597         if (ret < 0)
9598                 goto out;
9599
9600         /* Iterate extent tree to account used space */
9601         while (1) {
9602                 leaf = path.nodes[0];
9603                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9604                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9605                         break;
9606
9607                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9608                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9609                         goto next;
9610                 if (extent_key.objectid < bg_key.objectid)
9611                         goto next;
9612
9613                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9614                         total += nodesize;
9615                 else
9616                         total += extent_key.offset;
9617
9618                 ei = btrfs_item_ptr(leaf, path.slots[0],
9619                                     struct btrfs_extent_item);
9620                 flags = btrfs_extent_flags(leaf, ei);
9621                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9622                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9623                                 error(
9624                         "bad extent[%llu, %llu) type mismatch with chunk",
9625                                         extent_key.objectid,
9626                                         extent_key.objectid + extent_key.offset);
9627                                 err |= CHUNK_TYPE_MISMATCH;
9628                         }
9629                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9630                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9631                                     BTRFS_BLOCK_GROUP_METADATA))) {
9632                                 error(
9633                         "bad extent[%llu, %llu) type mismatch with chunk",
9634                                         extent_key.objectid,
9635                                         extent_key.objectid + nodesize);
9636                                 err |= CHUNK_TYPE_MISMATCH;
9637                         }
9638                 }
9639 next:
9640                 ret = btrfs_next_item(extent_root, &path);
9641                 if (ret)
9642                         break;
9643         }
9644
9645 out:
9646         btrfs_release_path(&path);
9647
9648         if (total != used) {
9649                 error(
9650                 "block group[%llu %llu] used %llu but extent items used %llu",
9651                         bg_key.objectid, bg_key.offset, used, total);
9652                 err |= ACCOUNTING_MISMATCH;
9653         }
9654         return err;
9655 }
9656
9657 /*
9658  * Check a chunk item.
9659  * Including checking all referred dev_extents and block group
9660  */
9661 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9662                             struct extent_buffer *eb, int slot)
9663 {
9664         struct btrfs_root *extent_root = fs_info->extent_root;
9665         struct btrfs_root *dev_root = fs_info->dev_root;
9666         struct btrfs_path path;
9667         struct btrfs_key chunk_key;
9668         struct btrfs_key bg_key;
9669         struct btrfs_key devext_key;
9670         struct btrfs_chunk *chunk;
9671         struct extent_buffer *leaf;
9672         struct btrfs_block_group_item *bi;
9673         struct btrfs_block_group_item bg_item;
9674         struct btrfs_dev_extent *ptr;
9675         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9676         u64 length;
9677         u64 chunk_end;
9678         u64 type;
9679         u64 profile;
9680         int num_stripes;
9681         u64 offset;
9682         u64 objectid;
9683         int i;
9684         int ret;
9685         int err = 0;
9686
9687         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9688         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9689         length = btrfs_chunk_length(eb, chunk);
9690         chunk_end = chunk_key.offset + length;
9691         if (!IS_ALIGNED(length, sectorsize)) {
9692                 error("chunk[%llu %llu) not aligned to %u",
9693                         chunk_key.offset, chunk_end, sectorsize);
9694                 err |= BYTES_UNALIGNED;
9695                 goto out;
9696         }
9697
9698         type = btrfs_chunk_type(eb, chunk);
9699         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9700         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9701                 error("chunk[%llu %llu) has no chunk type",
9702                         chunk_key.offset, chunk_end);
9703                 err |= UNKNOWN_TYPE;
9704         }
9705         if (profile && (profile & (profile - 1))) {
9706                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9707                         chunk_key.offset, chunk_end, profile);
9708                 err |= UNKNOWN_TYPE;
9709         }
9710
9711         bg_key.objectid = chunk_key.offset;
9712         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9713         bg_key.offset = length;
9714
9715         btrfs_init_path(&path);
9716         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9717         if (ret) {
9718                 error(
9719                 "chunk[%llu %llu) did not find the related block group item",
9720                         chunk_key.offset, chunk_end);
9721                 err |= REFERENCER_MISSING;
9722         } else{
9723                 leaf = path.nodes[0];
9724                 bi = btrfs_item_ptr(leaf, path.slots[0],
9725                                     struct btrfs_block_group_item);
9726                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9727                                    sizeof(bg_item));
9728                 if (btrfs_block_group_flags(&bg_item) != type) {
9729                         error(
9730 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9731                                 chunk_key.offset, chunk_end, type,
9732                                 btrfs_block_group_flags(&bg_item));
9733                         err |= REFERENCER_MISSING;
9734                 }
9735         }
9736
9737         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9738         for (i = 0; i < num_stripes; i++) {
9739                 btrfs_release_path(&path);
9740                 btrfs_init_path(&path);
9741                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9742                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9743                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9744
9745                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9746                                         0, 0);
9747                 if (ret)
9748                         goto not_match_dev;
9749
9750                 leaf = path.nodes[0];
9751                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9752                                      struct btrfs_dev_extent);
9753                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9754                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9755                 if (objectid != chunk_key.objectid ||
9756                     offset != chunk_key.offset ||
9757                     btrfs_dev_extent_length(leaf, ptr) != length)
9758                         goto not_match_dev;
9759                 continue;
9760 not_match_dev:
9761                 err |= BACKREF_MISSING;
9762                 error(
9763                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9764                         chunk_key.objectid, chunk_end, i);
9765                 continue;
9766         }
9767         btrfs_release_path(&path);
9768 out:
9769         return err;
9770 }
9771
9772 /*
9773  * Main entry function to check known items and update related accounting info
9774  */
9775 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9776 {
9777         struct btrfs_fs_info *fs_info = root->fs_info;
9778         struct btrfs_key key;
9779         int slot = 0;
9780         int type;
9781         struct btrfs_extent_data_ref *dref;
9782         int ret;
9783         int err = 0;
9784
9785 next:
9786         btrfs_item_key_to_cpu(eb, &key, slot);
9787         type = btrfs_key_type(&key);
9788
9789         switch (type) {
9790         case BTRFS_EXTENT_DATA_KEY:
9791                 ret = check_extent_data_item(root, eb, slot);
9792                 err |= ret;
9793                 break;
9794         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9795                 ret = check_block_group_item(fs_info, eb, slot);
9796                 err |= ret;
9797                 break;
9798         case BTRFS_DEV_ITEM_KEY:
9799                 ret = check_dev_item(fs_info, eb, slot);
9800                 err |= ret;
9801                 break;
9802         case BTRFS_CHUNK_ITEM_KEY:
9803                 ret = check_chunk_item(fs_info, eb, slot);
9804                 err |= ret;
9805                 break;
9806         case BTRFS_DEV_EXTENT_KEY:
9807                 ret = check_dev_extent_item(fs_info, eb, slot);
9808                 err |= ret;
9809                 break;
9810         case BTRFS_EXTENT_ITEM_KEY:
9811         case BTRFS_METADATA_ITEM_KEY:
9812                 ret = check_extent_item(fs_info, eb, slot);
9813                 err |= ret;
9814                 break;
9815         case BTRFS_EXTENT_CSUM_KEY:
9816                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9817                 break;
9818         case BTRFS_TREE_BLOCK_REF_KEY:
9819                 ret = check_tree_block_backref(fs_info, key.offset,
9820                                                key.objectid, -1);
9821                 err |= ret;
9822                 break;
9823         case BTRFS_EXTENT_DATA_REF_KEY:
9824                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9825                 ret = check_extent_data_backref(fs_info,
9826                                 btrfs_extent_data_ref_root(eb, dref),
9827                                 btrfs_extent_data_ref_objectid(eb, dref),
9828                                 btrfs_extent_data_ref_offset(eb, dref),
9829                                 key.objectid, 0,
9830                                 btrfs_extent_data_ref_count(eb, dref));
9831                 err |= ret;
9832                 break;
9833         case BTRFS_SHARED_BLOCK_REF_KEY:
9834                 ret = check_shared_block_backref(fs_info, key.offset,
9835                                                  key.objectid, -1);
9836                 err |= ret;
9837                 break;
9838         case BTRFS_SHARED_DATA_REF_KEY:
9839                 ret = check_shared_data_backref(fs_info, key.offset,
9840                                                 key.objectid);
9841                 err |= ret;
9842                 break;
9843         default:
9844                 break;
9845         }
9846
9847         if (++slot < btrfs_header_nritems(eb))
9848                 goto next;
9849
9850         return err;
9851 }
9852
9853 /*
9854  * Helper function for later fs/subvol tree check.  To determine if a tree
9855  * block should be checked.
9856  * This function will ensure only the direct referencer with lowest rootid to
9857  * check a fs/subvolume tree block.
9858  *
9859  * Backref check at extent tree would detect errors like missing subvolume
9860  * tree, so we can do aggressive check to reduce duplicated checks.
9861  */
9862 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9863 {
9864         struct btrfs_root *extent_root = root->fs_info->extent_root;
9865         struct btrfs_key key;
9866         struct btrfs_path path;
9867         struct extent_buffer *leaf;
9868         int slot;
9869         struct btrfs_extent_item *ei;
9870         unsigned long ptr;
9871         unsigned long end;
9872         int type;
9873         u32 item_size;
9874         u64 offset;
9875         struct btrfs_extent_inline_ref *iref;
9876         int ret;
9877
9878         btrfs_init_path(&path);
9879         key.objectid = btrfs_header_bytenr(eb);
9880         key.type = BTRFS_METADATA_ITEM_KEY;
9881         key.offset = (u64)-1;
9882
9883         /*
9884          * Any failure in backref resolving means we can't determine
9885          * whom the tree block belongs to.
9886          * So in that case, we need to check that tree block
9887          */
9888         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9889         if (ret < 0)
9890                 goto need_check;
9891
9892         ret = btrfs_previous_extent_item(extent_root, &path,
9893                                          btrfs_header_bytenr(eb));
9894         if (ret)
9895                 goto need_check;
9896
9897         leaf = path.nodes[0];
9898         slot = path.slots[0];
9899         btrfs_item_key_to_cpu(leaf, &key, slot);
9900         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9901
9902         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9903                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9904         } else {
9905                 struct btrfs_tree_block_info *info;
9906
9907                 info = (struct btrfs_tree_block_info *)(ei + 1);
9908                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9909         }
9910
9911         item_size = btrfs_item_size_nr(leaf, slot);
9912         ptr = (unsigned long)iref;
9913         end = (unsigned long)ei + item_size;
9914         while (ptr < end) {
9915                 iref = (struct btrfs_extent_inline_ref *)ptr;
9916                 type = btrfs_extent_inline_ref_type(leaf, iref);
9917                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9918
9919                 /*
9920                  * We only check the tree block if current root is
9921                  * the lowest referencer of it.
9922                  */
9923                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9924                     offset < root->objectid) {
9925                         btrfs_release_path(&path);
9926                         return 0;
9927                 }
9928
9929                 ptr += btrfs_extent_inline_ref_size(type);
9930         }
9931         /*
9932          * Normally we should also check keyed tree block ref, but that may be
9933          * very time consuming.  Inlined ref should already make us skip a lot
9934          * of refs now.  So skip search keyed tree block ref.
9935          */
9936
9937 need_check:
9938         btrfs_release_path(&path);
9939         return 1;
9940 }
9941
9942 /*
9943  * Traversal function for tree block. We will do:
9944  * 1) Skip shared fs/subvolume tree blocks
9945  * 2) Update related bytes accounting
9946  * 3) Pre-order traversal
9947  */
9948 static int traverse_tree_block(struct btrfs_root *root,
9949                                 struct extent_buffer *node)
9950 {
9951         struct extent_buffer *eb;
9952         struct btrfs_key key;
9953         struct btrfs_key drop_key;
9954         int level;
9955         u64 nr;
9956         int i;
9957         int err = 0;
9958         int ret;
9959
9960         /*
9961          * Skip shared fs/subvolume tree block, in that case they will
9962          * be checked by referencer with lowest rootid
9963          */
9964         if (is_fstree(root->objectid) && !should_check(root, node))
9965                 return 0;
9966
9967         /* Update bytes accounting */
9968         total_btree_bytes += node->len;
9969         if (fs_root_objectid(btrfs_header_owner(node)))
9970                 total_fs_tree_bytes += node->len;
9971         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9972                 total_extent_tree_bytes += node->len;
9973         if (!found_old_backref &&
9974             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9975             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9976             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9977                 found_old_backref = 1;
9978
9979         /* pre-order tranversal, check itself first */
9980         level = btrfs_header_level(node);
9981         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9982                                    btrfs_header_level(node),
9983                                    btrfs_header_owner(node));
9984         err |= ret;
9985         if (err)
9986                 error(
9987         "check %s failed root %llu bytenr %llu level %d, force continue check",
9988                         level ? "node":"leaf", root->objectid,
9989                         btrfs_header_bytenr(node), btrfs_header_level(node));
9990
9991         if (!level) {
9992                 btree_space_waste += btrfs_leaf_free_space(root, node);
9993                 ret = check_leaf_items(root, node);
9994                 err |= ret;
9995                 return err;
9996         }
9997
9998         nr = btrfs_header_nritems(node);
9999         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10000         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10001                 sizeof(struct btrfs_key_ptr);
10002
10003         /* Then check all its children */
10004         for (i = 0; i < nr; i++) {
10005                 u64 blocknr = btrfs_node_blockptr(node, i);
10006
10007                 btrfs_node_key_to_cpu(node, &key, i);
10008                 if (level == root->root_item.drop_level &&
10009                     is_dropped_key(&key, &drop_key))
10010                         continue;
10011
10012                 /*
10013                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10014                  * to call the function itself.
10015                  */
10016                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10017                 if (extent_buffer_uptodate(eb)) {
10018                         ret = traverse_tree_block(root, eb);
10019                         err |= ret;
10020                 }
10021                 free_extent_buffer(eb);
10022         }
10023
10024         return err;
10025 }
10026
10027 /*
10028  * Low memory usage version check_chunks_and_extents.
10029  */
10030 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10031 {
10032         struct btrfs_path path;
10033         struct btrfs_key key;
10034         struct btrfs_root *root1;
10035         struct btrfs_root *cur_root;
10036         int err = 0;
10037         int ret;
10038
10039         root1 = root->fs_info->chunk_root;
10040         ret = traverse_tree_block(root1, root1->node);
10041         err |= ret;
10042
10043         root1 = root->fs_info->tree_root;
10044         ret = traverse_tree_block(root1, root1->node);
10045         err |= ret;
10046
10047         btrfs_init_path(&path);
10048         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10049         key.offset = 0;
10050         key.type = BTRFS_ROOT_ITEM_KEY;
10051
10052         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10053         if (ret) {
10054                 error("cannot find extent treet in tree_root");
10055                 goto out;
10056         }
10057
10058         while (1) {
10059                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10060                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10061                         goto next;
10062                 key.offset = (u64)-1;
10063
10064                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10065                 if (IS_ERR(cur_root) || !cur_root) {
10066                         error("failed to read tree: %lld", key.objectid);
10067                         goto next;
10068                 }
10069
10070                 ret = traverse_tree_block(cur_root, cur_root->node);
10071                 err |= ret;
10072
10073 next:
10074                 ret = btrfs_next_item(root1, &path);
10075                 if (ret)
10076                         goto out;
10077         }
10078
10079 out:
10080         btrfs_release_path(&path);
10081         return err;
10082 }
10083
10084 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10085                            struct btrfs_root *root, int overwrite)
10086 {
10087         struct extent_buffer *c;
10088         struct extent_buffer *old = root->node;
10089         int level;
10090         int ret;
10091         struct btrfs_disk_key disk_key = {0,0,0};
10092
10093         level = 0;
10094
10095         if (overwrite) {
10096                 c = old;
10097                 extent_buffer_get(c);
10098                 goto init;
10099         }
10100         c = btrfs_alloc_free_block(trans, root,
10101                                    root->nodesize,
10102                                    root->root_key.objectid,
10103                                    &disk_key, level, 0, 0);
10104         if (IS_ERR(c)) {
10105                 c = old;
10106                 extent_buffer_get(c);
10107                 overwrite = 1;
10108         }
10109 init:
10110         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10111         btrfs_set_header_level(c, level);
10112         btrfs_set_header_bytenr(c, c->start);
10113         btrfs_set_header_generation(c, trans->transid);
10114         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10115         btrfs_set_header_owner(c, root->root_key.objectid);
10116
10117         write_extent_buffer(c, root->fs_info->fsid,
10118                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10119
10120         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10121                             btrfs_header_chunk_tree_uuid(c),
10122                             BTRFS_UUID_SIZE);
10123
10124         btrfs_mark_buffer_dirty(c);
10125         /*
10126          * this case can happen in the following case:
10127          *
10128          * 1.overwrite previous root.
10129          *
10130          * 2.reinit reloc data root, this is because we skip pin
10131          * down reloc data tree before which means we can allocate
10132          * same block bytenr here.
10133          */
10134         if (old->start == c->start) {
10135                 btrfs_set_root_generation(&root->root_item,
10136                                           trans->transid);
10137                 root->root_item.level = btrfs_header_level(root->node);
10138                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10139                                         &root->root_key, &root->root_item);
10140                 if (ret) {
10141                         free_extent_buffer(c);
10142                         return ret;
10143                 }
10144         }
10145         free_extent_buffer(old);
10146         root->node = c;
10147         add_root_to_dirty_list(root);
10148         return 0;
10149 }
10150
10151 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10152                                 struct extent_buffer *eb, int tree_root)
10153 {
10154         struct extent_buffer *tmp;
10155         struct btrfs_root_item *ri;
10156         struct btrfs_key key;
10157         u64 bytenr;
10158         u32 nodesize;
10159         int level = btrfs_header_level(eb);
10160         int nritems;
10161         int ret;
10162         int i;
10163
10164         /*
10165          * If we have pinned this block before, don't pin it again.
10166          * This can not only avoid forever loop with broken filesystem
10167          * but also give us some speedups.
10168          */
10169         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10170                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10171                 return 0;
10172
10173         btrfs_pin_extent(fs_info, eb->start, eb->len);
10174
10175         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10176         nritems = btrfs_header_nritems(eb);
10177         for (i = 0; i < nritems; i++) {
10178                 if (level == 0) {
10179                         btrfs_item_key_to_cpu(eb, &key, i);
10180                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10181                                 continue;
10182                         /* Skip the extent root and reloc roots */
10183                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10184                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10185                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10186                                 continue;
10187                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10188                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10189
10190                         /*
10191                          * If at any point we start needing the real root we
10192                          * will have to build a stump root for the root we are
10193                          * in, but for now this doesn't actually use the root so
10194                          * just pass in extent_root.
10195                          */
10196                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10197                                               nodesize, 0);
10198                         if (!extent_buffer_uptodate(tmp)) {
10199                                 fprintf(stderr, "Error reading root block\n");
10200                                 return -EIO;
10201                         }
10202                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10203                         free_extent_buffer(tmp);
10204                         if (ret)
10205                                 return ret;
10206                 } else {
10207                         bytenr = btrfs_node_blockptr(eb, i);
10208
10209                         /* If we aren't the tree root don't read the block */
10210                         if (level == 1 && !tree_root) {
10211                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10212                                 continue;
10213                         }
10214
10215                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10216                                               nodesize, 0);
10217                         if (!extent_buffer_uptodate(tmp)) {
10218                                 fprintf(stderr, "Error reading tree block\n");
10219                                 return -EIO;
10220                         }
10221                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10222                         free_extent_buffer(tmp);
10223                         if (ret)
10224                                 return ret;
10225                 }
10226         }
10227
10228         return 0;
10229 }
10230
10231 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10232 {
10233         int ret;
10234
10235         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10236         if (ret)
10237                 return ret;
10238
10239         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10240 }
10241
10242 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10243 {
10244         struct btrfs_block_group_cache *cache;
10245         struct btrfs_path *path;
10246         struct extent_buffer *leaf;
10247         struct btrfs_chunk *chunk;
10248         struct btrfs_key key;
10249         int ret;
10250         u64 start;
10251
10252         path = btrfs_alloc_path();
10253         if (!path)
10254                 return -ENOMEM;
10255
10256         key.objectid = 0;
10257         key.type = BTRFS_CHUNK_ITEM_KEY;
10258         key.offset = 0;
10259
10260         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10261         if (ret < 0) {
10262                 btrfs_free_path(path);
10263                 return ret;
10264         }
10265
10266         /*
10267          * We do this in case the block groups were screwed up and had alloc
10268          * bits that aren't actually set on the chunks.  This happens with
10269          * restored images every time and could happen in real life I guess.
10270          */
10271         fs_info->avail_data_alloc_bits = 0;
10272         fs_info->avail_metadata_alloc_bits = 0;
10273         fs_info->avail_system_alloc_bits = 0;
10274
10275         /* First we need to create the in-memory block groups */
10276         while (1) {
10277                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10278                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10279                         if (ret < 0) {
10280                                 btrfs_free_path(path);
10281                                 return ret;
10282                         }
10283                         if (ret) {
10284                                 ret = 0;
10285                                 break;
10286                         }
10287                 }
10288                 leaf = path->nodes[0];
10289                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10290                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10291                         path->slots[0]++;
10292                         continue;
10293                 }
10294
10295                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10296                                        struct btrfs_chunk);
10297                 btrfs_add_block_group(fs_info, 0,
10298                                       btrfs_chunk_type(leaf, chunk),
10299                                       key.objectid, key.offset,
10300                                       btrfs_chunk_length(leaf, chunk));
10301                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10302                                  key.offset + btrfs_chunk_length(leaf, chunk),
10303                                  GFP_NOFS);
10304                 path->slots[0]++;
10305         }
10306         start = 0;
10307         while (1) {
10308                 cache = btrfs_lookup_first_block_group(fs_info, start);
10309                 if (!cache)
10310                         break;
10311                 cache->cached = 1;
10312                 start = cache->key.objectid + cache->key.offset;
10313         }
10314
10315         btrfs_free_path(path);
10316         return 0;
10317 }
10318
10319 static int reset_balance(struct btrfs_trans_handle *trans,
10320                          struct btrfs_fs_info *fs_info)
10321 {
10322         struct btrfs_root *root = fs_info->tree_root;
10323         struct btrfs_path *path;
10324         struct extent_buffer *leaf;
10325         struct btrfs_key key;
10326         int del_slot, del_nr = 0;
10327         int ret;
10328         int found = 0;
10329
10330         path = btrfs_alloc_path();
10331         if (!path)
10332                 return -ENOMEM;
10333
10334         key.objectid = BTRFS_BALANCE_OBJECTID;
10335         key.type = BTRFS_BALANCE_ITEM_KEY;
10336         key.offset = 0;
10337
10338         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10339         if (ret) {
10340                 if (ret > 0)
10341                         ret = 0;
10342                 if (!ret)
10343                         goto reinit_data_reloc;
10344                 else
10345                         goto out;
10346         }
10347
10348         ret = btrfs_del_item(trans, root, path);
10349         if (ret)
10350                 goto out;
10351         btrfs_release_path(path);
10352
10353         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10354         key.type = BTRFS_ROOT_ITEM_KEY;
10355         key.offset = 0;
10356
10357         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10358         if (ret < 0)
10359                 goto out;
10360         while (1) {
10361                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10362                         if (!found)
10363                                 break;
10364
10365                         if (del_nr) {
10366                                 ret = btrfs_del_items(trans, root, path,
10367                                                       del_slot, del_nr);
10368                                 del_nr = 0;
10369                                 if (ret)
10370                                         goto out;
10371                         }
10372                         key.offset++;
10373                         btrfs_release_path(path);
10374
10375                         found = 0;
10376                         ret = btrfs_search_slot(trans, root, &key, path,
10377                                                 -1, 1);
10378                         if (ret < 0)
10379                                 goto out;
10380                         continue;
10381                 }
10382                 found = 1;
10383                 leaf = path->nodes[0];
10384                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10385                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10386                         break;
10387                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10388                         path->slots[0]++;
10389                         continue;
10390                 }
10391                 if (!del_nr) {
10392                         del_slot = path->slots[0];
10393                         del_nr = 1;
10394                 } else {
10395                         del_nr++;
10396                 }
10397                 path->slots[0]++;
10398         }
10399
10400         if (del_nr) {
10401                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10402                 if (ret)
10403                         goto out;
10404         }
10405         btrfs_release_path(path);
10406
10407 reinit_data_reloc:
10408         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10409         key.type = BTRFS_ROOT_ITEM_KEY;
10410         key.offset = (u64)-1;
10411         root = btrfs_read_fs_root(fs_info, &key);
10412         if (IS_ERR(root)) {
10413                 fprintf(stderr, "Error reading data reloc tree\n");
10414                 ret = PTR_ERR(root);
10415                 goto out;
10416         }
10417         record_root_in_trans(trans, root);
10418         ret = btrfs_fsck_reinit_root(trans, root, 0);
10419         if (ret)
10420                 goto out;
10421         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10422 out:
10423         btrfs_free_path(path);
10424         return ret;
10425 }
10426
10427 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10428                               struct btrfs_fs_info *fs_info)
10429 {
10430         u64 start = 0;
10431         int ret;
10432
10433         /*
10434          * The only reason we don't do this is because right now we're just
10435          * walking the trees we find and pinning down their bytes, we don't look
10436          * at any of the leaves.  In order to do mixed groups we'd have to check
10437          * the leaves of any fs roots and pin down the bytes for any file
10438          * extents we find.  Not hard but why do it if we don't have to?
10439          */
10440         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10441                 fprintf(stderr, "We don't support re-initing the extent tree "
10442                         "for mixed block groups yet, please notify a btrfs "
10443                         "developer you want to do this so they can add this "
10444                         "functionality.\n");
10445                 return -EINVAL;
10446         }
10447
10448         /*
10449          * first we need to walk all of the trees except the extent tree and pin
10450          * down the bytes that are in use so we don't overwrite any existing
10451          * metadata.
10452          */
10453         ret = pin_metadata_blocks(fs_info);
10454         if (ret) {
10455                 fprintf(stderr, "error pinning down used bytes\n");
10456                 return ret;
10457         }
10458
10459         /*
10460          * Need to drop all the block groups since we're going to recreate all
10461          * of them again.
10462          */
10463         btrfs_free_block_groups(fs_info);
10464         ret = reset_block_groups(fs_info);
10465         if (ret) {
10466                 fprintf(stderr, "error resetting the block groups\n");
10467                 return ret;
10468         }
10469
10470         /* Ok we can allocate now, reinit the extent root */
10471         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10472         if (ret) {
10473                 fprintf(stderr, "extent root initialization failed\n");
10474                 /*
10475                  * When the transaction code is updated we should end the
10476                  * transaction, but for now progs only knows about commit so
10477                  * just return an error.
10478                  */
10479                 return ret;
10480         }
10481
10482         /*
10483          * Now we have all the in-memory block groups setup so we can make
10484          * allocations properly, and the metadata we care about is safe since we
10485          * pinned all of it above.
10486          */
10487         while (1) {
10488                 struct btrfs_block_group_cache *cache;
10489
10490                 cache = btrfs_lookup_first_block_group(fs_info, start);
10491                 if (!cache)
10492                         break;
10493                 start = cache->key.objectid + cache->key.offset;
10494                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10495                                         &cache->key, &cache->item,
10496                                         sizeof(cache->item));
10497                 if (ret) {
10498                         fprintf(stderr, "Error adding block group\n");
10499                         return ret;
10500                 }
10501                 btrfs_extent_post_op(trans, fs_info->extent_root);
10502         }
10503
10504         ret = reset_balance(trans, fs_info);
10505         if (ret)
10506                 fprintf(stderr, "error resetting the pending balance\n");
10507
10508         return ret;
10509 }
10510
10511 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10512 {
10513         struct btrfs_path *path;
10514         struct btrfs_trans_handle *trans;
10515         struct btrfs_key key;
10516         int ret;
10517
10518         printf("Recowing metadata block %llu\n", eb->start);
10519         key.objectid = btrfs_header_owner(eb);
10520         key.type = BTRFS_ROOT_ITEM_KEY;
10521         key.offset = (u64)-1;
10522
10523         root = btrfs_read_fs_root(root->fs_info, &key);
10524         if (IS_ERR(root)) {
10525                 fprintf(stderr, "Couldn't find owner root %llu\n",
10526                         key.objectid);
10527                 return PTR_ERR(root);
10528         }
10529
10530         path = btrfs_alloc_path();
10531         if (!path)
10532                 return -ENOMEM;
10533
10534         trans = btrfs_start_transaction(root, 1);
10535         if (IS_ERR(trans)) {
10536                 btrfs_free_path(path);
10537                 return PTR_ERR(trans);
10538         }
10539
10540         path->lowest_level = btrfs_header_level(eb);
10541         if (path->lowest_level)
10542                 btrfs_node_key_to_cpu(eb, &key, 0);
10543         else
10544                 btrfs_item_key_to_cpu(eb, &key, 0);
10545
10546         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10547         btrfs_commit_transaction(trans, root);
10548         btrfs_free_path(path);
10549         return ret;
10550 }
10551
10552 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10553 {
10554         struct btrfs_path *path;
10555         struct btrfs_trans_handle *trans;
10556         struct btrfs_key key;
10557         int ret;
10558
10559         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10560                bad->key.type, bad->key.offset);
10561         key.objectid = bad->root_id;
10562         key.type = BTRFS_ROOT_ITEM_KEY;
10563         key.offset = (u64)-1;
10564
10565         root = btrfs_read_fs_root(root->fs_info, &key);
10566         if (IS_ERR(root)) {
10567                 fprintf(stderr, "Couldn't find owner root %llu\n",
10568                         key.objectid);
10569                 return PTR_ERR(root);
10570         }
10571
10572         path = btrfs_alloc_path();
10573         if (!path)
10574                 return -ENOMEM;
10575
10576         trans = btrfs_start_transaction(root, 1);
10577         if (IS_ERR(trans)) {
10578                 btrfs_free_path(path);
10579                 return PTR_ERR(trans);
10580         }
10581
10582         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10583         if (ret) {
10584                 if (ret > 0)
10585                         ret = 0;
10586                 goto out;
10587         }
10588         ret = btrfs_del_item(trans, root, path);
10589 out:
10590         btrfs_commit_transaction(trans, root);
10591         btrfs_free_path(path);
10592         return ret;
10593 }
10594
10595 static int zero_log_tree(struct btrfs_root *root)
10596 {
10597         struct btrfs_trans_handle *trans;
10598         int ret;
10599
10600         trans = btrfs_start_transaction(root, 1);
10601         if (IS_ERR(trans)) {
10602                 ret = PTR_ERR(trans);
10603                 return ret;
10604         }
10605         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10606         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10607         ret = btrfs_commit_transaction(trans, root);
10608         return ret;
10609 }
10610
10611 static int populate_csum(struct btrfs_trans_handle *trans,
10612                          struct btrfs_root *csum_root, char *buf, u64 start,
10613                          u64 len)
10614 {
10615         u64 offset = 0;
10616         u64 sectorsize;
10617         int ret = 0;
10618
10619         while (offset < len) {
10620                 sectorsize = csum_root->sectorsize;
10621                 ret = read_extent_data(csum_root, buf, start + offset,
10622                                        &sectorsize, 0);
10623                 if (ret)
10624                         break;
10625                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10626                                             start + offset, buf, sectorsize);
10627                 if (ret)
10628                         break;
10629                 offset += sectorsize;
10630         }
10631         return ret;
10632 }
10633
10634 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10635                                       struct btrfs_root *csum_root,
10636                                       struct btrfs_root *cur_root)
10637 {
10638         struct btrfs_path *path;
10639         struct btrfs_key key;
10640         struct extent_buffer *node;
10641         struct btrfs_file_extent_item *fi;
10642         char *buf = NULL;
10643         u64 start = 0;
10644         u64 len = 0;
10645         int slot = 0;
10646         int ret = 0;
10647
10648         path = btrfs_alloc_path();
10649         if (!path)
10650                 return -ENOMEM;
10651         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10652         if (!buf) {
10653                 ret = -ENOMEM;
10654                 goto out;
10655         }
10656
10657         key.objectid = 0;
10658         key.offset = 0;
10659         key.type = 0;
10660
10661         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10662         if (ret < 0)
10663                 goto out;
10664         /* Iterate all regular file extents and fill its csum */
10665         while (1) {
10666                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10667
10668                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10669                         goto next;
10670                 node = path->nodes[0];
10671                 slot = path->slots[0];
10672                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10673                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10674                         goto next;
10675                 start = btrfs_file_extent_disk_bytenr(node, fi);
10676                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10677
10678                 ret = populate_csum(trans, csum_root, buf, start, len);
10679                 if (ret == -EEXIST)
10680                         ret = 0;
10681                 if (ret < 0)
10682                         goto out;
10683 next:
10684                 /*
10685                  * TODO: if next leaf is corrupted, jump to nearest next valid
10686                  * leaf.
10687                  */
10688                 ret = btrfs_next_item(cur_root, path);
10689                 if (ret < 0)
10690                         goto out;
10691                 if (ret > 0) {
10692                         ret = 0;
10693                         goto out;
10694                 }
10695         }
10696
10697 out:
10698         btrfs_free_path(path);
10699         free(buf);
10700         return ret;
10701 }
10702
10703 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10704                                   struct btrfs_root *csum_root)
10705 {
10706         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10707         struct btrfs_path *path;
10708         struct btrfs_root *tree_root = fs_info->tree_root;
10709         struct btrfs_root *cur_root;
10710         struct extent_buffer *node;
10711         struct btrfs_key key;
10712         int slot = 0;
10713         int ret = 0;
10714
10715         path = btrfs_alloc_path();
10716         if (!path)
10717                 return -ENOMEM;
10718
10719         key.objectid = BTRFS_FS_TREE_OBJECTID;
10720         key.offset = 0;
10721         key.type = BTRFS_ROOT_ITEM_KEY;
10722
10723         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10724         if (ret < 0)
10725                 goto out;
10726         if (ret > 0) {
10727                 ret = -ENOENT;
10728                 goto out;
10729         }
10730
10731         while (1) {
10732                 node = path->nodes[0];
10733                 slot = path->slots[0];
10734                 btrfs_item_key_to_cpu(node, &key, slot);
10735                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10736                         goto out;
10737                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10738                         goto next;
10739                 if (!is_fstree(key.objectid))
10740                         goto next;
10741                 key.offset = (u64)-1;
10742
10743                 cur_root = btrfs_read_fs_root(fs_info, &key);
10744                 if (IS_ERR(cur_root) || !cur_root) {
10745                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10746                                 key.objectid);
10747                         goto out;
10748                 }
10749                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10750                                 cur_root);
10751                 if (ret < 0)
10752                         goto out;
10753 next:
10754                 ret = btrfs_next_item(tree_root, path);
10755                 if (ret > 0) {
10756                         ret = 0;
10757                         goto out;
10758                 }
10759                 if (ret < 0)
10760                         goto out;
10761         }
10762
10763 out:
10764         btrfs_free_path(path);
10765         return ret;
10766 }
10767
10768 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10769                                       struct btrfs_root *csum_root)
10770 {
10771         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10772         struct btrfs_path *path;
10773         struct btrfs_extent_item *ei;
10774         struct extent_buffer *leaf;
10775         char *buf;
10776         struct btrfs_key key;
10777         int ret;
10778
10779         path = btrfs_alloc_path();
10780         if (!path)
10781                 return -ENOMEM;
10782
10783         key.objectid = 0;
10784         key.type = BTRFS_EXTENT_ITEM_KEY;
10785         key.offset = 0;
10786
10787         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10788         if (ret < 0) {
10789                 btrfs_free_path(path);
10790                 return ret;
10791         }
10792
10793         buf = malloc(csum_root->sectorsize);
10794         if (!buf) {
10795                 btrfs_free_path(path);
10796                 return -ENOMEM;
10797         }
10798
10799         while (1) {
10800                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10801                         ret = btrfs_next_leaf(extent_root, path);
10802                         if (ret < 0)
10803                                 break;
10804                         if (ret) {
10805                                 ret = 0;
10806                                 break;
10807                         }
10808                 }
10809                 leaf = path->nodes[0];
10810
10811                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10812                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10813                         path->slots[0]++;
10814                         continue;
10815                 }
10816
10817                 ei = btrfs_item_ptr(leaf, path->slots[0],
10818                                     struct btrfs_extent_item);
10819                 if (!(btrfs_extent_flags(leaf, ei) &
10820                       BTRFS_EXTENT_FLAG_DATA)) {
10821                         path->slots[0]++;
10822                         continue;
10823                 }
10824
10825                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10826                                     key.offset);
10827                 if (ret)
10828                         break;
10829                 path->slots[0]++;
10830         }
10831
10832         btrfs_free_path(path);
10833         free(buf);
10834         return ret;
10835 }
10836
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * When the extent tree has been re-initialized its extent info is gone, so
 * it cannot be used as the source.  In that case the caller sets
 * @search_fs_tree and the fs/subvolume trees are walked instead; otherwise
 * the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
10853
10854 static void free_roots_info_cache(void)
10855 {
10856         if (!roots_info_cache)
10857                 return;
10858
10859         while (!cache_tree_empty(roots_info_cache)) {
10860                 struct cache_extent *entry;
10861                 struct root_item_info *rii;
10862
10863                 entry = first_cache_extent(roots_info_cache);
10864                 if (!entry)
10865                         break;
10866                 remove_cache_extent(roots_info_cache, entry);
10867                 rii = container_of(entry, struct root_item_info, cache_extent);
10868                 free(rii);
10869         }
10870
10871         free(roots_info_cache);
10872         roots_info_cache = NULL;
10873 }
10874
10875 static int build_roots_info_cache(struct btrfs_fs_info *info)
10876 {
10877         int ret = 0;
10878         struct btrfs_key key;
10879         struct extent_buffer *leaf;
10880         struct btrfs_path *path;
10881
10882         if (!roots_info_cache) {
10883                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10884                 if (!roots_info_cache)
10885                         return -ENOMEM;
10886                 cache_tree_init(roots_info_cache);
10887         }
10888
10889         path = btrfs_alloc_path();
10890         if (!path)
10891                 return -ENOMEM;
10892
10893         key.objectid = 0;
10894         key.type = BTRFS_EXTENT_ITEM_KEY;
10895         key.offset = 0;
10896
10897         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10898         if (ret < 0)
10899                 goto out;
10900         leaf = path->nodes[0];
10901
10902         while (1) {
10903                 struct btrfs_key found_key;
10904                 struct btrfs_extent_item *ei;
10905                 struct btrfs_extent_inline_ref *iref;
10906                 int slot = path->slots[0];
10907                 int type;
10908                 u64 flags;
10909                 u64 root_id;
10910                 u8 level;
10911                 struct cache_extent *entry;
10912                 struct root_item_info *rii;
10913
10914                 if (slot >= btrfs_header_nritems(leaf)) {
10915                         ret = btrfs_next_leaf(info->extent_root, path);
10916                         if (ret < 0) {
10917                                 break;
10918                         } else if (ret) {
10919                                 ret = 0;
10920                                 break;
10921                         }
10922                         leaf = path->nodes[0];
10923                         slot = path->slots[0];
10924                 }
10925
10926                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10927
10928                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10929                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10930                         goto next;
10931
10932                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10933                 flags = btrfs_extent_flags(leaf, ei);
10934
10935                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10936                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10937                         goto next;
10938
10939                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10940                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10941                         level = found_key.offset;
10942                 } else {
10943                         struct btrfs_tree_block_info *binfo;
10944
10945                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10946                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10947                         level = btrfs_tree_block_level(leaf, binfo);
10948                 }
10949
10950                 /*
10951                  * For a root extent, it must be of the following type and the
10952                  * first (and only one) iref in the item.
10953                  */
10954                 type = btrfs_extent_inline_ref_type(leaf, iref);
10955                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10956                         goto next;
10957
10958                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10959                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10960                 if (!entry) {
10961                         rii = malloc(sizeof(struct root_item_info));
10962                         if (!rii) {
10963                                 ret = -ENOMEM;
10964                                 goto out;
10965                         }
10966                         rii->cache_extent.start = root_id;
10967                         rii->cache_extent.size = 1;
10968                         rii->level = (u8)-1;
10969                         entry = &rii->cache_extent;
10970                         ret = insert_cache_extent(roots_info_cache, entry);
10971                         ASSERT(ret == 0);
10972                 } else {
10973                         rii = container_of(entry, struct root_item_info,
10974                                            cache_extent);
10975                 }
10976
10977                 ASSERT(rii->cache_extent.start == root_id);
10978                 ASSERT(rii->cache_extent.size == 1);
10979
10980                 if (level > rii->level || rii->level == (u8)-1) {
10981                         rii->level = level;
10982                         rii->bytenr = found_key.objectid;
10983                         rii->gen = btrfs_extent_generation(leaf, ei);
10984                         rii->node_count = 1;
10985                 } else if (level == rii->level) {
10986                         rii->node_count++;
10987                 }
10988 next:
10989                 path->slots[0]++;
10990         }
10991
10992 out:
10993         btrfs_free_path(path);
10994
10995         return ret;
10996 }
10997
10998 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10999                                   struct btrfs_path *path,
11000                                   const struct btrfs_key *root_key,
11001                                   const int read_only_mode)
11002 {
11003         const u64 root_id = root_key->objectid;
11004         struct cache_extent *entry;
11005         struct root_item_info *rii;
11006         struct btrfs_root_item ri;
11007         unsigned long offset;
11008
11009         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11010         if (!entry) {
11011                 fprintf(stderr,
11012                         "Error: could not find extent items for root %llu\n",
11013                         root_key->objectid);
11014                 return -ENOENT;
11015         }
11016
11017         rii = container_of(entry, struct root_item_info, cache_extent);
11018         ASSERT(rii->cache_extent.start == root_id);
11019         ASSERT(rii->cache_extent.size == 1);
11020
11021         if (rii->node_count != 1) {
11022                 fprintf(stderr,
11023                         "Error: could not find btree root extent for root %llu\n",
11024                         root_id);
11025                 return -ENOENT;
11026         }
11027
11028         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11029         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11030
11031         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11032             btrfs_root_level(&ri) != rii->level ||
11033             btrfs_root_generation(&ri) != rii->gen) {
11034
11035                 /*
11036                  * If we're in repair mode but our caller told us to not update
11037                  * the root item, i.e. just check if it needs to be updated, don't
11038                  * print this message, since the caller will call us again shortly
11039                  * for the same root item without read only mode (the caller will
11040                  * open a transaction first).
11041                  */
11042                 if (!(read_only_mode && repair))
11043                         fprintf(stderr,
11044                                 "%sroot item for root %llu,"
11045                                 " current bytenr %llu, current gen %llu, current level %u,"
11046                                 " new bytenr %llu, new gen %llu, new level %u\n",
11047                                 (read_only_mode ? "" : "fixing "),
11048                                 root_id,
11049                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11050                                 btrfs_root_level(&ri),
11051                                 rii->bytenr, rii->gen, rii->level);
11052
11053                 if (btrfs_root_generation(&ri) > rii->gen) {
11054                         fprintf(stderr,
11055                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11056                                 root_id, btrfs_root_generation(&ri), rii->gen);
11057                         return -EINVAL;
11058                 }
11059
11060                 if (!read_only_mode) {
11061                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11062                         btrfs_set_root_level(&ri, rii->level);
11063                         btrfs_set_root_generation(&ri, rii->gen);
11064                         write_extent_buffer(path->nodes[0], &ri,
11065                                             offset, sizeof(ri));
11066                 }
11067
11068                 return 1;
11069         }
11070
11071         return 0;
11072 }
11073
11074 /*
11075  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11076  * caused read-only snapshots to be corrupted if they were created at a moment
11077  * when the source subvolume/snapshot had orphan items. The issue was that the
11078  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11079  * node instead of the post orphan cleanup root node.
11080  * So this function, and its callees, just detects and fixes those cases. Even
11081  * though the regression was for read-only snapshots, this function applies to
11082  * any snapshot/subvolume root.
11083  * This must be run before any other repair code - not doing it so, makes other
11084  * repair code delete or modify backrefs in the extent tree for example, which
11085  * will result in an inconsistent fs after repairing the root items.
11086  */
11087 static int repair_root_items(struct btrfs_fs_info *info)
11088 {
11089         struct btrfs_path *path = NULL;
11090         struct btrfs_key key;
11091         struct extent_buffer *leaf;
11092         struct btrfs_trans_handle *trans = NULL;
11093         int ret = 0;
11094         int bad_roots = 0;
11095         int need_trans = 0;
11096
11097         ret = build_roots_info_cache(info);
11098         if (ret)
11099                 goto out;
11100
11101         path = btrfs_alloc_path();
11102         if (!path) {
11103                 ret = -ENOMEM;
11104                 goto out;
11105         }
11106
11107         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11108         key.type = BTRFS_ROOT_ITEM_KEY;
11109         key.offset = 0;
11110
11111 again:
11112         /*
11113          * Avoid opening and committing transactions if a leaf doesn't have
11114          * any root items that need to be fixed, so that we avoid rotating
11115          * backup roots unnecessarily.
11116          */
11117         if (need_trans) {
11118                 trans = btrfs_start_transaction(info->tree_root, 1);
11119                 if (IS_ERR(trans)) {
11120                         ret = PTR_ERR(trans);
11121                         goto out;
11122                 }
11123         }
11124
11125         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11126                                 0, trans ? 1 : 0);
11127         if (ret < 0)
11128                 goto out;
11129         leaf = path->nodes[0];
11130
11131         while (1) {
11132                 struct btrfs_key found_key;
11133
11134                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11135                         int no_more_keys = find_next_key(path, &key);
11136
11137                         btrfs_release_path(path);
11138                         if (trans) {
11139                                 ret = btrfs_commit_transaction(trans,
11140                                                                info->tree_root);
11141                                 trans = NULL;
11142                                 if (ret < 0)
11143                                         goto out;
11144                         }
11145                         need_trans = 0;
11146                         if (no_more_keys)
11147                                 break;
11148                         goto again;
11149                 }
11150
11151                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11152
11153                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11154                         goto next;
11155                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11156                         goto next;
11157
11158                 ret = maybe_repair_root_item(info, path, &found_key,
11159                                              trans ? 0 : 1);
11160                 if (ret < 0)
11161                         goto out;
11162                 if (ret) {
11163                         if (!trans && repair) {
11164                                 need_trans = 1;
11165                                 key = found_key;
11166                                 btrfs_release_path(path);
11167                                 goto again;
11168                         }
11169                         bad_roots++;
11170                 }
11171 next:
11172                 path->slots[0]++;
11173         }
11174         ret = 0;
11175 out:
11176         free_roots_info_cache();
11177         btrfs_free_path(path);
11178         if (trans)
11179                 btrfs_commit_transaction(trans, info->tree_root);
11180         if (ret < 0)
11181                 return ret;
11182
11183         return bad_roots;
11184 }
11185
/*
 * Help text for the "btrfs check" command, as a NULL-terminated array of
 * strings.  The first entries hold the synopsis and the description, and
 * the empty string separates them from the per-option lines.
 *
 * NOTE(review): -Q and -E are advertised here as short options, but the
 * short-option string passed to getopt_long() in cmd_check() is "as:br:p",
 * which contains neither letter - confirm whether the short forms work.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               select mode, allows to make some memory/IO",
        "                            trade-offs, where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report           print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        NULL
};
11215
11216 int cmd_check(int argc, char **argv)
11217 {
11218         struct cache_tree root_cache;
11219         struct btrfs_root *root;
11220         struct btrfs_fs_info *info;
11221         u64 bytenr = 0;
11222         u64 subvolid = 0;
11223         u64 tree_root_bytenr = 0;
11224         u64 chunk_root_bytenr = 0;
11225         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11226         int ret;
11227         u64 num;
11228         int init_csum_tree = 0;
11229         int readonly = 0;
11230         int qgroup_report = 0;
11231         int qgroups_repaired = 0;
11232         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11233
11234         while(1) {
11235                 int c;
11236                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11237                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11238                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11239                         GETOPT_VAL_MODE };
11240                 static const struct option long_options[] = {
11241                         { "super", required_argument, NULL, 's' },
11242                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11243                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11244                         { "init-csum-tree", no_argument, NULL,
11245                                 GETOPT_VAL_INIT_CSUM },
11246                         { "init-extent-tree", no_argument, NULL,
11247                                 GETOPT_VAL_INIT_EXTENT },
11248                         { "check-data-csum", no_argument, NULL,
11249                                 GETOPT_VAL_CHECK_CSUM },
11250                         { "backup", no_argument, NULL, 'b' },
11251                         { "subvol-extents", required_argument, NULL, 'E' },
11252                         { "qgroup-report", no_argument, NULL, 'Q' },
11253                         { "tree-root", required_argument, NULL, 'r' },
11254                         { "chunk-root", required_argument, NULL,
11255                                 GETOPT_VAL_CHUNK_TREE },
11256                         { "progress", no_argument, NULL, 'p' },
11257                         { "mode", required_argument, NULL,
11258                                 GETOPT_VAL_MODE },
11259                         { NULL, 0, NULL, 0}
11260                 };
11261
11262                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11263                 if (c < 0)
11264                         break;
11265                 switch(c) {
11266                         case 'a': /* ignored */ break;
11267                         case 'b':
11268                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11269                                 break;
11270                         case 's':
11271                                 num = arg_strtou64(optarg);
11272                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11273                                         fprintf(stderr,
11274                                                 "ERROR: super mirror should be less than: %d\n",
11275                                                 BTRFS_SUPER_MIRROR_MAX);
11276                                         exit(1);
11277                                 }
11278                                 bytenr = btrfs_sb_offset(((int)num));
11279                                 printf("using SB copy %llu, bytenr %llu\n", num,
11280                                        (unsigned long long)bytenr);
11281                                 break;
11282                         case 'Q':
11283                                 qgroup_report = 1;
11284                                 break;
11285                         case 'E':
11286                                 subvolid = arg_strtou64(optarg);
11287                                 break;
11288                         case 'r':
11289                                 tree_root_bytenr = arg_strtou64(optarg);
11290                                 break;
11291                         case GETOPT_VAL_CHUNK_TREE:
11292                                 chunk_root_bytenr = arg_strtou64(optarg);
11293                                 break;
11294                         case 'p':
11295                                 ctx.progress_enabled = true;
11296                                 break;
11297                         case '?':
11298                         case 'h':
11299                                 usage(cmd_check_usage);
11300                         case GETOPT_VAL_REPAIR:
11301                                 printf("enabling repair mode\n");
11302                                 repair = 1;
11303                                 ctree_flags |= OPEN_CTREE_WRITES;
11304                                 break;
11305                         case GETOPT_VAL_READONLY:
11306                                 readonly = 1;
11307                                 break;
11308                         case GETOPT_VAL_INIT_CSUM:
11309                                 printf("Creating a new CRC tree\n");
11310                                 init_csum_tree = 1;
11311                                 repair = 1;
11312                                 ctree_flags |= OPEN_CTREE_WRITES;
11313                                 break;
11314                         case GETOPT_VAL_INIT_EXTENT:
11315                                 init_extent_tree = 1;
11316                                 ctree_flags |= (OPEN_CTREE_WRITES |
11317                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11318                                 repair = 1;
11319                                 break;
11320                         case GETOPT_VAL_CHECK_CSUM:
11321                                 check_data_csum = 1;
11322                                 break;
11323                         case GETOPT_VAL_MODE:
11324                                 check_mode = parse_check_mode(optarg);
11325                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11326                                         error("unknown mode: %s", optarg);
11327                                         exit(1);
11328                                 }
11329                                 break;
11330                 }
11331         }
11332
11333         if (check_argc_exact(argc - optind, 1))
11334                 usage(cmd_check_usage);
11335
11336         if (ctx.progress_enabled) {
11337                 ctx.tp = TASK_NOTHING;
11338                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11339         }
11340
11341         /* This check is the only reason for --readonly to exist */
11342         if (readonly && repair) {
11343                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11344                 exit(1);
11345         }
11346
11347         /*
11348          * Not supported yet
11349          */
11350         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11351                 error("Low memory mode doesn't support repair yet");
11352                 exit(1);
11353         }
11354
11355         radix_tree_init();
11356         cache_tree_init(&root_cache);
11357
11358         if((ret = check_mounted(argv[optind])) < 0) {
11359                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11360                 goto err_out;
11361         } else if(ret) {
11362                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11363                 ret = -EBUSY;
11364                 goto err_out;
11365         }
11366
11367         /* only allow partial opening under repair mode */
11368         if (repair)
11369                 ctree_flags |= OPEN_CTREE_PARTIAL;
11370
11371         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11372                                   chunk_root_bytenr, ctree_flags);
11373         if (!info) {
11374                 fprintf(stderr, "Couldn't open file system\n");
11375                 ret = -EIO;
11376                 goto err_out;
11377         }
11378
11379         global_info = info;
11380         root = info->fs_root;
11381
11382         /*
11383          * repair mode will force us to commit transaction which
11384          * will make us fail to load log tree when mounting.
11385          */
11386         if (repair && btrfs_super_log_root(info->super_copy)) {
11387                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11388                 if (!ret) {
11389                         ret = 1;
11390                         goto close_out;
11391                 }
11392                 ret = zero_log_tree(root);
11393                 if (ret) {
11394                         fprintf(stderr, "fail to zero log tree\n");
11395                         goto close_out;
11396                 }
11397         }
11398
11399         uuid_unparse(info->super_copy->fsid, uuidbuf);
11400         if (qgroup_report) {
11401                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11402                        uuidbuf);
11403                 ret = qgroup_verify_all(info);
11404                 if (ret == 0)
11405                         report_qgroups(1);
11406                 goto close_out;
11407         }
11408         if (subvolid) {
11409                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11410                        subvolid, argv[optind], uuidbuf);
11411                 ret = print_extent_state(info, subvolid);
11412                 goto close_out;
11413         }
11414         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11415
11416         if (!extent_buffer_uptodate(info->tree_root->node) ||
11417             !extent_buffer_uptodate(info->dev_root->node) ||
11418             !extent_buffer_uptodate(info->chunk_root->node)) {
11419                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11420                 ret = -EIO;
11421                 goto close_out;
11422         }
11423
11424         if (init_extent_tree || init_csum_tree) {
11425                 struct btrfs_trans_handle *trans;
11426
11427                 trans = btrfs_start_transaction(info->extent_root, 0);
11428                 if (IS_ERR(trans)) {
11429                         fprintf(stderr, "Error starting transaction\n");
11430                         ret = PTR_ERR(trans);
11431                         goto close_out;
11432                 }
11433
11434                 if (init_extent_tree) {
11435                         printf("Creating a new extent tree\n");
11436                         ret = reinit_extent_tree(trans, info);
11437                         if (ret)
11438                                 goto close_out;
11439                 }
11440
11441                 if (init_csum_tree) {
11442                         fprintf(stderr, "Reinit crc root\n");
11443                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11444                         if (ret) {
11445                                 fprintf(stderr, "crc root initialization failed\n");
11446                                 ret = -EIO;
11447                                 goto close_out;
11448                         }
11449
11450                         ret = fill_csum_tree(trans, info->csum_root,
11451                                              init_extent_tree);
11452                         if (ret) {
11453                                 fprintf(stderr, "crc refilling failed\n");
11454                                 return -EIO;
11455                         }
11456                 }
11457                 /*
11458                  * Ok now we commit and run the normal fsck, which will add
11459                  * extent entries for all of the items it finds.
11460                  */
11461                 ret = btrfs_commit_transaction(trans, info->extent_root);
11462                 if (ret)
11463                         goto close_out;
11464         }
11465         if (!extent_buffer_uptodate(info->extent_root->node)) {
11466                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11467                 ret = -EIO;
11468                 goto close_out;
11469         }
11470         if (!extent_buffer_uptodate(info->csum_root->node)) {
11471                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11472                 ret = -EIO;
11473                 goto close_out;
11474         }
11475
11476         if (!ctx.progress_enabled)
11477                 fprintf(stderr, "checking extents\n");
11478         if (check_mode == CHECK_MODE_LOWMEM)
11479                 ret = check_chunks_and_extents_v2(root);
11480         else
11481                 ret = check_chunks_and_extents(root);
11482         if (ret)
11483                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11484
11485         ret = repair_root_items(info);
11486         if (ret < 0)
11487                 goto close_out;
11488         if (repair) {
11489                 fprintf(stderr, "Fixed %d roots.\n", ret);
11490                 ret = 0;
11491         } else if (ret > 0) {
11492                 fprintf(stderr,
11493                        "Found %d roots with an outdated root item.\n",
11494                        ret);
11495                 fprintf(stderr,
11496                         "Please run a filesystem check with the option --repair to fix them.\n");
11497                 ret = 1;
11498                 goto close_out;
11499         }
11500
11501         if (!ctx.progress_enabled) {
11502                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11503                         fprintf(stderr, "checking free space tree\n");
11504                 else
11505                         fprintf(stderr, "checking free space cache\n");
11506         }
11507         ret = check_space_cache(root);
11508         if (ret)
11509                 goto out;
11510
11511         /*
11512          * We used to have to have these hole extents in between our real
11513          * extents so if we don't have this flag set we need to make sure there
11514          * are no gaps in the file extents for inodes, otherwise we can just
11515          * ignore it when this happens.
11516          */
11517         no_holes = btrfs_fs_incompat(root->fs_info,
11518                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11519         if (!ctx.progress_enabled)
11520                 fprintf(stderr, "checking fs roots\n");
11521         ret = check_fs_roots(root, &root_cache);
11522         if (ret)
11523                 goto out;
11524
11525         fprintf(stderr, "checking csums\n");
11526         ret = check_csums(root);
11527         if (ret)
11528                 goto out;
11529
11530         fprintf(stderr, "checking root refs\n");
11531         ret = check_root_refs(root, &root_cache);
11532         if (ret)
11533                 goto out;
11534
11535         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11536                 struct extent_buffer *eb;
11537
11538                 eb = list_first_entry(&root->fs_info->recow_ebs,
11539                                       struct extent_buffer, recow);
11540                 list_del_init(&eb->recow);
11541                 ret = recow_extent_buffer(root, eb);
11542                 if (ret)
11543                         break;
11544         }
11545
11546         while (!list_empty(&delete_items)) {
11547                 struct bad_item *bad;
11548
11549                 bad = list_first_entry(&delete_items, struct bad_item, list);
11550                 list_del_init(&bad->list);
11551                 if (repair)
11552                         ret = delete_bad_item(root, bad);
11553                 free(bad);
11554         }
11555
11556         if (info->quota_enabled) {
11557                 int err;
11558                 fprintf(stderr, "checking quota groups\n");
11559                 err = qgroup_verify_all(info);
11560                 if (err)
11561                         goto out;
11562                 report_qgroups(0);
11563                 err = repair_qgroups(info, &qgroups_repaired);
11564                 if (err)
11565                         goto out;
11566         }
11567
11568         if (!list_empty(&root->fs_info->recow_ebs)) {
11569                 fprintf(stderr, "Transid errors in file system\n");
11570                 ret = 1;
11571         }
11572 out:
11573         /* Don't override original ret */
11574         if (!ret && qgroups_repaired)
11575                 ret = qgroups_repaired;
11576
11577         if (found_old_backref) { /*
11578                  * there was a disk format change when mixed
11579                  * backref was in testing tree. The old format
11580                  * existed about one week.
11581                  */
11582                 printf("\n * Found old mixed backref format. "
11583                        "The old format is not supported! *"
11584                        "\n * Please mount the FS in readonly mode, "
11585                        "backup data and re-format the FS. *\n\n");
11586                 ret = 1;
11587         }
11588         printf("found %llu bytes used err is %d\n",
11589                (unsigned long long)bytes_used, ret);
11590         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11591         printf("total tree bytes: %llu\n",
11592                (unsigned long long)total_btree_bytes);
11593         printf("total fs tree bytes: %llu\n",
11594                (unsigned long long)total_fs_tree_bytes);
11595         printf("total extent tree bytes: %llu\n",
11596                (unsigned long long)total_extent_tree_bytes);
11597         printf("btree space waste bytes: %llu\n",
11598                (unsigned long long)btree_space_waste);
11599         printf("file data blocks allocated: %llu\n referenced %llu\n",
11600                 (unsigned long long)data_bytes_allocated,
11601                 (unsigned long long)data_bytes_referenced);
11602
11603         free_qgroup_counts();
11604         free_root_recs_tree(&root_cache);
11605 close_out:
11606         close_ctree(root);
11607 err_out:
11608         if (ctx.progress_enabled)
11609                 task_deinit(ctx.info);
11610
11611         return ret;
11612 }