btrfs-progs: check: switch some messages to common helpers
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phases the progress indicator can report.  The value is used as an index
 * into task_position_string[] in print_status_check(), which has no entry
 * for TASK_NOTHING.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE = 1,
	TASK_FS_ROOTS = 2,
	/* Must remain the last enumerator */
	TASK_NOTHING = 3,
};
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/* Check implementations that can be selected by name (parse_check_mode()). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM = 1,
	/* What parse_check_mode() returns for a name it does not know */
	CHECK_MODE_UNKNOWN = 2,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode currently in effect; starts as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
86 struct extent_backref {
87         struct list_head list;
88         unsigned int is_data:1;
89         unsigned int found_extent_tree:1;
90         unsigned int full_backref:1;
91         unsigned int found_ref:1;
92         unsigned int broken:1;
93 };
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
100 struct data_backref {
101         struct extent_backref node;
102         union {
103                 u64 parent;
104                 u64 root;
105         };
106         u64 owner;
107         u64 offset;
108         u64 disk_bytenr;
109         u64 bytes;
110         u64 ram_bytes;
111         u32 num_refs;
112         u32 found_ref;
113 };
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
120 /*
121  * Much like data_backref, just removed the undetermined members
122  * and change it to use list_head.
123  * During extent scan, it is stored in root->orphan_data_extent.
124  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
125  */
126 struct orphan_data_extent {
127         struct list_head list;
128         u64 root;
129         u64 objectid;
130         u64 offset;
131         u64 disk_bytenr;
132         u64 disk_len;
133 };
134
135 struct tree_backref {
136         struct extent_backref node;
137         union {
138                 u64 parent;
139                 u64 root;
140         };
141 };
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
/*
 * Value used to explicitly initialize
 * extent_record::flag_block_full_backref (a two-bit field).
 */
enum {
	FLAG_UNSET = 2
};
150
151 struct extent_record {
152         struct list_head backrefs;
153         struct list_head dups;
154         struct list_head list;
155         struct cache_extent cache;
156         struct btrfs_disk_key parent_key;
157         u64 start;
158         u64 max_size;
159         u64 nr;
160         u64 refs;
161         u64 extent_item_refs;
162         u64 generation;
163         u64 parent_generation;
164         u64 info_objectid;
165         u32 num_duplicates;
166         u8 info_level;
167         unsigned int flag_block_full_backref:2;
168         unsigned int found_rec:1;
169         unsigned int content_checked:1;
170         unsigned int owner_ref_checked:1;
171         unsigned int is_root:1;
172         unsigned int metadata:1;
173         unsigned int bad_full_backref:1;
174         unsigned int crossing_stripes:1;
175         unsigned int wrong_chunk_type:1;
176 };
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
183 struct inode_backref {
184         struct list_head list;
185         unsigned int found_dir_item:1;
186         unsigned int found_dir_index:1;
187         unsigned int found_inode_ref:1;
188         unsigned int filetype:8;
189         int errors;
190         unsigned int ref_type;
191         u64 dir;
192         u64 index;
193         u16 namelen;
194         char name[0];
195 };
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
202 struct root_item_record {
203         struct list_head list;
204         u64 objectid;
205         u64 bytenr;
206         u64 last_snapshot;
207         u8 level;
208         u8 drop_level;
209         int level_size;
210         struct btrfs_key drop_key;
211 };
212
/*
 * Error bits for backref records (e.g. inode_backref::errors).  The text
 * next to each bit is what print_ref_error() prints for it.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* "no dir item" */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* "no dir index" */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* "no inode ref" */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* "dup dir item" */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* "dup dir index" */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* "dup inode ref" */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* "index mismatch" */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* "filetype mismatch" */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100, "name too long" */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* "no root ref" */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* "no root backref" */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* "dup root ref" */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* "dup root backref" */
226
227 struct file_extent_hole {
228         struct rb_node node;
229         u64 start;
230         u64 len;
231 };
232
233 struct inode_record {
234         struct list_head backrefs;
235         unsigned int checked:1;
236         unsigned int merging:1;
237         unsigned int found_inode_item:1;
238         unsigned int found_dir_item:1;
239         unsigned int found_file_extent:1;
240         unsigned int found_csum_item:1;
241         unsigned int some_csum_missing:1;
242         unsigned int nodatasum:1;
243         int errors;
244
245         u64 ino;
246         u32 nlink;
247         u32 imode;
248         u64 isize;
249         u64 nbytes;
250
251         u32 found_link;
252         u64 found_size;
253         u64 extent_start;
254         u64 extent_end;
255         struct rb_root holes;
256         struct list_head orphan_extents;
257
258         u32 refs;
259 };
260
/*
 * Error bits stored in inode_record::errors.  The text next to each bit is
 * what print_inode_error() prints for it.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* "no inode item" */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* "no orphan item" */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* "dup inode item" */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* "dup dir index" */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* "odd dir item" */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* "odd file extent" */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* "bad file extent" */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* "file extent overlap" */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100, "file extent discount" */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* "dir isize wrong" */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400, "nbytes wrong" */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* "odd csum item" */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* "some csum missing" */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* "link count wrong" */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* "orphan file extent" */
276
277 struct root_backref {
278         struct list_head list;
279         unsigned int found_dir_item:1;
280         unsigned int found_dir_index:1;
281         unsigned int found_back_ref:1;
282         unsigned int found_forward_ref:1;
283         unsigned int reachable:1;
284         int errors;
285         u64 ref_root;
286         u64 dir;
287         u64 index;
288         u16 namelen;
289         char name[0];
290 };
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
297 struct root_record {
298         struct list_head backrefs;
299         struct cache_extent cache;
300         unsigned int found_root_item:1;
301         u64 objectid;
302         u32 found_ref;
303 };
304
305 struct ptr_node {
306         struct cache_extent cache;
307         void *data;
308 };
309
310 struct shared_node {
311         struct cache_extent cache;
312         struct cache_tree root_cache;
313         struct cache_tree inode_cache;
314         struct inode_record *current;
315         u32 refs;
316 };
317
318 struct block_info {
319         u64 start;
320         u32 size;
321 };
322
323 struct walk_control {
324         struct cache_tree shared;
325         struct shared_node *nodes[BTRFS_MAX_LEVEL];
326         int active_node;
327         int root_level;
328 };
329
330 struct bad_item {
331         struct btrfs_key key;
332         u64 root_id;
333         struct list_head list;
334 };
335
336 struct extent_entry {
337         u64 bytenr;
338         u64 bytes;
339         int count;
340         int broken;
341         struct list_head list;
342 };
343
344 struct root_item_info {
345         /* level of the root */
346         u8 level;
347         /* number of nodes at this level, must be 1 for a root */
348         int node_count;
349         u64 bytenr;
350         u64 gen;
351         struct cache_extent cache_extent;
352 };
353
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet; they are only used internally
 * to classify errors.  Every error must own a distinct bit:
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with REFERENCER_MISMATCH,
 * which made the two indistinguishable, so it now has a bit of its own.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Companion of print_status_check(): ends the progress line with a newline
 * and flushes stdout.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         size_t size;
663         int ret;
664
665         rec = malloc(sizeof(*rec));
666         if (!rec)
667                 return ERR_PTR(-ENOMEM);
668         memcpy(rec, orig_rec, sizeof(*rec));
669         rec->refs = 1;
670         INIT_LIST_HEAD(&rec->backrefs);
671         INIT_LIST_HEAD(&rec->orphan_extents);
672         rec->holes = RB_ROOT;
673
674         list_for_each_entry(orig, &orig_rec->backrefs, list) {
675                 size = sizeof(*orig) + orig->namelen + 1;
676                 backref = malloc(size);
677                 if (!backref) {
678                         ret = -ENOMEM;
679                         goto cleanup;
680                 }
681                 memcpy(backref, orig, size);
682                 list_add_tail(&backref->list, &rec->backrefs);
683         }
684         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
685                 dst_orphan = malloc(sizeof(*dst_orphan));
686                 if (!dst_orphan) {
687                         ret = -ENOMEM;
688                         goto cleanup;
689                 }
690                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
691                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
692         }
693         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
694         BUG_ON(ret < 0);
695
696         return rec;
697
698 cleanup:
699         if (!list_empty(&rec->backrefs))
700                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
701                         list_del(&orig->list);
702                         free(orig);
703                 }
704
705         if (!list_empty(&rec->orphan_extents))
706                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
707                         list_del(&orig->list);
708                         free(orig);
709                 }
710
711         free(rec);
712
713         return ERR_PTR(ret);
714 }
715
716 static void print_orphan_data_extents(struct list_head *orphan_extents,
717                                       u64 objectid)
718 {
719         struct orphan_data_extent *orphan;
720
721         if (list_empty(orphan_extents))
722                 return;
723         printf("The following data extent is lost in tree %llu:\n",
724                objectid);
725         list_for_each_entry(orphan, orphan_extents, list) {
726                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
727                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
728                        orphan->disk_len);
729         }
730 }
731
732 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
733 {
734         u64 root_objectid = root->root_key.objectid;
735         int errors = rec->errors;
736
737         if (!errors)
738                 return;
739         /* reloc root errors, we print its corresponding fs root objectid*/
740         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
741                 root_objectid = root->root_key.offset;
742                 fprintf(stderr, "reloc");
743         }
744         fprintf(stderr, "root %llu inode %llu errors %x",
745                 (unsigned long long) root_objectid,
746                 (unsigned long long) rec->ino, rec->errors);
747
748         if (errors & I_ERR_NO_INODE_ITEM)
749                 fprintf(stderr, ", no inode item");
750         if (errors & I_ERR_NO_ORPHAN_ITEM)
751                 fprintf(stderr, ", no orphan item");
752         if (errors & I_ERR_DUP_INODE_ITEM)
753                 fprintf(stderr, ", dup inode item");
754         if (errors & I_ERR_DUP_DIR_INDEX)
755                 fprintf(stderr, ", dup dir index");
756         if (errors & I_ERR_ODD_DIR_ITEM)
757                 fprintf(stderr, ", odd dir item");
758         if (errors & I_ERR_ODD_FILE_EXTENT)
759                 fprintf(stderr, ", odd file extent");
760         if (errors & I_ERR_BAD_FILE_EXTENT)
761                 fprintf(stderr, ", bad file extent");
762         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
763                 fprintf(stderr, ", file extent overlap");
764         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
765                 fprintf(stderr, ", file extent discount");
766         if (errors & I_ERR_DIR_ISIZE_WRONG)
767                 fprintf(stderr, ", dir isize wrong");
768         if (errors & I_ERR_FILE_NBYTES_WRONG)
769                 fprintf(stderr, ", nbytes wrong");
770         if (errors & I_ERR_ODD_CSUM_ITEM)
771                 fprintf(stderr, ", odd csum item");
772         if (errors & I_ERR_SOME_CSUM_MISSING)
773                 fprintf(stderr, ", some csum missing");
774         if (errors & I_ERR_LINK_COUNT_WRONG)
775                 fprintf(stderr, ", link count wrong");
776         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
777                 fprintf(stderr, ", orphan file extent");
778         fprintf(stderr, "\n");
779         /* Print the orphan extents if needed */
780         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
781                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
782
783         /* Print the holes if needed */
784         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
785                 struct file_extent_hole *hole;
786                 struct rb_node *node;
787                 int found = 0;
788
789                 node = rb_first(&rec->holes);
790                 fprintf(stderr, "Found file extent holes:\n");
791                 while (node) {
792                         found = 1;
793                         hole = rb_entry(node, struct file_extent_hole, node);
794                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
795                                 hole->start, hole->len);
796                         node = rb_next(node);
797                 }
798                 if (!found)
799                         fprintf(stderr, "\tstart: 0, len: %llu\n",
800                                 round_up(rec->isize, root->sectorsize));
801         }
802 }
803
804 static void print_ref_error(int errors)
805 {
806         if (errors & REF_ERR_NO_DIR_ITEM)
807                 fprintf(stderr, ", no dir item");
808         if (errors & REF_ERR_NO_DIR_INDEX)
809                 fprintf(stderr, ", no dir index");
810         if (errors & REF_ERR_NO_INODE_REF)
811                 fprintf(stderr, ", no inode ref");
812         if (errors & REF_ERR_DUP_DIR_ITEM)
813                 fprintf(stderr, ", dup dir item");
814         if (errors & REF_ERR_DUP_DIR_INDEX)
815                 fprintf(stderr, ", dup dir index");
816         if (errors & REF_ERR_DUP_INODE_REF)
817                 fprintf(stderr, ", dup inode ref");
818         if (errors & REF_ERR_INDEX_UNMATCH)
819                 fprintf(stderr, ", index mismatch");
820         if (errors & REF_ERR_FILETYPE_UNMATCH)
821                 fprintf(stderr, ", filetype mismatch");
822         if (errors & REF_ERR_NAME_TOO_LONG)
823                 fprintf(stderr, ", name too long");
824         if (errors & REF_ERR_NO_ROOT_REF)
825                 fprintf(stderr, ", no root ref");
826         if (errors & REF_ERR_NO_ROOT_BACKREF)
827                 fprintf(stderr, ", no root backref");
828         if (errors & REF_ERR_DUP_ROOT_REF)
829                 fprintf(stderr, ", dup root ref");
830         if (errors & REF_ERR_DUP_ROOT_BACKREF)
831                 fprintf(stderr, ", dup root backref");
832         fprintf(stderr, "\n");
833 }
834
835 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
836                                           u64 ino, int mod)
837 {
838         struct ptr_node *node;
839         struct cache_extent *cache;
840         struct inode_record *rec = NULL;
841         int ret;
842
843         cache = lookup_cache_extent(inode_cache, ino, 1);
844         if (cache) {
845                 node = container_of(cache, struct ptr_node, cache);
846                 rec = node->data;
847                 if (mod && rec->refs > 1) {
848                         node->data = clone_inode_rec(rec);
849                         if (IS_ERR(node->data))
850                                 return node->data;
851                         rec->refs--;
852                         rec = node->data;
853                 }
854         } else if (mod) {
855                 rec = calloc(1, sizeof(*rec));
856                 if (!rec)
857                         return ERR_PTR(-ENOMEM);
858                 rec->ino = ino;
859                 rec->extent_start = (u64)-1;
860                 rec->refs = 1;
861                 INIT_LIST_HEAD(&rec->backrefs);
862                 INIT_LIST_HEAD(&rec->orphan_extents);
863                 rec->holes = RB_ROOT;
864
865                 node = malloc(sizeof(*node));
866                 if (!node) {
867                         free(rec);
868                         return ERR_PTR(-ENOMEM);
869                 }
870                 node->cache.start = ino;
871                 node->cache.size = 1;
872                 node->data = rec;
873
874                 if (ino == BTRFS_FREE_INO_OBJECTID)
875                         rec->found_link = 1;
876
877                 ret = insert_cache_extent(inode_cache, &node->cache);
878                 if (ret)
879                         return ERR_PTR(-EEXIST);
880         }
881         return rec;
882 }
883
884 static void free_orphan_data_extents(struct list_head *orphan_extents)
885 {
886         struct orphan_data_extent *orphan;
887
888         while (!list_empty(orphan_extents)) {
889                 orphan = list_entry(orphan_extents->next,
890                                     struct orphan_data_extent, list);
891                 list_del(&orphan->list);
892                 free(orphan);
893         }
894 }
895
896 static void free_inode_rec(struct inode_record *rec)
897 {
898         struct inode_backref *backref;
899
900         if (--rec->refs > 0)
901                 return;
902
903         while (!list_empty(&rec->backrefs)) {
904                 backref = to_inode_backref(rec->backrefs.next);
905                 list_del(&backref->list);
906                 free(backref);
907         }
908         free_orphan_data_extents(&rec->orphan_extents);
909         free_file_extent_holes(&rec->holes);
910         free(rec);
911 }
912
913 static int can_free_inode_rec(struct inode_record *rec)
914 {
915         if (!rec->errors && rec->checked && rec->found_inode_item &&
916             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
917                 return 1;
918         return 0;
919 }
920
921 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
922                                  struct inode_record *rec)
923 {
924         struct cache_extent *cache;
925         struct inode_backref *tmp, *backref;
926         struct ptr_node *node;
927         unsigned char filetype;
928
929         if (!rec->found_inode_item)
930                 return;
931
932         filetype = imode_to_type(rec->imode);
933         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
934                 if (backref->found_dir_item && backref->found_dir_index) {
935                         if (backref->filetype != filetype)
936                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
937                         if (!backref->errors && backref->found_inode_ref &&
938                             rec->nlink == rec->found_link) {
939                                 list_del(&backref->list);
940                                 free(backref);
941                         }
942                 }
943         }
944
945         if (!rec->checked || rec->merging)
946                 return;
947
948         if (S_ISDIR(rec->imode)) {
949                 if (rec->found_size != rec->isize)
950                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
951                 if (rec->found_file_extent)
952                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
953         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
954                 if (rec->found_dir_item)
955                         rec->errors |= I_ERR_ODD_DIR_ITEM;
956                 if (rec->found_size != rec->nbytes)
957                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
958                 if (rec->nlink > 0 && !no_holes &&
959                     (rec->extent_end < rec->isize ||
960                      first_extent_gap(&rec->holes) < rec->isize))
961                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
962         }
963
964         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
965                 if (rec->found_csum_item && rec->nodatasum)
966                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
967                 if (rec->some_csum_missing && !rec->nodatasum)
968                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
969         }
970
971         BUG_ON(rec->refs != 1);
972         if (can_free_inode_rec(rec)) {
973                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
974                 node = container_of(cache, struct ptr_node, cache);
975                 BUG_ON(node->data != rec);
976                 remove_cache_extent(inode_cache, &node->cache);
977                 free(node);
978                 free_inode_rec(rec);
979         }
980 }
981
982 static int check_orphan_item(struct btrfs_root *root, u64 ino)
983 {
984         struct btrfs_path path;
985         struct btrfs_key key;
986         int ret;
987
988         key.objectid = BTRFS_ORPHAN_OBJECTID;
989         key.type = BTRFS_ORPHAN_ITEM_KEY;
990         key.offset = ino;
991
992         btrfs_init_path(&path);
993         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
994         btrfs_release_path(&path);
995         if (ret > 0)
996                 ret = -ENOENT;
997         return ret;
998 }
999
1000 static int process_inode_item(struct extent_buffer *eb,
1001                               int slot, struct btrfs_key *key,
1002                               struct shared_node *active_node)
1003 {
1004         struct inode_record *rec;
1005         struct btrfs_inode_item *item;
1006
1007         rec = active_node->current;
1008         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1009         if (rec->found_inode_item) {
1010                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1011                 return 1;
1012         }
1013         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1014         rec->nlink = btrfs_inode_nlink(eb, item);
1015         rec->isize = btrfs_inode_size(eb, item);
1016         rec->nbytes = btrfs_inode_nbytes(eb, item);
1017         rec->imode = btrfs_inode_mode(eb, item);
1018         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1019                 rec->nodatasum = 1;
1020         rec->found_inode_item = 1;
1021         if (rec->nlink == 0)
1022                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1023         maybe_free_inode_rec(&active_node->inode_cache, rec);
1024         return 0;
1025 }
1026
1027 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1028                                                 const char *name,
1029                                                 int namelen, u64 dir)
1030 {
1031         struct inode_backref *backref;
1032
1033         list_for_each_entry(backref, &rec->backrefs, list) {
1034                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1035                         break;
1036                 if (backref->dir != dir || backref->namelen != namelen)
1037                         continue;
1038                 if (memcmp(name, backref->name, namelen))
1039                         continue;
1040                 return backref;
1041         }
1042
1043         backref = malloc(sizeof(*backref) + namelen + 1);
1044         if (!backref)
1045                 return NULL;
1046         memset(backref, 0, sizeof(*backref));
1047         backref->dir = dir;
1048         backref->namelen = namelen;
1049         memcpy(backref->name, name, namelen);
1050         backref->name[namelen] = '\0';
1051         list_add_tail(&backref->list, &rec->backrefs);
1052         return backref;
1053 }
1054
1055 static int add_inode_backref(struct cache_tree *inode_cache,
1056                              u64 ino, u64 dir, u64 index,
1057                              const char *name, int namelen,
1058                              int filetype, int itemtype, int errors)
1059 {
1060         struct inode_record *rec;
1061         struct inode_backref *backref;
1062
1063         rec = get_inode_rec(inode_cache, ino, 1);
1064         BUG_ON(IS_ERR(rec));
1065         backref = get_inode_backref(rec, name, namelen, dir);
1066         BUG_ON(!backref);
1067         if (errors)
1068                 backref->errors |= errors;
1069         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1070                 if (backref->found_dir_index)
1071                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1072                 if (backref->found_inode_ref && backref->index != index)
1073                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1074                 if (backref->found_dir_item && backref->filetype != filetype)
1075                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1076
1077                 backref->index = index;
1078                 backref->filetype = filetype;
1079                 backref->found_dir_index = 1;
1080         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1081                 rec->found_link++;
1082                 if (backref->found_dir_item)
1083                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1084                 if (backref->found_dir_index && backref->filetype != filetype)
1085                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1086
1087                 backref->filetype = filetype;
1088                 backref->found_dir_item = 1;
1089         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1090                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1091                 if (backref->found_inode_ref)
1092                         backref->errors |= REF_ERR_DUP_INODE_REF;
1093                 if (backref->found_dir_index && backref->index != index)
1094                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1095                 else
1096                         backref->index = index;
1097
1098                 backref->ref_type = itemtype;
1099                 backref->found_inode_ref = 1;
1100         } else {
1101                 BUG_ON(1);
1102         }
1103
1104         maybe_free_inode_rec(inode_cache, rec);
1105         return 0;
1106 }
1107
1108 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1109                             struct cache_tree *dst_cache)
1110 {
1111         struct inode_backref *backref;
1112         u32 dir_count = 0;
1113         int ret = 0;
1114
1115         dst->merging = 1;
1116         list_for_each_entry(backref, &src->backrefs, list) {
1117                 if (backref->found_dir_index) {
1118                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1119                                         backref->index, backref->name,
1120                                         backref->namelen, backref->filetype,
1121                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1122                 }
1123                 if (backref->found_dir_item) {
1124                         dir_count++;
1125                         add_inode_backref(dst_cache, dst->ino,
1126                                         backref->dir, 0, backref->name,
1127                                         backref->namelen, backref->filetype,
1128                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1129                 }
1130                 if (backref->found_inode_ref) {
1131                         add_inode_backref(dst_cache, dst->ino,
1132                                         backref->dir, backref->index,
1133                                         backref->name, backref->namelen, 0,
1134                                         backref->ref_type, backref->errors);
1135                 }
1136         }
1137
1138         if (src->found_dir_item)
1139                 dst->found_dir_item = 1;
1140         if (src->found_file_extent)
1141                 dst->found_file_extent = 1;
1142         if (src->found_csum_item)
1143                 dst->found_csum_item = 1;
1144         if (src->some_csum_missing)
1145                 dst->some_csum_missing = 1;
1146         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1147                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1148                 if (ret < 0)
1149                         return ret;
1150         }
1151
1152         BUG_ON(src->found_link < dir_count);
1153         dst->found_link += src->found_link - dir_count;
1154         dst->found_size += src->found_size;
1155         if (src->extent_start != (u64)-1) {
1156                 if (dst->extent_start == (u64)-1) {
1157                         dst->extent_start = src->extent_start;
1158                         dst->extent_end = src->extent_end;
1159                 } else {
1160                         if (dst->extent_end > src->extent_start)
1161                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1162                         else if (dst->extent_end < src->extent_start) {
1163                                 ret = add_file_extent_hole(&dst->holes,
1164                                         dst->extent_end,
1165                                         src->extent_start - dst->extent_end);
1166                         }
1167                         if (dst->extent_end < src->extent_end)
1168                                 dst->extent_end = src->extent_end;
1169                 }
1170         }
1171
1172         dst->errors |= src->errors;
1173         if (src->found_inode_item) {
1174                 if (!dst->found_inode_item) {
1175                         dst->nlink = src->nlink;
1176                         dst->isize = src->isize;
1177                         dst->nbytes = src->nbytes;
1178                         dst->imode = src->imode;
1179                         dst->nodatasum = src->nodatasum;
1180                         dst->found_inode_item = 1;
1181                 } else {
1182                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1183                 }
1184         }
1185         dst->merging = 0;
1186
1187         return 0;
1188 }
1189
1190 static int splice_shared_node(struct shared_node *src_node,
1191                               struct shared_node *dst_node)
1192 {
1193         struct cache_extent *cache;
1194         struct ptr_node *node, *ins;
1195         struct cache_tree *src, *dst;
1196         struct inode_record *rec, *conflict;
1197         u64 current_ino = 0;
1198         int splice = 0;
1199         int ret;
1200
1201         if (--src_node->refs == 0)
1202                 splice = 1;
1203         if (src_node->current)
1204                 current_ino = src_node->current->ino;
1205
1206         src = &src_node->root_cache;
1207         dst = &dst_node->root_cache;
1208 again:
1209         cache = search_cache_extent(src, 0);
1210         while (cache) {
1211                 node = container_of(cache, struct ptr_node, cache);
1212                 rec = node->data;
1213                 cache = next_cache_extent(cache);
1214
1215                 if (splice) {
1216                         remove_cache_extent(src, &node->cache);
1217                         ins = node;
1218                 } else {
1219                         ins = malloc(sizeof(*ins));
1220                         BUG_ON(!ins);
1221                         ins->cache.start = node->cache.start;
1222                         ins->cache.size = node->cache.size;
1223                         ins->data = rec;
1224                         rec->refs++;
1225                 }
1226                 ret = insert_cache_extent(dst, &ins->cache);
1227                 if (ret == -EEXIST) {
1228                         conflict = get_inode_rec(dst, rec->ino, 1);
1229                         BUG_ON(IS_ERR(conflict));
1230                         merge_inode_recs(rec, conflict, dst);
1231                         if (rec->checked) {
1232                                 conflict->checked = 1;
1233                                 if (dst_node->current == conflict)
1234                                         dst_node->current = NULL;
1235                         }
1236                         maybe_free_inode_rec(dst, conflict);
1237                         free_inode_rec(rec);
1238                         free(ins);
1239                 } else {
1240                         BUG_ON(ret);
1241                 }
1242         }
1243
1244         if (src == &src_node->root_cache) {
1245                 src = &src_node->inode_cache;
1246                 dst = &dst_node->inode_cache;
1247                 goto again;
1248         }
1249
1250         if (current_ino > 0 && (!dst_node->current ||
1251             current_ino > dst_node->current->ino)) {
1252                 if (dst_node->current) {
1253                         dst_node->current->checked = 1;
1254                         maybe_free_inode_rec(dst, dst_node->current);
1255                 }
1256                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1257                 BUG_ON(IS_ERR(dst_node->current));
1258         }
1259         return 0;
1260 }
1261
1262 static void free_inode_ptr(struct cache_extent *cache)
1263 {
1264         struct ptr_node *node;
1265         struct inode_record *rec;
1266
1267         node = container_of(cache, struct ptr_node, cache);
1268         rec = node->data;
1269         free_inode_rec(rec);
1270         free(node);
1271 }
1272
1273 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1274
1275 static struct shared_node *find_shared_node(struct cache_tree *shared,
1276                                             u64 bytenr)
1277 {
1278         struct cache_extent *cache;
1279         struct shared_node *node;
1280
1281         cache = lookup_cache_extent(shared, bytenr, 1);
1282         if (cache) {
1283                 node = container_of(cache, struct shared_node, cache);
1284                 return node;
1285         }
1286         return NULL;
1287 }
1288
1289 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1290 {
1291         int ret;
1292         struct shared_node *node;
1293
1294         node = calloc(1, sizeof(*node));
1295         if (!node)
1296                 return -ENOMEM;
1297         node->cache.start = bytenr;
1298         node->cache.size = 1;
1299         cache_tree_init(&node->root_cache);
1300         cache_tree_init(&node->inode_cache);
1301         node->refs = refs;
1302
1303         ret = insert_cache_extent(shared, &node->cache);
1304
1305         return ret;
1306 }
1307
1308 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1309                              struct walk_control *wc, int level)
1310 {
1311         struct shared_node *node;
1312         struct shared_node *dest;
1313         int ret;
1314
1315         if (level == wc->active_node)
1316                 return 0;
1317
1318         BUG_ON(wc->active_node <= level);
1319         node = find_shared_node(&wc->shared, bytenr);
1320         if (!node) {
1321                 ret = add_shared_node(&wc->shared, bytenr, refs);
1322                 BUG_ON(ret);
1323                 node = find_shared_node(&wc->shared, bytenr);
1324                 wc->nodes[level] = node;
1325                 wc->active_node = level;
1326                 return 0;
1327         }
1328
1329         if (wc->root_level == wc->active_node &&
1330             btrfs_root_refs(&root->root_item) == 0) {
1331                 if (--node->refs == 0) {
1332                         free_inode_recs_tree(&node->root_cache);
1333                         free_inode_recs_tree(&node->inode_cache);
1334                         remove_cache_extent(&wc->shared, &node->cache);
1335                         free(node);
1336                 }
1337                 return 1;
1338         }
1339
1340         dest = wc->nodes[wc->active_node];
1341         splice_shared_node(node, dest);
1342         if (node->refs == 0) {
1343                 remove_cache_extent(&wc->shared, &node->cache);
1344                 free(node);
1345         }
1346         return 1;
1347 }
1348
1349 static int leave_shared_node(struct btrfs_root *root,
1350                              struct walk_control *wc, int level)
1351 {
1352         struct shared_node *node;
1353         struct shared_node *dest;
1354         int i;
1355
1356         if (level == wc->root_level)
1357                 return 0;
1358
1359         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1360                 if (wc->nodes[i])
1361                         break;
1362         }
1363         BUG_ON(i >= BTRFS_MAX_LEVEL);
1364
1365         node = wc->nodes[wc->active_node];
1366         wc->nodes[wc->active_node] = NULL;
1367         wc->active_node = i;
1368
1369         dest = wc->nodes[wc->active_node];
1370         if (wc->active_node < wc->root_level ||
1371             btrfs_root_refs(&root->root_item) > 0) {
1372                 BUG_ON(node->refs <= 1);
1373                 splice_shared_node(node, dest);
1374         } else {
1375                 BUG_ON(node->refs < 2);
1376                 node->refs--;
1377         }
1378         return 0;
1379 }
1380
1381 /*
1382  * Returns:
1383  * < 0 - on error
1384  * 1   - if the root with id child_root_id is a child of root parent_root_id
1385  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1386  *       has other root(s) as parent(s)
1387  * 2   - if the root child_root_id doesn't have any parent roots
1388  */
1389 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1390                          u64 child_root_id)
1391 {
1392         struct btrfs_path path;
1393         struct btrfs_key key;
1394         struct extent_buffer *leaf;
1395         int has_parent = 0;
1396         int ret;
1397
1398         btrfs_init_path(&path);
1399
1400         key.objectid = parent_root_id;
1401         key.type = BTRFS_ROOT_REF_KEY;
1402         key.offset = child_root_id;
1403         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1404                                 0, 0);
1405         if (ret < 0)
1406                 return ret;
1407         btrfs_release_path(&path);
1408         if (!ret)
1409                 return 1;
1410
1411         key.objectid = child_root_id;
1412         key.type = BTRFS_ROOT_BACKREF_KEY;
1413         key.offset = 0;
1414         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1415                                 0, 0);
1416         if (ret < 0)
1417                 goto out;
1418
1419         while (1) {
1420                 leaf = path.nodes[0];
1421                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1422                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1423                         if (ret)
1424                                 break;
1425                         leaf = path.nodes[0];
1426                 }
1427
1428                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1429                 if (key.objectid != child_root_id ||
1430                     key.type != BTRFS_ROOT_BACKREF_KEY)
1431                         break;
1432
1433                 has_parent = 1;
1434
1435                 if (key.offset == parent_root_id) {
1436                         btrfs_release_path(&path);
1437                         return 1;
1438                 }
1439
1440                 path.slots[0]++;
1441         }
1442 out:
1443         btrfs_release_path(&path);
1444         if (ret < 0)
1445                 return ret;
1446         return has_parent ? 0 : 2;
1447 }
1448
1449 static int process_dir_item(struct btrfs_root *root,
1450                             struct extent_buffer *eb,
1451                             int slot, struct btrfs_key *key,
1452                             struct shared_node *active_node)
1453 {
1454         u32 total;
1455         u32 cur = 0;
1456         u32 len;
1457         u32 name_len;
1458         u32 data_len;
1459         int error;
1460         int nritems = 0;
1461         int filetype;
1462         struct btrfs_dir_item *di;
1463         struct inode_record *rec;
1464         struct cache_tree *root_cache;
1465         struct cache_tree *inode_cache;
1466         struct btrfs_key location;
1467         char namebuf[BTRFS_NAME_LEN];
1468
1469         root_cache = &active_node->root_cache;
1470         inode_cache = &active_node->inode_cache;
1471         rec = active_node->current;
1472         rec->found_dir_item = 1;
1473
1474         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1475         total = btrfs_item_size_nr(eb, slot);
1476         while (cur < total) {
1477                 nritems++;
1478                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1479                 name_len = btrfs_dir_name_len(eb, di);
1480                 data_len = btrfs_dir_data_len(eb, di);
1481                 filetype = btrfs_dir_type(eb, di);
1482
1483                 rec->found_size += name_len;
1484                 if (name_len <= BTRFS_NAME_LEN) {
1485                         len = name_len;
1486                         error = 0;
1487                 } else {
1488                         len = BTRFS_NAME_LEN;
1489                         error = REF_ERR_NAME_TOO_LONG;
1490                 }
1491                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1492
1493                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1494                         add_inode_backref(inode_cache, location.objectid,
1495                                           key->objectid, key->offset, namebuf,
1496                                           len, filetype, key->type, error);
1497                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1498                         add_inode_backref(root_cache, location.objectid,
1499                                           key->objectid, key->offset,
1500                                           namebuf, len, filetype,
1501                                           key->type, error);
1502                 } else {
1503                         fprintf(stderr, "invalid location in dir item %u\n",
1504                                 location.type);
1505                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1506                                           key->objectid, key->offset, namebuf,
1507                                           len, filetype, key->type, error);
1508                 }
1509
1510                 len = sizeof(*di) + name_len + data_len;
1511                 di = (struct btrfs_dir_item *)((char *)di + len);
1512                 cur += len;
1513         }
1514         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1515                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1516
1517         return 0;
1518 }
1519
1520 static int process_inode_ref(struct extent_buffer *eb,
1521                              int slot, struct btrfs_key *key,
1522                              struct shared_node *active_node)
1523 {
1524         u32 total;
1525         u32 cur = 0;
1526         u32 len;
1527         u32 name_len;
1528         u64 index;
1529         int error;
1530         struct cache_tree *inode_cache;
1531         struct btrfs_inode_ref *ref;
1532         char namebuf[BTRFS_NAME_LEN];
1533
1534         inode_cache = &active_node->inode_cache;
1535
1536         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1537         total = btrfs_item_size_nr(eb, slot);
1538         while (cur < total) {
1539                 name_len = btrfs_inode_ref_name_len(eb, ref);
1540                 index = btrfs_inode_ref_index(eb, ref);
1541                 if (name_len <= BTRFS_NAME_LEN) {
1542                         len = name_len;
1543                         error = 0;
1544                 } else {
1545                         len = BTRFS_NAME_LEN;
1546                         error = REF_ERR_NAME_TOO_LONG;
1547                 }
1548                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1549                 add_inode_backref(inode_cache, key->objectid, key->offset,
1550                                   index, namebuf, len, 0, key->type, error);
1551
1552                 len = sizeof(*ref) + name_len;
1553                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1554                 cur += len;
1555         }
1556         return 0;
1557 }
1558
1559 static int process_inode_extref(struct extent_buffer *eb,
1560                                 int slot, struct btrfs_key *key,
1561                                 struct shared_node *active_node)
1562 {
1563         u32 total;
1564         u32 cur = 0;
1565         u32 len;
1566         u32 name_len;
1567         u64 index;
1568         u64 parent;
1569         int error;
1570         struct cache_tree *inode_cache;
1571         struct btrfs_inode_extref *extref;
1572         char namebuf[BTRFS_NAME_LEN];
1573
1574         inode_cache = &active_node->inode_cache;
1575
1576         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1577         total = btrfs_item_size_nr(eb, slot);
1578         while (cur < total) {
1579                 name_len = btrfs_inode_extref_name_len(eb, extref);
1580                 index = btrfs_inode_extref_index(eb, extref);
1581                 parent = btrfs_inode_extref_parent(eb, extref);
1582                 if (name_len <= BTRFS_NAME_LEN) {
1583                         len = name_len;
1584                         error = 0;
1585                 } else {
1586                         len = BTRFS_NAME_LEN;
1587                         error = REF_ERR_NAME_TOO_LONG;
1588                 }
1589                 read_extent_buffer(eb, namebuf,
1590                                    (unsigned long)(extref + 1), len);
1591                 add_inode_backref(inode_cache, key->objectid, parent,
1592                                   index, namebuf, len, 0, key->type, error);
1593
1594                 len = sizeof(*extref) + name_len;
1595                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1596                 cur += len;
1597         }
1598         return 0;
1599
1600 }
1601
1602 static int count_csum_range(struct btrfs_root *root, u64 start,
1603                             u64 len, u64 *found)
1604 {
1605         struct btrfs_key key;
1606         struct btrfs_path path;
1607         struct extent_buffer *leaf;
1608         int ret;
1609         size_t size;
1610         *found = 0;
1611         u64 csum_end;
1612         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1613
1614         btrfs_init_path(&path);
1615
1616         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1617         key.offset = start;
1618         key.type = BTRFS_EXTENT_CSUM_KEY;
1619
1620         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1621                                 &key, &path, 0, 0);
1622         if (ret < 0)
1623                 goto out;
1624         if (ret > 0 && path.slots[0] > 0) {
1625                 leaf = path.nodes[0];
1626                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1627                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1628                     key.type == BTRFS_EXTENT_CSUM_KEY)
1629                         path.slots[0]--;
1630         }
1631
1632         while (len > 0) {
1633                 leaf = path.nodes[0];
1634                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1635                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1636                         if (ret > 0)
1637                                 break;
1638                         else if (ret < 0)
1639                                 goto out;
1640                         leaf = path.nodes[0];
1641                 }
1642
1643                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1644                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1645                     key.type != BTRFS_EXTENT_CSUM_KEY)
1646                         break;
1647
1648                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1649                 if (key.offset >= start + len)
1650                         break;
1651
1652                 if (key.offset > start)
1653                         start = key.offset;
1654
1655                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1656                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1657                 if (csum_end > start) {
1658                         size = min(csum_end - start, len);
1659                         len -= size;
1660                         start += size;
1661                         *found += size;
1662                 }
1663
1664                 path.slots[0]++;
1665         }
1666 out:
1667         btrfs_release_path(&path);
1668         if (ret < 0)
1669                 return ret;
1670         return 0;
1671 }
1672
1673 static int process_file_extent(struct btrfs_root *root,
1674                                 struct extent_buffer *eb,
1675                                 int slot, struct btrfs_key *key,
1676                                 struct shared_node *active_node)
1677 {
1678         struct inode_record *rec;
1679         struct btrfs_file_extent_item *fi;
1680         u64 num_bytes = 0;
1681         u64 disk_bytenr = 0;
1682         u64 extent_offset = 0;
1683         u64 mask = root->sectorsize - 1;
1684         int extent_type;
1685         int ret;
1686
1687         rec = active_node->current;
1688         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1689         rec->found_file_extent = 1;
1690
1691         if (rec->extent_start == (u64)-1) {
1692                 rec->extent_start = key->offset;
1693                 rec->extent_end = key->offset;
1694         }
1695
1696         if (rec->extent_end > key->offset)
1697                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1698         else if (rec->extent_end < key->offset) {
1699                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1700                                            key->offset - rec->extent_end);
1701                 if (ret < 0)
1702                         return ret;
1703         }
1704
1705         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1706         extent_type = btrfs_file_extent_type(eb, fi);
1707
1708         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1709                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1710                 if (num_bytes == 0)
1711                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1712                 rec->found_size += num_bytes;
1713                 num_bytes = (num_bytes + mask) & ~mask;
1714         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1715                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1716                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1717                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1718                 extent_offset = btrfs_file_extent_offset(eb, fi);
1719                 if (num_bytes == 0 || (num_bytes & mask))
1720                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1721                 if (num_bytes + extent_offset >
1722                     btrfs_file_extent_ram_bytes(eb, fi))
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1725                     (btrfs_file_extent_compression(eb, fi) ||
1726                      btrfs_file_extent_encryption(eb, fi) ||
1727                      btrfs_file_extent_other_encoding(eb, fi)))
1728                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1729                 if (disk_bytenr > 0)
1730                         rec->found_size += num_bytes;
1731         } else {
1732                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733         }
1734         rec->extent_end = key->offset + num_bytes;
1735
1736         /*
1737          * The data reloc tree will copy full extents into its inode and then
1738          * copy the corresponding csums.  Because the extent it copied could be
1739          * a preallocated extent that hasn't been written to yet there may be no
1740          * csums to copy, ergo we won't have csums for our file extent.  This is
1741          * ok so just don't bother checking csums if the inode belongs to the
1742          * data reloc tree.
1743          */
1744         if (disk_bytenr > 0 &&
1745             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1746                 u64 found;
1747                 if (btrfs_file_extent_compression(eb, fi))
1748                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1749                 else
1750                         disk_bytenr += extent_offset;
1751
1752                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1753                 if (ret < 0)
1754                         return ret;
1755                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1756                         if (found > 0)
1757                                 rec->found_csum_item = 1;
1758                         if (found < num_bytes)
1759                                 rec->some_csum_missing = 1;
1760                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1761                         if (found > 0)
1762                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1763                 }
1764         }
1765         return 0;
1766 }
1767
1768 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1769                             struct walk_control *wc)
1770 {
1771         struct btrfs_key key;
1772         u32 nritems;
1773         int i;
1774         int ret = 0;
1775         struct cache_tree *inode_cache;
1776         struct shared_node *active_node;
1777
1778         if (wc->root_level == wc->active_node &&
1779             btrfs_root_refs(&root->root_item) == 0)
1780                 return 0;
1781
1782         active_node = wc->nodes[wc->active_node];
1783         inode_cache = &active_node->inode_cache;
1784         nritems = btrfs_header_nritems(eb);
1785         for (i = 0; i < nritems; i++) {
1786                 btrfs_item_key_to_cpu(eb, &key, i);
1787
1788                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1789                         continue;
1790                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1791                         continue;
1792
1793                 if (active_node->current == NULL ||
1794                     active_node->current->ino < key.objectid) {
1795                         if (active_node->current) {
1796                                 active_node->current->checked = 1;
1797                                 maybe_free_inode_rec(inode_cache,
1798                                                      active_node->current);
1799                         }
1800                         active_node->current = get_inode_rec(inode_cache,
1801                                                              key.objectid, 1);
1802                         BUG_ON(IS_ERR(active_node->current));
1803                 }
1804                 switch (key.type) {
1805                 case BTRFS_DIR_ITEM_KEY:
1806                 case BTRFS_DIR_INDEX_KEY:
1807                         ret = process_dir_item(root, eb, i, &key, active_node);
1808                         break;
1809                 case BTRFS_INODE_REF_KEY:
1810                         ret = process_inode_ref(eb, i, &key, active_node);
1811                         break;
1812                 case BTRFS_INODE_EXTREF_KEY:
1813                         ret = process_inode_extref(eb, i, &key, active_node);
1814                         break;
1815                 case BTRFS_INODE_ITEM_KEY:
1816                         ret = process_inode_item(eb, i, &key, active_node);
1817                         break;
1818                 case BTRFS_EXTENT_DATA_KEY:
1819                         ret = process_file_extent(root, eb, i, &key,
1820                                                   active_node);
1821                         break;
1822                 default:
1823                         break;
1824                 };
1825         }
1826         return ret;
1827 }
1828
1829 static void reada_walk_down(struct btrfs_root *root,
1830                             struct extent_buffer *node, int slot)
1831 {
1832         u64 bytenr;
1833         u64 ptr_gen;
1834         u32 nritems;
1835         u32 blocksize;
1836         int i;
1837         int level;
1838
1839         level = btrfs_header_level(node);
1840         if (level != 1)
1841                 return;
1842
1843         nritems = btrfs_header_nritems(node);
1844         blocksize = root->nodesize;
1845         for (i = slot; i < nritems; i++) {
1846                 bytenr = btrfs_node_blockptr(node, i);
1847                 ptr_gen = btrfs_node_ptr_generation(node, i);
1848                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1849         }
1850 }
1851
1852 /*
1853  * Check the child node/leaf by the following condition:
1854  * 1. the first item key of the node/leaf should be the same with the one
1855  *    in parent.
1856  * 2. block in parent node should match the child node/leaf.
1857  * 3. generation of parent node and child's header should be consistent.
1858  *
1859  * Or the child node/leaf pointed by the key in parent is not valid.
1860  *
1861  * We hope to check leaf owner too, but since subvol may share leaves,
1862  * which makes leaf owner check not so strong, key check should be
1863  * sufficient enough for that case.
1864  */
1865 static int check_child_node(struct btrfs_root *root,
1866                             struct extent_buffer *parent, int slot,
1867                             struct extent_buffer *child)
1868 {
1869         struct btrfs_key parent_key;
1870         struct btrfs_key child_key;
1871         int ret = 0;
1872
1873         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1874         if (btrfs_header_level(child) == 0)
1875                 btrfs_item_key_to_cpu(child, &child_key, 0);
1876         else
1877                 btrfs_node_key_to_cpu(child, &child_key, 0);
1878
1879         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1880                 ret = -EINVAL;
1881                 fprintf(stderr,
1882                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1883                         parent_key.objectid, parent_key.type, parent_key.offset,
1884                         child_key.objectid, child_key.type, child_key.offset);
1885         }
1886         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1887                 ret = -EINVAL;
1888                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1889                         btrfs_node_blockptr(parent, slot),
1890                         btrfs_header_bytenr(child));
1891         }
1892         if (btrfs_node_ptr_generation(parent, slot) !=
1893             btrfs_header_generation(child)) {
1894                 ret = -EINVAL;
1895                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1896                         btrfs_header_generation(child),
1897                         btrfs_node_ptr_generation(parent, slot));
1898         }
1899         return ret;
1900 }
1901
/*
 * Per-level cache of extent reference counts used while walking a tree.
 * Both arrays are indexed by tree level: refs[level] is the reference count
 * that was looked up for the block starting at bytenr[level].
 */
struct node_refs {
        u64 bytenr[BTRFS_MAX_LEVEL];
        u64 refs[BTRFS_MAX_LEVEL];
};
1906
1907 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1908                           struct walk_control *wc, int *level,
1909                           struct node_refs *nrefs)
1910 {
1911         enum btrfs_tree_block_status status;
1912         u64 bytenr;
1913         u64 ptr_gen;
1914         struct extent_buffer *next;
1915         struct extent_buffer *cur;
1916         u32 blocksize;
1917         int ret, err = 0;
1918         u64 refs;
1919
1920         WARN_ON(*level < 0);
1921         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1922
1923         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1924                 refs = nrefs->refs[*level];
1925                 ret = 0;
1926         } else {
1927                 ret = btrfs_lookup_extent_info(NULL, root,
1928                                        path->nodes[*level]->start,
1929                                        *level, 1, &refs, NULL);
1930                 if (ret < 0) {
1931                         err = ret;
1932                         goto out;
1933                 }
1934                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1935                 nrefs->refs[*level] = refs;
1936         }
1937
1938         if (refs > 1) {
1939                 ret = enter_shared_node(root, path->nodes[*level]->start,
1940                                         refs, wc, *level);
1941                 if (ret > 0) {
1942                         err = ret;
1943                         goto out;
1944                 }
1945         }
1946
1947         while (*level >= 0) {
1948                 WARN_ON(*level < 0);
1949                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1950                 cur = path->nodes[*level];
1951
1952                 if (btrfs_header_level(cur) != *level)
1953                         WARN_ON(1);
1954
1955                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1956                         break;
1957                 if (*level == 0) {
1958                         ret = process_one_leaf(root, cur, wc);
1959                         if (ret < 0)
1960                                 err = ret;
1961                         break;
1962                 }
1963                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1964                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1965                 blocksize = root->nodesize;
1966
1967                 if (bytenr == nrefs->bytenr[*level - 1]) {
1968                         refs = nrefs->refs[*level - 1];
1969                 } else {
1970                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1971                                         *level - 1, 1, &refs, NULL);
1972                         if (ret < 0) {
1973                                 refs = 0;
1974                         } else {
1975                                 nrefs->bytenr[*level - 1] = bytenr;
1976                                 nrefs->refs[*level - 1] = refs;
1977                         }
1978                 }
1979
1980                 if (refs > 1) {
1981                         ret = enter_shared_node(root, bytenr, refs,
1982                                                 wc, *level - 1);
1983                         if (ret > 0) {
1984                                 path->slots[*level]++;
1985                                 continue;
1986                         }
1987                 }
1988
1989                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1990                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1991                         free_extent_buffer(next);
1992                         reada_walk_down(root, cur, path->slots[*level]);
1993                         next = read_tree_block(root, bytenr, blocksize,
1994                                                ptr_gen);
1995                         if (!extent_buffer_uptodate(next)) {
1996                                 struct btrfs_key node_key;
1997
1998                                 btrfs_node_key_to_cpu(path->nodes[*level],
1999                                                       &node_key,
2000                                                       path->slots[*level]);
2001                                 btrfs_add_corrupt_extent_record(root->fs_info,
2002                                                 &node_key,
2003                                                 path->nodes[*level]->start,
2004                                                 root->nodesize, *level);
2005                                 err = -EIO;
2006                                 goto out;
2007                         }
2008                 }
2009
2010                 ret = check_child_node(root, cur, path->slots[*level], next);
2011                 if (ret) {
2012                         err = ret;
2013                         goto out;
2014                 }
2015
2016                 if (btrfs_is_leaf(next))
2017                         status = btrfs_check_leaf(root, NULL, next);
2018                 else
2019                         status = btrfs_check_node(root, NULL, next);
2020                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2021                         free_extent_buffer(next);
2022                         err = -EIO;
2023                         goto out;
2024                 }
2025
2026                 *level = *level - 1;
2027                 free_extent_buffer(path->nodes[*level]);
2028                 path->nodes[*level] = next;
2029                 path->slots[*level] = 0;
2030         }
2031 out:
2032         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2033         return err;
2034 }
2035
2036 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2037                         struct walk_control *wc, int *level)
2038 {
2039         int i;
2040         struct extent_buffer *leaf;
2041
2042         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2043                 leaf = path->nodes[i];
2044                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2045                         path->slots[i]++;
2046                         *level = i;
2047                         return 0;
2048                 } else {
2049                         free_extent_buffer(path->nodes[*level]);
2050                         path->nodes[*level] = NULL;
2051                         BUG_ON(*level > wc->active_node);
2052                         if (*level == wc->active_node)
2053                                 leave_shared_node(root, wc, *level);
2054                         *level = i + 1;
2055                 }
2056         }
2057         return 1;
2058 }
2059
2060 static int check_root_dir(struct inode_record *rec)
2061 {
2062         struct inode_backref *backref;
2063         int ret = -1;
2064
2065         if (!rec->found_inode_item || rec->errors)
2066                 goto out;
2067         if (rec->nlink != 1 || rec->found_link != 0)
2068                 goto out;
2069         if (list_empty(&rec->backrefs))
2070                 goto out;
2071         backref = to_inode_backref(rec->backrefs.next);
2072         if (!backref->found_inode_ref)
2073                 goto out;
2074         if (backref->index != 0 || backref->namelen != 2 ||
2075             memcmp(backref->name, "..", 2))
2076                 goto out;
2077         if (backref->found_dir_index || backref->found_dir_item)
2078                 goto out;
2079         ret = 0;
2080 out:
2081         return ret;
2082 }
2083
2084 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2085                               struct btrfs_root *root, struct btrfs_path *path,
2086                               struct inode_record *rec)
2087 {
2088         struct btrfs_inode_item *ei;
2089         struct btrfs_key key;
2090         int ret;
2091
2092         key.objectid = rec->ino;
2093         key.type = BTRFS_INODE_ITEM_KEY;
2094         key.offset = (u64)-1;
2095
2096         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2097         if (ret < 0)
2098                 goto out;
2099         if (ret) {
2100                 if (!path->slots[0]) {
2101                         ret = -ENOENT;
2102                         goto out;
2103                 }
2104                 path->slots[0]--;
2105                 ret = 0;
2106         }
2107         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2108         if (key.objectid != rec->ino) {
2109                 ret = -ENOENT;
2110                 goto out;
2111         }
2112
2113         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2114                             struct btrfs_inode_item);
2115         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2116         btrfs_mark_buffer_dirty(path->nodes[0]);
2117         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2118         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2119                root->root_key.objectid);
2120 out:
2121         btrfs_release_path(path);
2122         return ret;
2123 }
2124
2125 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2126                                     struct btrfs_root *root,
2127                                     struct btrfs_path *path,
2128                                     struct inode_record *rec)
2129 {
2130         int ret;
2131
2132         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2133         btrfs_release_path(path);
2134         if (!ret)
2135                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2136         return ret;
2137 }
2138
2139 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2140                                struct btrfs_root *root,
2141                                struct btrfs_path *path,
2142                                struct inode_record *rec)
2143 {
2144         struct btrfs_inode_item *ei;
2145         struct btrfs_key key;
2146         int ret = 0;
2147
2148         key.objectid = rec->ino;
2149         key.type = BTRFS_INODE_ITEM_KEY;
2150         key.offset = 0;
2151
2152         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2153         if (ret) {
2154                 if (ret > 0)
2155                         ret = -ENOENT;
2156                 goto out;
2157         }
2158
2159         /* Since ret == 0, no need to check anything */
2160         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2161                             struct btrfs_inode_item);
2162         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2163         btrfs_mark_buffer_dirty(path->nodes[0]);
2164         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2165         printf("reset nbytes for ino %llu root %llu\n",
2166                rec->ino, root->root_key.objectid);
2167 out:
2168         btrfs_release_path(path);
2169         return ret;
2170 }
2171
2172 static int add_missing_dir_index(struct btrfs_root *root,
2173                                  struct cache_tree *inode_cache,
2174                                  struct inode_record *rec,
2175                                  struct inode_backref *backref)
2176 {
2177         struct btrfs_path *path;
2178         struct btrfs_trans_handle *trans;
2179         struct btrfs_dir_item *dir_item;
2180         struct extent_buffer *leaf;
2181         struct btrfs_key key;
2182         struct btrfs_disk_key disk_key;
2183         struct inode_record *dir_rec;
2184         unsigned long name_ptr;
2185         u32 data_size = sizeof(*dir_item) + backref->namelen;
2186         int ret;
2187
2188         path = btrfs_alloc_path();
2189         if (!path)
2190                 return -ENOMEM;
2191
2192         trans = btrfs_start_transaction(root, 1);
2193         if (IS_ERR(trans)) {
2194                 btrfs_free_path(path);
2195                 return PTR_ERR(trans);
2196         }
2197
2198         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2199                 (unsigned long long)rec->ino);
2200         key.objectid = backref->dir;
2201         key.type = BTRFS_DIR_INDEX_KEY;
2202         key.offset = backref->index;
2203
2204         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2205         BUG_ON(ret);
2206
2207         leaf = path->nodes[0];
2208         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2209
2210         disk_key.objectid = cpu_to_le64(rec->ino);
2211         disk_key.type = BTRFS_INODE_ITEM_KEY;
2212         disk_key.offset = 0;
2213
2214         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2215         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2216         btrfs_set_dir_data_len(leaf, dir_item, 0);
2217         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2218         name_ptr = (unsigned long)(dir_item + 1);
2219         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2220         btrfs_mark_buffer_dirty(leaf);
2221         btrfs_free_path(path);
2222         btrfs_commit_transaction(trans, root);
2223
2224         backref->found_dir_index = 1;
2225         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2226         BUG_ON(IS_ERR(dir_rec));
2227         if (!dir_rec)
2228                 return 0;
2229         dir_rec->found_size += backref->namelen;
2230         if (dir_rec->found_size == dir_rec->isize &&
2231             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2232                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2233         if (dir_rec->found_size != dir_rec->isize)
2234                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2235
2236         return 0;
2237 }
2238
2239 static int delete_dir_index(struct btrfs_root *root,
2240                             struct cache_tree *inode_cache,
2241                             struct inode_record *rec,
2242                             struct inode_backref *backref)
2243 {
2244         struct btrfs_trans_handle *trans;
2245         struct btrfs_dir_item *di;
2246         struct btrfs_path *path;
2247         int ret = 0;
2248
2249         path = btrfs_alloc_path();
2250         if (!path)
2251                 return -ENOMEM;
2252
2253         trans = btrfs_start_transaction(root, 1);
2254         if (IS_ERR(trans)) {
2255                 btrfs_free_path(path);
2256                 return PTR_ERR(trans);
2257         }
2258
2259
2260         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261                 (unsigned long long)backref->dir,
2262                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263                 (unsigned long long)root->objectid);
2264
2265         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2266                                     backref->name, backref->namelen,
2267                                     backref->index, -1);
2268         if (IS_ERR(di)) {
2269                 ret = PTR_ERR(di);
2270                 btrfs_free_path(path);
2271                 btrfs_commit_transaction(trans, root);
2272                 if (ret == -ENOENT)
2273                         return 0;
2274                 return ret;
2275         }
2276
2277         if (!di)
2278                 ret = btrfs_del_item(trans, root, path);
2279         else
2280                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2281         BUG_ON(ret);
2282         btrfs_free_path(path);
2283         btrfs_commit_transaction(trans, root);
2284         return ret;
2285 }
2286
2287 static int create_inode_item(struct btrfs_root *root,
2288                              struct inode_record *rec,
2289                              struct inode_backref *backref, int root_dir)
2290 {
2291         struct btrfs_trans_handle *trans;
2292         struct btrfs_inode_item inode_item;
2293         time_t now = time(NULL);
2294         int ret;
2295
2296         trans = btrfs_start_transaction(root, 1);
2297         if (IS_ERR(trans)) {
2298                 ret = PTR_ERR(trans);
2299                 return ret;
2300         }
2301
2302         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2303                 "be incomplete, please check permissions and content after "
2304                 "the fsck completes.\n", (unsigned long long)root->objectid,
2305                 (unsigned long long)rec->ino);
2306
2307         memset(&inode_item, 0, sizeof(inode_item));
2308         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2309         if (root_dir)
2310                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2311         else
2312                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2313         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2314         if (rec->found_dir_item) {
2315                 if (rec->found_file_extent)
2316                         fprintf(stderr, "root %llu inode %llu has both a dir "
2317                                 "item and extents, unsure if it is a dir or a "
2318                                 "regular file so setting it as a directory\n",
2319                                 (unsigned long long)root->objectid,
2320                                 (unsigned long long)rec->ino);
2321                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2322                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2323         } else if (!rec->found_dir_item) {
2324                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2325                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2326         }
2327         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2328         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2329         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2334         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2335
2336         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2337         BUG_ON(ret);
2338         btrfs_commit_transaction(trans, root);
2339         return 0;
2340 }
2341
2342 static int repair_inode_backrefs(struct btrfs_root *root,
2343                                  struct inode_record *rec,
2344                                  struct cache_tree *inode_cache,
2345                                  int delete)
2346 {
2347         struct inode_backref *tmp, *backref;
2348         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2349         int ret = 0;
2350         int repaired = 0;
2351
2352         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2353                 if (!delete && rec->ino == root_dirid) {
2354                         if (!rec->found_inode_item) {
2355                                 ret = create_inode_item(root, rec, backref, 1);
2356                                 if (ret)
2357                                         break;
2358                                 repaired++;
2359                         }
2360                 }
2361
2362                 /* Index 0 for root dir's are special, don't mess with it */
2363                 if (rec->ino == root_dirid && backref->index == 0)
2364                         continue;
2365
2366                 if (delete &&
2367                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2368                      (backref->found_dir_index && backref->found_inode_ref &&
2369                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2370                         ret = delete_dir_index(root, inode_cache, rec, backref);
2371                         if (ret)
2372                                 break;
2373                         repaired++;
2374                         list_del(&backref->list);
2375                         free(backref);
2376                 }
2377
2378                 if (!delete && !backref->found_dir_index &&
2379                     backref->found_dir_item && backref->found_inode_ref) {
2380                         ret = add_missing_dir_index(root, inode_cache, rec,
2381                                                     backref);
2382                         if (ret)
2383                                 break;
2384                         repaired++;
2385                         if (backref->found_dir_item &&
2386                             backref->found_dir_index &&
2387                             backref->found_dir_index) {
2388                                 if (!backref->errors &&
2389                                     backref->found_inode_ref) {
2390                                         list_del(&backref->list);
2391                                         free(backref);
2392                                 }
2393                         }
2394                 }
2395
2396                 if (!delete && (!backref->found_dir_index &&
2397                                 !backref->found_dir_item &&
2398                                 backref->found_inode_ref)) {
2399                         struct btrfs_trans_handle *trans;
2400                         struct btrfs_key location;
2401
2402                         ret = check_dir_conflict(root, backref->name,
2403                                                  backref->namelen,
2404                                                  backref->dir,
2405                                                  backref->index);
2406                         if (ret) {
2407                                 /*
2408                                  * let nlink fixing routine to handle it,
2409                                  * which can do it better.
2410                                  */
2411                                 ret = 0;
2412                                 break;
2413                         }
2414                         location.objectid = rec->ino;
2415                         location.type = BTRFS_INODE_ITEM_KEY;
2416                         location.offset = 0;
2417
2418                         trans = btrfs_start_transaction(root, 1);
2419                         if (IS_ERR(trans)) {
2420                                 ret = PTR_ERR(trans);
2421                                 break;
2422                         }
2423                         fprintf(stderr, "adding missing dir index/item pair "
2424                                 "for inode %llu\n",
2425                                 (unsigned long long)rec->ino);
2426                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2427                                                     backref->namelen,
2428                                                     backref->dir, &location,
2429                                                     imode_to_type(rec->imode),
2430                                                     backref->index);
2431                         BUG_ON(ret);
2432                         btrfs_commit_transaction(trans, root);
2433                         repaired++;
2434                 }
2435
2436                 if (!delete && (backref->found_inode_ref &&
2437                                 backref->found_dir_index &&
2438                                 backref->found_dir_item &&
2439                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2440                                 !rec->found_inode_item)) {
2441                         ret = create_inode_item(root, rec, backref, 0);
2442                         if (ret)
2443                                 break;
2444                         repaired++;
2445                 }
2446
2447         }
2448         return ret ? ret : repaired;
2449 }
2450
2451 /*
2452  * To determine the file type for nlink/inode_item repair
2453  *
2454  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2455  * Return -ENOENT if file type is not found.
2456  */
2457 static int find_file_type(struct inode_record *rec, u8 *type)
2458 {
2459         struct inode_backref *backref;
2460
2461         /* For inode item recovered case */
2462         if (rec->found_inode_item) {
2463                 *type = imode_to_type(rec->imode);
2464                 return 0;
2465         }
2466
2467         list_for_each_entry(backref, &rec->backrefs, list) {
2468                 if (backref->found_dir_index || backref->found_dir_item) {
2469                         *type = backref->filetype;
2470                         return 0;
2471                 }
2472         }
2473         return -ENOENT;
2474 }
2475
2476 /*
2477  * To determine the file name for nlink repair
2478  *
2479  * Return 0 if file name is found, set name and namelen.
2480  * Return -ENOENT if file name is not found.
2481  */
2482 static int find_file_name(struct inode_record *rec,
2483                           char *name, int *namelen)
2484 {
2485         struct inode_backref *backref;
2486
2487         list_for_each_entry(backref, &rec->backrefs, list) {
2488                 if (backref->found_dir_index || backref->found_dir_item ||
2489                     backref->found_inode_ref) {
2490                         memcpy(name, backref->name, backref->namelen);
2491                         *namelen = backref->namelen;
2492                         return 0;
2493                 }
2494         }
2495         return -ENOENT;
2496 }
2497
2498 /* Reset the nlink of the inode to the correct one */
2499 static int reset_nlink(struct btrfs_trans_handle *trans,
2500                        struct btrfs_root *root,
2501                        struct btrfs_path *path,
2502                        struct inode_record *rec)
2503 {
2504         struct inode_backref *backref;
2505         struct inode_backref *tmp;
2506         struct btrfs_key key;
2507         struct btrfs_inode_item *inode_item;
2508         int ret = 0;
2509
2510         /* We don't believe this either, reset it and iterate backref */
2511         rec->found_link = 0;
2512
2513         /* Remove all backref including the valid ones */
2514         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2515                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2516                                    backref->index, backref->name,
2517                                    backref->namelen, 0);
2518                 if (ret < 0)
2519                         goto out;
2520
2521                 /* remove invalid backref, so it won't be added back */
2522                 if (!(backref->found_dir_index &&
2523                       backref->found_dir_item &&
2524                       backref->found_inode_ref)) {
2525                         list_del(&backref->list);
2526                         free(backref);
2527                 } else {
2528                         rec->found_link++;
2529                 }
2530         }
2531
2532         /* Set nlink to 0 */
2533         key.objectid = rec->ino;
2534         key.type = BTRFS_INODE_ITEM_KEY;
2535         key.offset = 0;
2536         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2537         if (ret < 0)
2538                 goto out;
2539         if (ret > 0) {
2540                 ret = -ENOENT;
2541                 goto out;
2542         }
2543         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2544                                     struct btrfs_inode_item);
2545         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2546         btrfs_mark_buffer_dirty(path->nodes[0]);
2547         btrfs_release_path(path);
2548
2549         /*
2550          * Add back valid inode_ref/dir_item/dir_index,
2551          * add_link() will handle the nlink inc, so new nlink must be correct
2552          */
2553         list_for_each_entry(backref, &rec->backrefs, list) {
2554                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2555                                      backref->name, backref->namelen,
2556                                      backref->filetype, &backref->index, 1);
2557                 if (ret < 0)
2558                         goto out;
2559         }
2560 out:
2561         btrfs_release_path(path);
2562         return ret;
2563 }
2564
2565 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2566                                struct btrfs_root *root,
2567                                struct btrfs_path *path,
2568                                struct inode_record *rec)
2569 {
2570         char *dir_name = "lost+found";
2571         char namebuf[BTRFS_NAME_LEN] = {0};
2572         u64 lost_found_ino;
2573         u32 mode = 0700;
2574         u8 type = 0;
2575         int namelen = 0;
2576         int name_recovered = 0;
2577         int type_recovered = 0;
2578         int ret = 0;
2579
2580         /*
2581          * Get file name and type first before these invalid inode ref
2582          * are deleted by remove_all_invalid_backref()
2583          */
2584         name_recovered = !find_file_name(rec, namebuf, &namelen);
2585         type_recovered = !find_file_type(rec, &type);
2586
2587         if (!name_recovered) {
2588                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2589                        rec->ino, rec->ino);
2590                 namelen = count_digits(rec->ino);
2591                 sprintf(namebuf, "%llu", rec->ino);
2592                 name_recovered = 1;
2593         }
2594         if (!type_recovered) {
2595                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2596                        rec->ino);
2597                 type = BTRFS_FT_REG_FILE;
2598                 type_recovered = 1;
2599         }
2600
2601         ret = reset_nlink(trans, root, path, rec);
2602         if (ret < 0) {
2603                 fprintf(stderr,
2604                         "Failed to reset nlink for inode %llu: %s\n",
2605                         rec->ino, strerror(-ret));
2606                 goto out;
2607         }
2608
2609         if (rec->found_link == 0) {
2610                 lost_found_ino = root->highest_inode;
2611                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2612                         ret = -EOVERFLOW;
2613                         goto out;
2614                 }
2615                 lost_found_ino++;
2616                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2617                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2618                                   mode);
2619                 if (ret < 0) {
2620                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2621                                 dir_name, strerror(-ret));
2622                         goto out;
2623                 }
2624                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2625                                      namebuf, namelen, type, NULL, 1);
2626                 /*
2627                  * Add ".INO" suffix several times to handle case where
2628                  * "FILENAME.INO" is already taken by another file.
2629                  */
2630                 while (ret == -EEXIST) {
2631                         /*
2632                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2633                          */
2634                         if (namelen + count_digits(rec->ino) + 1 >
2635                             BTRFS_NAME_LEN) {
2636                                 ret = -EFBIG;
2637                                 goto out;
2638                         }
2639                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2640                                  ".%llu", rec->ino);
2641                         namelen += count_digits(rec->ino) + 1;
2642                         ret = btrfs_add_link(trans, root, rec->ino,
2643                                              lost_found_ino, namebuf,
2644                                              namelen, type, NULL, 1);
2645                 }
2646                 if (ret < 0) {
2647                         fprintf(stderr,
2648                                 "Failed to link the inode %llu to %s dir: %s\n",
2649                                 rec->ino, dir_name, strerror(-ret));
2650                         goto out;
2651                 }
2652                 /*
2653                  * Just increase the found_link, don't actually add the
2654                  * backref. This will make things easier and this inode
2655                  * record will be freed after the repair is done.
2656                  * So fsck will not report problem about this inode.
2657                  */
2658                 rec->found_link++;
2659                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2660                        namelen, namebuf, dir_name);
2661         }
2662         printf("Fixed the nlink of inode %llu\n", rec->ino);
2663 out:
2664         /*
2665          * Clear the flag anyway, or we will loop forever for the same inode
2666          * as it will not be removed from the bad inode list and the dead loop
2667          * happens.
2668          */
2669         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2670         btrfs_release_path(path);
2671         return ret;
2672 }
2673
2674 /*
2675  * Check if there is any normal(reg or prealloc) file extent for given
2676  * ino.
2677  * This is used to determine the file type when neither its dir_index/item or
2678  * inode_item exists.
2679  *
2680  * This will *NOT* report error, if any error happens, just consider it does
2681  * not have any normal file extent.
2682  */
2683 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2684 {
2685         struct btrfs_path *path;
2686         struct btrfs_key key;
2687         struct btrfs_key found_key;
2688         struct btrfs_file_extent_item *fi;
2689         u8 type;
2690         int ret = 0;
2691
2692         path = btrfs_alloc_path();
2693         if (!path)
2694                 goto out;
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2705                 ret = btrfs_next_leaf(root, path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2713                                       path->slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path->nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_free_path(path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path *path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         path = btrfs_alloc_path();
2930         if (!path)
2931                 return -ENOMEM;
2932
2933         /*
2934          * For nlink repair, it may create a dir and add link, so
2935          * 2 for parent(256)'s dir_index and dir_item
2936          * 2 for lost+found dir's inode_item and inode_ref
2937          * 1 for the new inode_ref of the file
2938          * 2 for lost+found dir's dir_index and dir_item for the file
2939          */
2940         trans = btrfs_start_transaction(root, 7);
2941         if (IS_ERR(trans)) {
2942                 btrfs_free_path(path);
2943                 return PTR_ERR(trans);
2944         }
2945
2946         if (rec->errors & I_ERR_NO_INODE_ITEM)
2947                 ret = repair_inode_no_item(trans, root, path, rec);
2948         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2949                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2950         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2951                 ret = repair_inode_discount_extent(trans, root, path, rec);
2952         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2953                 ret = repair_inode_isize(trans, root, path, rec);
2954         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2955                 ret = repair_inode_orphan_item(trans, root, path, rec);
2956         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2957                 ret = repair_inode_nlinks(trans, root, path, rec);
2958         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2959                 ret = repair_inode_nbytes(trans, root, path, rec);
2960         btrfs_commit_transaction(trans, root);
2961         btrfs_free_path(path);
2962         return ret;
2963 }
2964
2965 static int check_inode_recs(struct btrfs_root *root,
2966                             struct cache_tree *inode_cache)
2967 {
2968         struct cache_extent *cache;
2969         struct ptr_node *node;
2970         struct inode_record *rec;
2971         struct inode_backref *backref;
2972         int stage = 0;
2973         int ret = 0;
2974         int err = 0;
2975         u64 error = 0;
2976         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2977
2978         if (btrfs_root_refs(&root->root_item) == 0) {
2979                 if (!cache_tree_empty(inode_cache))
2980                         fprintf(stderr, "warning line %d\n", __LINE__);
2981                 return 0;
2982         }
2983
2984         /*
2985          * We need to record the highest inode number for later 'lost+found'
2986          * dir creation.
2987          * We must select an ino not used/referred by any existing inode, or
2988          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2989          * this may cause 'lost+found' dir has wrong nlinks.
2990          */
2991         cache = last_cache_extent(inode_cache);
2992         if (cache) {
2993                 node = container_of(cache, struct ptr_node, cache);
2994                 rec = node->data;
2995                 if (rec->ino > root->highest_inode)
2996                         root->highest_inode = rec->ino;
2997         }
2998
2999         /*
3000          * We need to repair backrefs first because we could change some of the
3001          * errors in the inode recs.
3002          *
3003          * We also need to go through and delete invalid backrefs first and then
3004          * add the correct ones second.  We do this because we may get EEXIST
3005          * when adding back the correct index because we hadn't yet deleted the
3006          * invalid index.
3007          *
3008          * For example, if we were missing a dir index then the directories
3009          * isize would be wrong, so if we fixed the isize to what we thought it
3010          * would be and then fixed the backref we'd still have a invalid fs, so
3011          * we need to add back the dir index and then check to see if the isize
3012          * is still wrong.
3013          */
3014         while (stage < 3) {
3015                 stage++;
3016                 if (stage == 3 && !err)
3017                         break;
3018
3019                 cache = search_cache_extent(inode_cache, 0);
3020                 while (repair && cache) {
3021                         node = container_of(cache, struct ptr_node, cache);
3022                         rec = node->data;
3023                         cache = next_cache_extent(cache);
3024
3025                         /* Need to free everything up and rescan */
3026                         if (stage == 3) {
3027                                 remove_cache_extent(inode_cache, &node->cache);
3028                                 free(node);
3029                                 free_inode_rec(rec);
3030                                 continue;
3031                         }
3032
3033                         if (list_empty(&rec->backrefs))
3034                                 continue;
3035
3036                         ret = repair_inode_backrefs(root, rec, inode_cache,
3037                                                     stage == 1);
3038                         if (ret < 0) {
3039                                 err = ret;
3040                                 stage = 2;
3041                                 break;
3042                         } if (ret > 0) {
3043                                 err = -EAGAIN;
3044                         }
3045                 }
3046         }
3047         if (err)
3048                 return err;
3049
3050         rec = get_inode_rec(inode_cache, root_dirid, 0);
3051         BUG_ON(IS_ERR(rec));
3052         if (rec) {
3053                 ret = check_root_dir(rec);
3054                 if (ret) {
3055                         fprintf(stderr, "root %llu root dir %llu error\n",
3056                                 (unsigned long long)root->root_key.objectid,
3057                                 (unsigned long long)root_dirid);
3058                         print_inode_error(root, rec);
3059                         error++;
3060                 }
3061         } else {
3062                 if (repair) {
3063                         struct btrfs_trans_handle *trans;
3064
3065                         trans = btrfs_start_transaction(root, 1);
3066                         if (IS_ERR(trans)) {
3067                                 err = PTR_ERR(trans);
3068                                 return err;
3069                         }
3070
3071                         fprintf(stderr,
3072                                 "root %llu missing its root dir, recreating\n",
3073                                 (unsigned long long)root->objectid);
3074
3075                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3076                         BUG_ON(ret);
3077
3078                         btrfs_commit_transaction(trans, root);
3079                         return -EAGAIN;
3080                 }
3081
3082                 fprintf(stderr, "root %llu root dir %llu not found\n",
3083                         (unsigned long long)root->root_key.objectid,
3084                         (unsigned long long)root_dirid);
3085         }
3086
3087         while (1) {
3088                 cache = search_cache_extent(inode_cache, 0);
3089                 if (!cache)
3090                         break;
3091                 node = container_of(cache, struct ptr_node, cache);
3092                 rec = node->data;
3093                 remove_cache_extent(inode_cache, &node->cache);
3094                 free(node);
3095                 if (rec->ino == root_dirid ||
3096                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3097                         free_inode_rec(rec);
3098                         continue;
3099                 }
3100
3101                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3102                         ret = check_orphan_item(root, rec->ino);
3103                         if (ret == 0)
3104                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3105                         if (can_free_inode_rec(rec)) {
3106                                 free_inode_rec(rec);
3107                                 continue;
3108                         }
3109                 }
3110
3111                 if (!rec->found_inode_item)
3112                         rec->errors |= I_ERR_NO_INODE_ITEM;
3113                 if (rec->found_link != rec->nlink)
3114                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3115                 if (repair) {
3116                         ret = try_repair_inode(root, rec);
3117                         if (ret == 0 && can_free_inode_rec(rec)) {
3118                                 free_inode_rec(rec);
3119                                 continue;
3120                         }
3121                         ret = 0;
3122                 }
3123
3124                 if (!(repair && ret == 0))
3125                         error++;
3126                 print_inode_error(root, rec);
3127                 list_for_each_entry(backref, &rec->backrefs, list) {
3128                         if (!backref->found_dir_item)
3129                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3130                         if (!backref->found_dir_index)
3131                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3132                         if (!backref->found_inode_ref)
3133                                 backref->errors |= REF_ERR_NO_INODE_REF;
3134                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3135                                 " namelen %u name %s filetype %d errors %x",
3136                                 (unsigned long long)backref->dir,
3137                                 (unsigned long long)backref->index,
3138                                 backref->namelen, backref->name,
3139                                 backref->filetype, backref->errors);
3140                         print_ref_error(backref->errors);
3141                 }
3142                 free_inode_rec(rec);
3143         }
3144         return (error > 0) ? -1 : 0;
3145 }
3146
3147 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3148                                         u64 objectid)
3149 {
3150         struct cache_extent *cache;
3151         struct root_record *rec = NULL;
3152         int ret;
3153
3154         cache = lookup_cache_extent(root_cache, objectid, 1);
3155         if (cache) {
3156                 rec = container_of(cache, struct root_record, cache);
3157         } else {
3158                 rec = calloc(1, sizeof(*rec));
3159                 if (!rec)
3160                         return ERR_PTR(-ENOMEM);
3161                 rec->objectid = objectid;
3162                 INIT_LIST_HEAD(&rec->backrefs);
3163                 rec->cache.start = objectid;
3164                 rec->cache.size = 1;
3165
3166                 ret = insert_cache_extent(root_cache, &rec->cache);
3167                 if (ret)
3168                         return ERR_PTR(-EEXIST);
3169         }
3170         return rec;
3171 }
3172
3173 static struct root_backref *get_root_backref(struct root_record *rec,
3174                                              u64 ref_root, u64 dir, u64 index,
3175                                              const char *name, int namelen)
3176 {
3177         struct root_backref *backref;
3178
3179         list_for_each_entry(backref, &rec->backrefs, list) {
3180                 if (backref->ref_root != ref_root || backref->dir != dir ||
3181                     backref->namelen != namelen)
3182                         continue;
3183                 if (memcmp(name, backref->name, namelen))
3184                         continue;
3185                 return backref;
3186         }
3187
3188         backref = calloc(1, sizeof(*backref) + namelen + 1);
3189         if (!backref)
3190                 return NULL;
3191         backref->ref_root = ref_root;
3192         backref->dir = dir;
3193         backref->index = index;
3194         backref->namelen = namelen;
3195         memcpy(backref->name, name, namelen);
3196         backref->name[namelen] = '\0';
3197         list_add_tail(&backref->list, &rec->backrefs);
3198         return backref;
3199 }
3200
3201 static void free_root_record(struct cache_extent *cache)
3202 {
3203         struct root_record *rec;
3204         struct root_backref *backref;
3205
3206         rec = container_of(cache, struct root_record, cache);
3207         while (!list_empty(&rec->backrefs)) {
3208                 backref = to_root_backref(rec->backrefs.next);
3209                 list_del(&backref->list);
3210                 free(backref);
3211         }
3212
3213         free(rec);
3214 }
3215
3216 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3217
3218 static int add_root_backref(struct cache_tree *root_cache,
3219                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3220                             const char *name, int namelen,
3221                             int item_type, int errors)
3222 {
3223         struct root_record *rec;
3224         struct root_backref *backref;
3225
3226         rec = get_root_rec(root_cache, root_id);
3227         BUG_ON(IS_ERR(rec));
3228         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3229         BUG_ON(!backref);
3230
3231         backref->errors |= errors;
3232
3233         if (item_type != BTRFS_DIR_ITEM_KEY) {
3234                 if (backref->found_dir_index || backref->found_back_ref ||
3235                     backref->found_forward_ref) {
3236                         if (backref->index != index)
3237                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3238                 } else {
3239                         backref->index = index;
3240                 }
3241         }
3242
3243         if (item_type == BTRFS_DIR_ITEM_KEY) {
3244                 if (backref->found_forward_ref)
3245                         rec->found_ref++;
3246                 backref->found_dir_item = 1;
3247         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3248                 backref->found_dir_index = 1;
3249         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3250                 if (backref->found_forward_ref)
3251                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3252                 else if (backref->found_dir_item)
3253                         rec->found_ref++;
3254                 backref->found_forward_ref = 1;
3255         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3256                 if (backref->found_back_ref)
3257                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3258                 backref->found_back_ref = 1;
3259         } else {
3260                 BUG_ON(1);
3261         }
3262
3263         if (backref->found_forward_ref && backref->found_dir_item)
3264                 backref->reachable = 1;
3265         return 0;
3266 }
3267
3268 static int merge_root_recs(struct btrfs_root *root,
3269                            struct cache_tree *src_cache,
3270                            struct cache_tree *dst_cache)
3271 {
3272         struct cache_extent *cache;
3273         struct ptr_node *node;
3274         struct inode_record *rec;
3275         struct inode_backref *backref;
3276         int ret = 0;
3277
3278         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3279                 free_inode_recs_tree(src_cache);
3280                 return 0;
3281         }
3282
3283         while (1) {
3284                 cache = search_cache_extent(src_cache, 0);
3285                 if (!cache)
3286                         break;
3287                 node = container_of(cache, struct ptr_node, cache);
3288                 rec = node->data;
3289                 remove_cache_extent(src_cache, &node->cache);
3290                 free(node);
3291
3292                 ret = is_child_root(root, root->objectid, rec->ino);
3293                 if (ret < 0)
3294                         break;
3295                 else if (ret == 0)
3296                         goto skip;
3297
3298                 list_for_each_entry(backref, &rec->backrefs, list) {
3299                         BUG_ON(backref->found_inode_ref);
3300                         if (backref->found_dir_item)
3301                                 add_root_backref(dst_cache, rec->ino,
3302                                         root->root_key.objectid, backref->dir,
3303                                         backref->index, backref->name,
3304                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3305                                         backref->errors);
3306                         if (backref->found_dir_index)
3307                                 add_root_backref(dst_cache, rec->ino,
3308                                         root->root_key.objectid, backref->dir,
3309                                         backref->index, backref->name,
3310                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3311                                         backref->errors);
3312                 }
3313 skip:
3314                 free_inode_rec(rec);
3315         }
3316         if (ret < 0)
3317                 return ret;
3318         return 0;
3319 }
3320
3321 static int check_root_refs(struct btrfs_root *root,
3322                            struct cache_tree *root_cache)
3323 {
3324         struct root_record *rec;
3325         struct root_record *ref_root;
3326         struct root_backref *backref;
3327         struct cache_extent *cache;
3328         int loop = 1;
3329         int ret;
3330         int error;
3331         int errors = 0;
3332
3333         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3334         BUG_ON(IS_ERR(rec));
3335         rec->found_ref = 1;
3336
3337         /* fixme: this can not detect circular references */
3338         while (loop) {
3339                 loop = 0;
3340                 cache = search_cache_extent(root_cache, 0);
3341                 while (1) {
3342                         if (!cache)
3343                                 break;
3344                         rec = container_of(cache, struct root_record, cache);
3345                         cache = next_cache_extent(cache);
3346
3347                         if (rec->found_ref == 0)
3348                                 continue;
3349
3350                         list_for_each_entry(backref, &rec->backrefs, list) {
3351                                 if (!backref->reachable)
3352                                         continue;
3353
3354                                 ref_root = get_root_rec(root_cache,
3355                                                         backref->ref_root);
3356                                 BUG_ON(IS_ERR(ref_root));
3357                                 if (ref_root->found_ref > 0)
3358                                         continue;
3359
3360                                 backref->reachable = 0;
3361                                 rec->found_ref--;
3362                                 if (rec->found_ref == 0)
3363                                         loop = 1;
3364                         }
3365                 }
3366         }
3367
3368         cache = search_cache_extent(root_cache, 0);
3369         while (1) {
3370                 if (!cache)
3371                         break;
3372                 rec = container_of(cache, struct root_record, cache);
3373                 cache = next_cache_extent(cache);
3374
3375                 if (rec->found_ref == 0 &&
3376                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3377                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3378                         ret = check_orphan_item(root->fs_info->tree_root,
3379                                                 rec->objectid);
3380                         if (ret == 0)
3381                                 continue;
3382
3383                         /*
3384                          * If we don't have a root item then we likely just have
3385                          * a dir item in a snapshot for this root but no actual
3386                          * ref key or anything so it's meaningless.
3387                          */
3388                         if (!rec->found_root_item)
3389                                 continue;
3390                         errors++;
3391                         fprintf(stderr, "fs tree %llu not referenced\n",
3392                                 (unsigned long long)rec->objectid);
3393                 }
3394
3395                 error = 0;
3396                 if (rec->found_ref > 0 && !rec->found_root_item)
3397                         error = 1;
3398                 list_for_each_entry(backref, &rec->backrefs, list) {
3399                         if (!backref->found_dir_item)
3400                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3401                         if (!backref->found_dir_index)
3402                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3403                         if (!backref->found_back_ref)
3404                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3405                         if (!backref->found_forward_ref)
3406                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3407                         if (backref->reachable && backref->errors)
3408                                 error = 1;
3409                 }
3410                 if (!error)
3411                         continue;
3412
3413                 errors++;
3414                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3415                         (unsigned long long)rec->objectid, rec->found_ref,
3416                          rec->found_root_item ? "" : "not found");
3417
3418                 list_for_each_entry(backref, &rec->backrefs, list) {
3419                         if (!backref->reachable)
3420                                 continue;
3421                         if (!backref->errors && rec->found_root_item)
3422                                 continue;
3423                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3424                                 " index %llu namelen %u name %s errors %x\n",
3425                                 (unsigned long long)backref->ref_root,
3426                                 (unsigned long long)backref->dir,
3427                                 (unsigned long long)backref->index,
3428                                 backref->namelen, backref->name,
3429                                 backref->errors);
3430                         print_ref_error(backref->errors);
3431                 }
3432         }
3433         return errors > 0 ? 1 : 0;
3434 }
3435
3436 static int process_root_ref(struct extent_buffer *eb, int slot,
3437                             struct btrfs_key *key,
3438                             struct cache_tree *root_cache)
3439 {
3440         u64 dirid;
3441         u64 index;
3442         u32 len;
3443         u32 name_len;
3444         struct btrfs_root_ref *ref;
3445         char namebuf[BTRFS_NAME_LEN];
3446         int error;
3447
3448         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3449
3450         dirid = btrfs_root_ref_dirid(eb, ref);
3451         index = btrfs_root_ref_sequence(eb, ref);
3452         name_len = btrfs_root_ref_name_len(eb, ref);
3453
3454         if (name_len <= BTRFS_NAME_LEN) {
3455                 len = name_len;
3456                 error = 0;
3457         } else {
3458                 len = BTRFS_NAME_LEN;
3459                 error = REF_ERR_NAME_TOO_LONG;
3460         }
3461         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3462
3463         if (key->type == BTRFS_ROOT_REF_KEY) {
3464                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3465                                  index, namebuf, len, key->type, error);
3466         } else {
3467                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3468                                  index, namebuf, len, key->type, error);
3469         }
3470         return 0;
3471 }
3472
3473 static void free_corrupt_block(struct cache_extent *cache)
3474 {
3475         struct btrfs_corrupt_block *corrupt;
3476
3477         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3478         free(corrupt);
3479 }
3480
3481 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3482
3483 /*
3484  * Repair the btree of the given root.
3485  *
3486  * The fix is to remove the node key in corrupt_blocks cache_tree.
3487  * and rebalance the tree.
3488  * After the fix, the btree should be writeable.
3489  */
3490 static int repair_btree(struct btrfs_root *root,
3491                         struct cache_tree *corrupt_blocks)
3492 {
3493         struct btrfs_trans_handle *trans;
3494         struct btrfs_path *path;
3495         struct btrfs_corrupt_block *corrupt;
3496         struct cache_extent *cache;
3497         struct btrfs_key key;
3498         u64 offset;
3499         int level;
3500         int ret = 0;
3501
3502         if (cache_tree_empty(corrupt_blocks))
3503                 return 0;
3504
3505         path = btrfs_alloc_path();
3506         if (!path)
3507                 return -ENOMEM;
3508
3509         trans = btrfs_start_transaction(root, 1);
3510         if (IS_ERR(trans)) {
3511                 ret = PTR_ERR(trans);
3512                 fprintf(stderr, "Error starting transaction: %s\n",
3513                         strerror(-ret));
3514                 goto out_free_path;
3515         }
3516         cache = first_cache_extent(corrupt_blocks);
3517         while (cache) {
3518                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3519                                        cache);
3520                 level = corrupt->level;
3521                 path->lowest_level = level;
3522                 key.objectid = corrupt->key.objectid;
3523                 key.type = corrupt->key.type;
3524                 key.offset = corrupt->key.offset;
3525
3526                 /*
3527                  * Here we don't want to do any tree balance, since it may
3528                  * cause a balance with corrupted brother leaf/node,
3529                  * so ins_len set to 0 here.
3530                  * Balance will be done after all corrupt node/leaf is deleted.
3531                  */
3532                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3533                 if (ret < 0)
3534                         goto out;
3535                 offset = btrfs_node_blockptr(path->nodes[level],
3536                                              path->slots[level]);
3537
3538                 /* Remove the ptr */
3539                 ret = btrfs_del_ptr(trans, root, path, level,
3540                                     path->slots[level]);
3541                 if (ret < 0)
3542                         goto out;
3543                 /*
3544                  * Remove the corresponding extent
3545                  * return value is not concerned.
3546                  */
3547                 btrfs_release_path(path);
3548                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3549                                         0, root->root_key.objectid,
3550                                         level - 1, 0);
3551                 cache = next_cache_extent(cache);
3552         }
3553
3554         /* Balance the btree using btrfs_search_slot() */
3555         cache = first_cache_extent(corrupt_blocks);
3556         while (cache) {
3557                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3558                                        cache);
3559                 memcpy(&key, &corrupt->key, sizeof(key));
3560                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3561                 if (ret < 0)
3562                         goto out;
3563                 /* return will always >0 since it won't find the item */
3564                 ret = 0;
3565                 btrfs_release_path(path);
3566                 cache = next_cache_extent(cache);
3567         }
3568 out:
3569         btrfs_commit_transaction(trans, root);
3570 out_free_path:
3571         btrfs_free_path(path);
3572         return ret;
3573 }
3574
3575 static int check_fs_root(struct btrfs_root *root,
3576                          struct cache_tree *root_cache,
3577                          struct walk_control *wc)
3578 {
3579         int ret = 0;
3580         int err = 0;
3581         int wret;
3582         int level;
3583         struct btrfs_path path;
3584         struct shared_node root_node;
3585         struct root_record *rec;
3586         struct btrfs_root_item *root_item = &root->root_item;
3587         struct cache_tree corrupt_blocks;
3588         struct orphan_data_extent *orphan;
3589         struct orphan_data_extent *tmp;
3590         enum btrfs_tree_block_status status;
3591         struct node_refs nrefs;
3592
3593         /*
3594          * Reuse the corrupt_block cache tree to record corrupted tree block
3595          *
3596          * Unlike the usage in extent tree check, here we do it in a per
3597          * fs/subvol tree base.
3598          */
3599         cache_tree_init(&corrupt_blocks);
3600         root->fs_info->corrupt_blocks = &corrupt_blocks;
3601
3602         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3603                 rec = get_root_rec(root_cache, root->root_key.objectid);
3604                 BUG_ON(IS_ERR(rec));
3605                 if (btrfs_root_refs(root_item) > 0)
3606                         rec->found_root_item = 1;
3607         }
3608
3609         btrfs_init_path(&path);
3610         memset(&root_node, 0, sizeof(root_node));
3611         cache_tree_init(&root_node.root_cache);
3612         cache_tree_init(&root_node.inode_cache);
3613         memset(&nrefs, 0, sizeof(nrefs));
3614
3615         /* Move the orphan extent record to corresponding inode_record */
3616         list_for_each_entry_safe(orphan, tmp,
3617                                  &root->orphan_data_extents, list) {
3618                 struct inode_record *inode;
3619
3620                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3621                                       1);
3622                 BUG_ON(IS_ERR(inode));
3623                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3624                 list_move(&orphan->list, &inode->orphan_extents);
3625         }
3626
3627         level = btrfs_header_level(root->node);
3628         memset(wc->nodes, 0, sizeof(wc->nodes));
3629         wc->nodes[level] = &root_node;
3630         wc->active_node = level;
3631         wc->root_level = level;
3632
3633         /* We may not have checked the root block, lets do that now */
3634         if (btrfs_is_leaf(root->node))
3635                 status = btrfs_check_leaf(root, NULL, root->node);
3636         else
3637                 status = btrfs_check_node(root, NULL, root->node);
3638         if (status != BTRFS_TREE_BLOCK_CLEAN)
3639                 return -EIO;
3640
3641         if (btrfs_root_refs(root_item) > 0 ||
3642             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3643                 path.nodes[level] = root->node;
3644                 extent_buffer_get(root->node);
3645                 path.slots[level] = 0;
3646         } else {
3647                 struct btrfs_key key;
3648                 struct btrfs_disk_key found_key;
3649
3650                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3651                 level = root_item->drop_level;
3652                 path.lowest_level = level;
3653                 if (level > btrfs_header_level(root->node) ||
3654                     level >= BTRFS_MAX_LEVEL) {
3655                         error("ignoring invalid drop level: %u", level);
3656                         goto skip_walking;
3657                 }
3658                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3659                 if (wret < 0)
3660                         goto skip_walking;
3661                 btrfs_node_key(path.nodes[level], &found_key,
3662                                 path.slots[level]);
3663                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3664                                         sizeof(found_key)));
3665         }
3666
3667         while (1) {
3668                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3669                 if (wret < 0)
3670                         ret = wret;
3671                 if (wret != 0)
3672                         break;
3673
3674                 wret = walk_up_tree(root, &path, wc, &level);
3675                 if (wret < 0)
3676                         ret = wret;
3677                 if (wret != 0)
3678                         break;
3679         }
3680 skip_walking:
3681         btrfs_release_path(&path);
3682
3683         if (!cache_tree_empty(&corrupt_blocks)) {
3684                 struct cache_extent *cache;
3685                 struct btrfs_corrupt_block *corrupt;
3686
3687                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3688                        root->root_key.objectid);
3689                 cache = first_cache_extent(&corrupt_blocks);
3690                 while (cache) {
3691                         corrupt = container_of(cache,
3692                                                struct btrfs_corrupt_block,
3693                                                cache);
3694                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3695                                cache->start, corrupt->level,
3696                                corrupt->key.objectid, corrupt->key.type,
3697                                corrupt->key.offset);
3698                         cache = next_cache_extent(cache);
3699                 }
3700                 if (repair) {
3701                         printf("Try to repair the btree for root %llu\n",
3702                                root->root_key.objectid);
3703                         ret = repair_btree(root, &corrupt_blocks);
3704                         if (ret < 0)
3705                                 fprintf(stderr, "Failed to repair btree: %s\n",
3706                                         strerror(-ret));
3707                         if (!ret)
3708                                 printf("Btree for root %llu is fixed\n",
3709                                        root->root_key.objectid);
3710                 }
3711         }
3712
3713         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3714         if (err < 0)
3715                 ret = err;
3716
3717         if (root_node.current) {
3718                 root_node.current->checked = 1;
3719                 maybe_free_inode_rec(&root_node.inode_cache,
3720                                 root_node.current);
3721         }
3722
3723         err = check_inode_recs(root, &root_node.inode_cache);
3724         if (!ret)
3725                 ret = err;
3726
3727         free_corrupt_blocks_tree(&corrupt_blocks);
3728         root->fs_info->corrupt_blocks = NULL;
3729         free_orphan_data_extents(&root->orphan_data_extents);
3730         return ret;
3731 }
3732
3733 static int fs_root_objectid(u64 objectid)
3734 {
3735         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3736             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3737                 return 1;
3738         return is_fstree(objectid);
3739 }
3740
3741 static int check_fs_roots(struct btrfs_root *root,
3742                           struct cache_tree *root_cache)
3743 {
3744         struct btrfs_path path;
3745         struct btrfs_key key;
3746         struct walk_control wc;
3747         struct extent_buffer *leaf, *tree_node;
3748         struct btrfs_root *tmp_root;
3749         struct btrfs_root *tree_root = root->fs_info->tree_root;
3750         int ret;
3751         int err = 0;
3752
3753         if (ctx.progress_enabled) {
3754                 ctx.tp = TASK_FS_ROOTS;
3755                 task_start(ctx.info);
3756         }
3757
3758         /*
3759          * Just in case we made any changes to the extent tree that weren't
3760          * reflected into the free space cache yet.
3761          */
3762         if (repair)
3763                 reset_cached_block_groups(root->fs_info);
3764         memset(&wc, 0, sizeof(wc));
3765         cache_tree_init(&wc.shared);
3766         btrfs_init_path(&path);
3767
3768 again:
3769         key.offset = 0;
3770         key.objectid = 0;
3771         key.type = BTRFS_ROOT_ITEM_KEY;
3772         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3773         if (ret < 0) {
3774                 err = 1;
3775                 goto out;
3776         }
3777         tree_node = tree_root->node;
3778         while (1) {
3779                 if (tree_node != tree_root->node) {
3780                         free_root_recs_tree(root_cache);
3781                         btrfs_release_path(&path);
3782                         goto again;
3783                 }
3784                 leaf = path.nodes[0];
3785                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3786                         ret = btrfs_next_leaf(tree_root, &path);
3787                         if (ret) {
3788                                 if (ret < 0)
3789                                         err = 1;
3790                                 break;
3791                         }
3792                         leaf = path.nodes[0];
3793                 }
3794                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3795                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3796                     fs_root_objectid(key.objectid)) {
3797                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3798                                 tmp_root = btrfs_read_fs_root_no_cache(
3799                                                 root->fs_info, &key);
3800                         } else {
3801                                 key.offset = (u64)-1;
3802                                 tmp_root = btrfs_read_fs_root(
3803                                                 root->fs_info, &key);
3804                         }
3805                         if (IS_ERR(tmp_root)) {
3806                                 err = 1;
3807                                 goto next;
3808                         }
3809                         ret = check_fs_root(tmp_root, root_cache, &wc);
3810                         if (ret == -EAGAIN) {
3811                                 free_root_recs_tree(root_cache);
3812                                 btrfs_release_path(&path);
3813                                 goto again;
3814                         }
3815                         if (ret)
3816                                 err = 1;
3817                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3818                                 btrfs_free_fs_root(tmp_root);
3819                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3820                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3821                         process_root_ref(leaf, path.slots[0], &key,
3822                                          root_cache);
3823                 }
3824 next:
3825                 path.slots[0]++;
3826         }
3827 out:
3828         btrfs_release_path(&path);
3829         if (err)
3830                 free_extent_cache_tree(&wc.shared);
3831         if (!cache_tree_empty(&wc.shared))
3832                 fprintf(stderr, "warning line %d\n", __LINE__);
3833
3834         task_stop(ctx.info);
3835
3836         return err;
3837 }
3838
3839 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3840 {
3841         struct list_head *cur = rec->backrefs.next;
3842         struct extent_backref *back;
3843         struct tree_backref *tback;
3844         struct data_backref *dback;
3845         u64 found = 0;
3846         int err = 0;
3847
3848         while(cur != &rec->backrefs) {
3849                 back = to_extent_backref(cur);
3850                 cur = cur->next;
3851                 if (!back->found_extent_tree) {
3852                         err = 1;
3853                         if (!print_errs)
3854                                 goto out;
3855                         if (back->is_data) {
3856                                 dback = to_data_backref(back);
3857                                 fprintf(stderr, "Backref %llu %s %llu"
3858                                         " owner %llu offset %llu num_refs %lu"
3859                                         " not found in extent tree\n",
3860                                         (unsigned long long)rec->start,
3861                                         back->full_backref ?
3862                                         "parent" : "root",
3863                                         back->full_backref ?
3864                                         (unsigned long long)dback->parent:
3865                                         (unsigned long long)dback->root,
3866                                         (unsigned long long)dback->owner,
3867                                         (unsigned long long)dback->offset,
3868                                         (unsigned long)dback->num_refs);
3869                         } else {
3870                                 tback = to_tree_backref(back);
3871                                 fprintf(stderr, "Backref %llu parent %llu"
3872                                         " root %llu not found in extent tree\n",
3873                                         (unsigned long long)rec->start,
3874                                         (unsigned long long)tback->parent,
3875                                         (unsigned long long)tback->root);
3876                         }
3877                 }
3878                 if (!back->is_data && !back->found_ref) {
3879                         err = 1;
3880                         if (!print_errs)
3881                                 goto out;
3882                         tback = to_tree_backref(back);
3883                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3884                                 (unsigned long long)rec->start,
3885                                 back->full_backref ? "parent" : "root",
3886                                 back->full_backref ?
3887                                 (unsigned long long)tback->parent :
3888                                 (unsigned long long)tback->root, back);
3889                 }
3890                 if (back->is_data) {
3891                         dback = to_data_backref(back);
3892                         if (dback->found_ref != dback->num_refs) {
3893                                 err = 1;
3894                                 if (!print_errs)
3895                                         goto out;
3896                                 fprintf(stderr, "Incorrect local backref count"
3897                                         " on %llu %s %llu owner %llu"
3898                                         " offset %llu found %u wanted %u back %p\n",
3899                                         (unsigned long long)rec->start,
3900                                         back->full_backref ?
3901                                         "parent" : "root",
3902                                         back->full_backref ?
3903                                         (unsigned long long)dback->parent:
3904                                         (unsigned long long)dback->root,
3905                                         (unsigned long long)dback->owner,
3906                                         (unsigned long long)dback->offset,
3907                                         dback->found_ref, dback->num_refs, back);
3908                         }
3909                         if (dback->disk_bytenr != rec->start) {
3910                                 err = 1;
3911                                 if (!print_errs)
3912                                         goto out;
3913                                 fprintf(stderr, "Backref disk bytenr does not"
3914                                         " match extent record, bytenr=%llu, "
3915                                         "ref bytenr=%llu\n",
3916                                         (unsigned long long)rec->start,
3917                                         (unsigned long long)dback->disk_bytenr);
3918                         }
3919
3920                         if (dback->bytes != rec->nr) {
3921                                 err = 1;
3922                                 if (!print_errs)
3923                                         goto out;
3924                                 fprintf(stderr, "Backref bytes do not match "
3925                                         "extent backref, bytenr=%llu, ref "
3926                                         "bytes=%llu, backref bytes=%llu\n",
3927                                         (unsigned long long)rec->start,
3928                                         (unsigned long long)rec->nr,
3929                                         (unsigned long long)dback->bytes);
3930                         }
3931                 }
3932                 if (!back->is_data) {
3933                         found += 1;
3934                 } else {
3935                         dback = to_data_backref(back);
3936                         found += dback->found_ref;
3937                 }
3938         }
3939         if (found != rec->refs) {
3940                 err = 1;
3941                 if (!print_errs)
3942                         goto out;
3943                 fprintf(stderr, "Incorrect global backref count "
3944                         "on %llu found %llu wanted %llu\n",
3945                         (unsigned long long)rec->start,
3946                         (unsigned long long)found,
3947                         (unsigned long long)rec->refs);
3948         }
3949 out:
3950         return err;
3951 }
3952
3953 static int free_all_extent_backrefs(struct extent_record *rec)
3954 {
3955         struct extent_backref *back;
3956         struct list_head *cur;
3957         while (!list_empty(&rec->backrefs)) {
3958                 cur = rec->backrefs.next;
3959                 back = to_extent_backref(cur);
3960                 list_del(cur);
3961                 free(back);
3962         }
3963         return 0;
3964 }
3965
3966 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3967                                      struct cache_tree *extent_cache)
3968 {
3969         struct cache_extent *cache;
3970         struct extent_record *rec;
3971
3972         while (1) {
3973                 cache = first_cache_extent(extent_cache);
3974                 if (!cache)
3975                         break;
3976                 rec = container_of(cache, struct extent_record, cache);
3977                 remove_cache_extent(extent_cache, cache);
3978                 free_all_extent_backrefs(rec);
3979                 free(rec);
3980         }
3981 }
3982
3983 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3984                                  struct extent_record *rec)
3985 {
3986         if (rec->content_checked && rec->owner_ref_checked &&
3987             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3988             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3989             !rec->bad_full_backref && !rec->crossing_stripes &&
3990             !rec->wrong_chunk_type) {
3991                 remove_cache_extent(extent_cache, &rec->cache);
3992                 free_all_extent_backrefs(rec);
3993                 list_del_init(&rec->list);
3994                 free(rec);
3995         }
3996         return 0;
3997 }
3998
3999 static int check_owner_ref(struct btrfs_root *root,
4000                             struct extent_record *rec,
4001                             struct extent_buffer *buf)
4002 {
4003         struct extent_backref *node;
4004         struct tree_backref *back;
4005         struct btrfs_root *ref_root;
4006         struct btrfs_key key;
4007         struct btrfs_path path;
4008         struct extent_buffer *parent;
4009         int level;
4010         int found = 0;
4011         int ret;
4012
4013         list_for_each_entry(node, &rec->backrefs, list) {
4014                 if (node->is_data)
4015                         continue;
4016                 if (!node->found_ref)
4017                         continue;
4018                 if (node->full_backref)
4019                         continue;
4020                 back = to_tree_backref(node);
4021                 if (btrfs_header_owner(buf) == back->root)
4022                         return 0;
4023         }
4024         BUG_ON(rec->is_root);
4025
4026         /* try to find the block by search corresponding fs tree */
4027         key.objectid = btrfs_header_owner(buf);
4028         key.type = BTRFS_ROOT_ITEM_KEY;
4029         key.offset = (u64)-1;
4030
4031         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4032         if (IS_ERR(ref_root))
4033                 return 1;
4034
4035         level = btrfs_header_level(buf);
4036         if (level == 0)
4037                 btrfs_item_key_to_cpu(buf, &key, 0);
4038         else
4039                 btrfs_node_key_to_cpu(buf, &key, 0);
4040
4041         btrfs_init_path(&path);
4042         path.lowest_level = level + 1;
4043         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4044         if (ret < 0)
4045                 return 0;
4046
4047         parent = path.nodes[level + 1];
4048         if (parent && buf->start == btrfs_node_blockptr(parent,
4049                                                         path.slots[level + 1]))
4050                 found = 1;
4051
4052         btrfs_release_path(&path);
4053         return found ? 0 : 1;
4054 }
4055
4056 static int is_extent_tree_record(struct extent_record *rec)
4057 {
4058         struct list_head *cur = rec->backrefs.next;
4059         struct extent_backref *node;
4060         struct tree_backref *back;
4061         int is_extent = 0;
4062
4063         while(cur != &rec->backrefs) {
4064                 node = to_extent_backref(cur);
4065                 cur = cur->next;
4066                 if (node->is_data)
4067                         return 0;
4068                 back = to_tree_backref(node);
4069                 if (node->full_backref)
4070                         return 0;
4071                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4072                         is_extent = 1;
4073         }
4074         return is_extent;
4075 }
4076
4077
4078 static int record_bad_block_io(struct btrfs_fs_info *info,
4079                                struct cache_tree *extent_cache,
4080                                u64 start, u64 len)
4081 {
4082         struct extent_record *rec;
4083         struct cache_extent *cache;
4084         struct btrfs_key key;
4085
4086         cache = lookup_cache_extent(extent_cache, start, len);
4087         if (!cache)
4088                 return 0;
4089
4090         rec = container_of(cache, struct extent_record, cache);
4091         if (!is_extent_tree_record(rec))
4092                 return 0;
4093
4094         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4095         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4096 }
4097
4098 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4099                        struct extent_buffer *buf, int slot)
4100 {
4101         if (btrfs_header_level(buf)) {
4102                 struct btrfs_key_ptr ptr1, ptr2;
4103
4104                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4105                                    sizeof(struct btrfs_key_ptr));
4106                 read_extent_buffer(buf, &ptr2,
4107                                    btrfs_node_key_ptr_offset(slot + 1),
4108                                    sizeof(struct btrfs_key_ptr));
4109                 write_extent_buffer(buf, &ptr1,
4110                                     btrfs_node_key_ptr_offset(slot + 1),
4111                                     sizeof(struct btrfs_key_ptr));
4112                 write_extent_buffer(buf, &ptr2,
4113                                     btrfs_node_key_ptr_offset(slot),
4114                                     sizeof(struct btrfs_key_ptr));
4115                 if (slot == 0) {
4116                         struct btrfs_disk_key key;
4117                         btrfs_node_key(buf, &key, 0);
4118                         btrfs_fixup_low_keys(root, path, &key,
4119                                              btrfs_header_level(buf) + 1);
4120                 }
4121         } else {
4122                 struct btrfs_item *item1, *item2;
4123                 struct btrfs_key k1, k2;
4124                 char *item1_data, *item2_data;
4125                 u32 item1_offset, item2_offset, item1_size, item2_size;
4126
4127                 item1 = btrfs_item_nr(slot);
4128                 item2 = btrfs_item_nr(slot + 1);
4129                 btrfs_item_key_to_cpu(buf, &k1, slot);
4130                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4131                 item1_offset = btrfs_item_offset(buf, item1);
4132                 item2_offset = btrfs_item_offset(buf, item2);
4133                 item1_size = btrfs_item_size(buf, item1);
4134                 item2_size = btrfs_item_size(buf, item2);
4135
4136                 item1_data = malloc(item1_size);
4137                 if (!item1_data)
4138                         return -ENOMEM;
4139                 item2_data = malloc(item2_size);
4140                 if (!item2_data) {
4141                         free(item1_data);
4142                         return -ENOMEM;
4143                 }
4144
4145                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4146                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4147
4148                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4149                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4150                 free(item1_data);
4151                 free(item2_data);
4152
4153                 btrfs_set_item_offset(buf, item1, item2_offset);
4154                 btrfs_set_item_offset(buf, item2, item1_offset);
4155                 btrfs_set_item_size(buf, item1, item2_size);
4156                 btrfs_set_item_size(buf, item2, item1_size);
4157
4158                 path->slots[0] = slot;
4159                 btrfs_set_item_key_unsafe(root, path, &k2);
4160                 path->slots[0] = slot + 1;
4161                 btrfs_set_item_key_unsafe(root, path, &k1);
4162         }
4163         return 0;
4164 }
4165
4166 static int fix_key_order(struct btrfs_trans_handle *trans,
4167                          struct btrfs_root *root,
4168                          struct btrfs_path *path)
4169 {
4170         struct extent_buffer *buf;
4171         struct btrfs_key k1, k2;
4172         int i;
4173         int level = path->lowest_level;
4174         int ret = -EIO;
4175
4176         buf = path->nodes[level];
4177         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4178                 if (level) {
4179                         btrfs_node_key_to_cpu(buf, &k1, i);
4180                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4181                 } else {
4182                         btrfs_item_key_to_cpu(buf, &k1, i);
4183                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4184                 }
4185                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4186                         continue;
4187                 ret = swap_values(root, path, buf, i);
4188                 if (ret)
4189                         break;
4190                 btrfs_mark_buffer_dirty(buf);
4191                 i = 0;
4192         }
4193         return ret;
4194 }
4195
4196 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4197                              struct btrfs_root *root,
4198                              struct btrfs_path *path,
4199                              struct extent_buffer *buf, int slot)
4200 {
4201         struct btrfs_key key;
4202         int nritems = btrfs_header_nritems(buf);
4203
4204         btrfs_item_key_to_cpu(buf, &key, slot);
4205
4206         /* These are all the keys we can deal with missing. */
4207         if (key.type != BTRFS_DIR_INDEX_KEY &&
4208             key.type != BTRFS_EXTENT_ITEM_KEY &&
4209             key.type != BTRFS_METADATA_ITEM_KEY &&
4210             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4211             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4212                 return -1;
4213
4214         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4215                (unsigned long long)key.objectid, key.type,
4216                (unsigned long long)key.offset, slot, buf->start);
4217         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4218                               btrfs_item_nr_offset(slot + 1),
4219                               sizeof(struct btrfs_item) *
4220                               (nritems - slot - 1));
4221         btrfs_set_header_nritems(buf, nritems - 1);
4222         if (slot == 0) {
4223                 struct btrfs_disk_key disk_key;
4224
4225                 btrfs_item_key(buf, &disk_key, 0);
4226                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4227         }
4228         btrfs_mark_buffer_dirty(buf);
4229         return 0;
4230 }
4231
4232 static int fix_item_offset(struct btrfs_trans_handle *trans,
4233                            struct btrfs_root *root,
4234                            struct btrfs_path *path)
4235 {
4236         struct extent_buffer *buf;
4237         int i;
4238         int ret = 0;
4239
4240         /* We should only get this for leaves */
4241         BUG_ON(path->lowest_level);
4242         buf = path->nodes[0];
4243 again:
4244         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4245                 unsigned int shift = 0, offset;
4246
4247                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4248                     BTRFS_LEAF_DATA_SIZE(root)) {
4249                         if (btrfs_item_end_nr(buf, i) >
4250                             BTRFS_LEAF_DATA_SIZE(root)) {
4251                                 ret = delete_bogus_item(trans, root, path,
4252                                                         buf, i);
4253                                 if (!ret)
4254                                         goto again;
4255                                 fprintf(stderr, "item is off the end of the "
4256                                         "leaf, can't fix\n");
4257                                 ret = -EIO;
4258                                 break;
4259                         }
4260                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4261                                 btrfs_item_end_nr(buf, i);
4262                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4263                            btrfs_item_offset_nr(buf, i - 1)) {
4264                         if (btrfs_item_end_nr(buf, i) >
4265                             btrfs_item_offset_nr(buf, i - 1)) {
4266                                 ret = delete_bogus_item(trans, root, path,
4267                                                         buf, i);
4268                                 if (!ret)
4269                                         goto again;
4270                                 fprintf(stderr, "items overlap, can't fix\n");
4271                                 ret = -EIO;
4272                                 break;
4273                         }
4274                         shift = btrfs_item_offset_nr(buf, i - 1) -
4275                                 btrfs_item_end_nr(buf, i);
4276                 }
4277                 if (!shift)
4278                         continue;
4279
4280                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4281                        i, shift, (unsigned long long)buf->start);
4282                 offset = btrfs_item_offset_nr(buf, i);
4283                 memmove_extent_buffer(buf,
4284                                       btrfs_leaf_data(buf) + offset + shift,
4285                                       btrfs_leaf_data(buf) + offset,
4286                                       btrfs_item_size_nr(buf, i));
4287                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4288                                       offset + shift);
4289                 btrfs_mark_buffer_dirty(buf);
4290         }
4291
4292         /*
4293          * We may have moved things, in which case we want to exit so we don't
4294          * write those changes out.  Once we have proper abort functionality in
4295          * progs this can be changed to something nicer.
4296          */
4297         BUG_ON(ret);
4298         return ret;
4299 }
4300
4301 /*
4302  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4303  * then just return -EIO.
4304  */
4305 static int try_to_fix_bad_block(struct btrfs_root *root,
4306                                 struct extent_buffer *buf,
4307                                 enum btrfs_tree_block_status status)
4308 {
4309         struct btrfs_trans_handle *trans;
4310         struct ulist *roots;
4311         struct ulist_node *node;
4312         struct btrfs_root *search_root;
4313         struct btrfs_path *path;
4314         struct ulist_iterator iter;
4315         struct btrfs_key root_key, key;
4316         int ret;
4317
4318         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4319             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4320                 return -EIO;
4321
4322         path = btrfs_alloc_path();
4323         if (!path)
4324                 return -EIO;
4325
4326         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4327                                    0, &roots);
4328         if (ret) {
4329                 btrfs_free_path(path);
4330                 return -EIO;
4331         }
4332
4333         ULIST_ITER_INIT(&iter);
4334         while ((node = ulist_next(roots, &iter))) {
4335                 root_key.objectid = node->val;
4336                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4337                 root_key.offset = (u64)-1;
4338
4339                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4340                 if (IS_ERR(root)) {
4341                         ret = -EIO;
4342                         break;
4343                 }
4344
4345
4346                 trans = btrfs_start_transaction(search_root, 0);
4347                 if (IS_ERR(trans)) {
4348                         ret = PTR_ERR(trans);
4349                         break;
4350                 }
4351
4352                 path->lowest_level = btrfs_header_level(buf);
4353                 path->skip_check_block = 1;
4354                 if (path->lowest_level)
4355                         btrfs_node_key_to_cpu(buf, &key, 0);
4356                 else
4357                         btrfs_item_key_to_cpu(buf, &key, 0);
4358                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4359                 if (ret) {
4360                         ret = -EIO;
4361                         btrfs_commit_transaction(trans, search_root);
4362                         break;
4363                 }
4364                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4365                         ret = fix_key_order(trans, search_root, path);
4366                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4367                         ret = fix_item_offset(trans, search_root, path);
4368                 if (ret) {
4369                         btrfs_commit_transaction(trans, search_root);
4370                         break;
4371                 }
4372                 btrfs_release_path(path);
4373                 btrfs_commit_transaction(trans, search_root);
4374         }
4375         ulist_free(roots);
4376         btrfs_free_path(path);
4377         return ret;
4378 }
4379
4380 static int check_block(struct btrfs_root *root,
4381                        struct cache_tree *extent_cache,
4382                        struct extent_buffer *buf, u64 flags)
4383 {
4384         struct extent_record *rec;
4385         struct cache_extent *cache;
4386         struct btrfs_key key;
4387         enum btrfs_tree_block_status status;
4388         int ret = 0;
4389         int level;
4390
4391         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4392         if (!cache)
4393                 return 1;
4394         rec = container_of(cache, struct extent_record, cache);
4395         rec->generation = btrfs_header_generation(buf);
4396
4397         level = btrfs_header_level(buf);
4398         if (btrfs_header_nritems(buf) > 0) {
4399
4400                 if (level == 0)
4401                         btrfs_item_key_to_cpu(buf, &key, 0);
4402                 else
4403                         btrfs_node_key_to_cpu(buf, &key, 0);
4404
4405                 rec->info_objectid = key.objectid;
4406         }
4407         rec->info_level = level;
4408
4409         if (btrfs_is_leaf(buf))
4410                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4411         else
4412                 status = btrfs_check_node(root, &rec->parent_key, buf);
4413
4414         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4415                 if (repair)
4416                         status = try_to_fix_bad_block(root, buf, status);
4417                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4418                         ret = -EIO;
4419                         fprintf(stderr, "bad block %llu\n",
4420                                 (unsigned long long)buf->start);
4421                 } else {
4422                         /*
4423                          * Signal to callers we need to start the scan over
4424                          * again since we'll have cowed blocks.
4425                          */
4426                         ret = -EAGAIN;
4427                 }
4428         } else {
4429                 rec->content_checked = 1;
4430                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4431                         rec->owner_ref_checked = 1;
4432                 else {
4433                         ret = check_owner_ref(root, rec, buf);
4434                         if (!ret)
4435                                 rec->owner_ref_checked = 1;
4436                 }
4437         }
4438         if (!ret)
4439                 maybe_free_extent_rec(extent_cache, rec);
4440         return ret;
4441 }
4442
4443 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4444                                                 u64 parent, u64 root)
4445 {
4446         struct list_head *cur = rec->backrefs.next;
4447         struct extent_backref *node;
4448         struct tree_backref *back;
4449
4450         while(cur != &rec->backrefs) {
4451                 node = to_extent_backref(cur);
4452                 cur = cur->next;
4453                 if (node->is_data)
4454                         continue;
4455                 back = to_tree_backref(node);
4456                 if (parent > 0) {
4457                         if (!node->full_backref)
4458                                 continue;
4459                         if (parent == back->parent)
4460                                 return back;
4461                 } else {
4462                         if (node->full_backref)
4463                                 continue;
4464                         if (back->root == root)
4465                                 return back;
4466                 }
4467         }
4468         return NULL;
4469 }
4470
4471 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4472                                                 u64 parent, u64 root)
4473 {
4474         struct tree_backref *ref = malloc(sizeof(*ref));
4475
4476         if (!ref)
4477                 return NULL;
4478         memset(&ref->node, 0, sizeof(ref->node));
4479         if (parent > 0) {
4480                 ref->parent = parent;
4481                 ref->node.full_backref = 1;
4482         } else {
4483                 ref->root = root;
4484                 ref->node.full_backref = 0;
4485         }
4486         list_add_tail(&ref->node.list, &rec->backrefs);
4487
4488         return ref;
4489 }
4490
4491 static struct data_backref *find_data_backref(struct extent_record *rec,
4492                                                 u64 parent, u64 root,
4493                                                 u64 owner, u64 offset,
4494                                                 int found_ref,
4495                                                 u64 disk_bytenr, u64 bytes)
4496 {
4497         struct list_head *cur = rec->backrefs.next;
4498         struct extent_backref *node;
4499         struct data_backref *back;
4500
4501         while(cur != &rec->backrefs) {
4502                 node = to_extent_backref(cur);
4503                 cur = cur->next;
4504                 if (!node->is_data)
4505                         continue;
4506                 back = to_data_backref(node);
4507                 if (parent > 0) {
4508                         if (!node->full_backref)
4509                                 continue;
4510                         if (parent == back->parent)
4511                                 return back;
4512                 } else {
4513                         if (node->full_backref)
4514                                 continue;
4515                         if (back->root == root && back->owner == owner &&
4516                             back->offset == offset) {
4517                                 if (found_ref && node->found_ref &&
4518                                     (back->bytes != bytes ||
4519                                     back->disk_bytenr != disk_bytenr))
4520                                         continue;
4521                                 return back;
4522                         }
4523                 }
4524         }
4525         return NULL;
4526 }
4527
4528 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4529                                                 u64 parent, u64 root,
4530                                                 u64 owner, u64 offset,
4531                                                 u64 max_size)
4532 {
4533         struct data_backref *ref = malloc(sizeof(*ref));
4534
4535         if (!ref)
4536                 return NULL;
4537         memset(&ref->node, 0, sizeof(ref->node));
4538         ref->node.is_data = 1;
4539
4540         if (parent > 0) {
4541                 ref->parent = parent;
4542                 ref->owner = 0;
4543                 ref->offset = 0;
4544                 ref->node.full_backref = 1;
4545         } else {
4546                 ref->root = root;
4547                 ref->owner = owner;
4548                 ref->offset = offset;
4549                 ref->node.full_backref = 0;
4550         }
4551         ref->bytes = max_size;
4552         ref->found_ref = 0;
4553         ref->num_refs = 0;
4554         list_add_tail(&ref->node.list, &rec->backrefs);
4555         if (max_size > rec->max_size)
4556                 rec->max_size = max_size;
4557         return ref;
4558 }
4559
4560 /* Check if the type of extent matches with its chunk */
4561 static void check_extent_type(struct extent_record *rec)
4562 {
4563         struct btrfs_block_group_cache *bg_cache;
4564
4565         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4566         if (!bg_cache)
4567                 return;
4568
4569         /* data extent, check chunk directly*/
4570         if (!rec->metadata) {
4571                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4572                         rec->wrong_chunk_type = 1;
4573                 return;
4574         }
4575
4576         /* metadata extent, check the obvious case first */
4577         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4578                                  BTRFS_BLOCK_GROUP_METADATA))) {
4579                 rec->wrong_chunk_type = 1;
4580                 return;
4581         }
4582
4583         /*
4584          * Check SYSTEM extent, as it's also marked as metadata, we can only
4585          * make sure it's a SYSTEM extent by its backref
4586          */
4587         if (!list_empty(&rec->backrefs)) {
4588                 struct extent_backref *node;
4589                 struct tree_backref *tback;
4590                 u64 bg_type;
4591
4592                 node = to_extent_backref(rec->backrefs.next);
4593                 if (node->is_data) {
4594                         /* tree block shouldn't have data backref */
4595                         rec->wrong_chunk_type = 1;
4596                         return;
4597                 }
4598                 tback = container_of(node, struct tree_backref, node);
4599
4600                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4601                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4602                 else
4603                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4604                 if (!(bg_cache->flags & bg_type))
4605                         rec->wrong_chunk_type = 1;
4606         }
4607 }
4608
4609 /*
4610  * Allocate a new extent record, fill default values from @tmpl and insert int
4611  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4612  * the cache, otherwise it fails.
4613  */
4614 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4615                 struct extent_record *tmpl)
4616 {
4617         struct extent_record *rec;
4618         int ret = 0;
4619
4620         rec = malloc(sizeof(*rec));
4621         if (!rec)
4622                 return -ENOMEM;
4623         rec->start = tmpl->start;
4624         rec->max_size = tmpl->max_size;
4625         rec->nr = max(tmpl->nr, tmpl->max_size);
4626         rec->found_rec = tmpl->found_rec;
4627         rec->content_checked = tmpl->content_checked;
4628         rec->owner_ref_checked = tmpl->owner_ref_checked;
4629         rec->num_duplicates = 0;
4630         rec->metadata = tmpl->metadata;
4631         rec->flag_block_full_backref = FLAG_UNSET;
4632         rec->bad_full_backref = 0;
4633         rec->crossing_stripes = 0;
4634         rec->wrong_chunk_type = 0;
4635         rec->is_root = tmpl->is_root;
4636         rec->refs = tmpl->refs;
4637         rec->extent_item_refs = tmpl->extent_item_refs;
4638         rec->parent_generation = tmpl->parent_generation;
4639         INIT_LIST_HEAD(&rec->backrefs);
4640         INIT_LIST_HEAD(&rec->dups);
4641         INIT_LIST_HEAD(&rec->list);
4642         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4643         rec->cache.start = tmpl->start;
4644         rec->cache.size = tmpl->nr;
4645         ret = insert_cache_extent(extent_cache, &rec->cache);
4646         if (ret) {
4647                 free(rec);
4648                 return ret;
4649         }
4650         bytes_used += rec->nr;
4651
4652         if (tmpl->metadata)
4653                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4654                                 global_info->tree_root->nodesize);
4655         check_extent_type(rec);
4656         return ret;
4657 }
4658
4659 /*
4660  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4661  * some are hints:
4662  * - refs              - if found, increase refs
4663  * - is_root           - if found, set
4664  * - content_checked   - if found, set
4665  * - owner_ref_checked - if found, set
4666  *
4667  * If not found, create a new one, initialize and insert.
4668  */
4669 static int add_extent_rec(struct cache_tree *extent_cache,
4670                 struct extent_record *tmpl)
4671 {
4672         struct extent_record *rec;
4673         struct cache_extent *cache;
4674         int ret = 0;
4675         int dup = 0;
4676
4677         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4678         if (cache) {
4679                 rec = container_of(cache, struct extent_record, cache);
4680                 if (tmpl->refs)
4681                         rec->refs++;
4682                 if (rec->nr == 1)
4683                         rec->nr = max(tmpl->nr, tmpl->max_size);
4684
4685                 /*
4686                  * We need to make sure to reset nr to whatever the extent
4687                  * record says was the real size, this way we can compare it to
4688                  * the backrefs.
4689                  */
4690                 if (tmpl->found_rec) {
4691                         if (tmpl->start != rec->start || rec->found_rec) {
4692                                 struct extent_record *tmp;
4693
4694                                 dup = 1;
4695                                 if (list_empty(&rec->list))
4696                                         list_add_tail(&rec->list,
4697                                                       &duplicate_extents);
4698
4699                                 /*
4700                                  * We have to do this song and dance in case we
4701                                  * find an extent record that falls inside of
4702                                  * our current extent record but does not have
4703                                  * the same objectid.
4704                                  */
4705                                 tmp = malloc(sizeof(*tmp));
4706                                 if (!tmp)
4707                                         return -ENOMEM;
4708                                 tmp->start = tmpl->start;
4709                                 tmp->max_size = tmpl->max_size;
4710                                 tmp->nr = tmpl->nr;
4711                                 tmp->found_rec = 1;
4712                                 tmp->metadata = tmpl->metadata;
4713                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4714                                 INIT_LIST_HEAD(&tmp->list);
4715                                 list_add_tail(&tmp->list, &rec->dups);
4716                                 rec->num_duplicates++;
4717                         } else {
4718                                 rec->nr = tmpl->nr;
4719                                 rec->found_rec = 1;
4720                         }
4721                 }
4722
4723                 if (tmpl->extent_item_refs && !dup) {
4724                         if (rec->extent_item_refs) {
4725                                 fprintf(stderr, "block %llu rec "
4726                                         "extent_item_refs %llu, passed %llu\n",
4727                                         (unsigned long long)tmpl->start,
4728                                         (unsigned long long)
4729                                                         rec->extent_item_refs,
4730                                         (unsigned long long)tmpl->extent_item_refs);
4731                         }
4732                         rec->extent_item_refs = tmpl->extent_item_refs;
4733                 }
4734                 if (tmpl->is_root)
4735                         rec->is_root = 1;
4736                 if (tmpl->content_checked)
4737                         rec->content_checked = 1;
4738                 if (tmpl->owner_ref_checked)
4739                         rec->owner_ref_checked = 1;
4740                 memcpy(&rec->parent_key, &tmpl->parent_key,
4741                                 sizeof(tmpl->parent_key));
4742                 if (tmpl->parent_generation)
4743                         rec->parent_generation = tmpl->parent_generation;
4744                 if (rec->max_size < tmpl->max_size)
4745                         rec->max_size = tmpl->max_size;
4746
4747                 /*
4748                  * A metadata extent can't cross stripe_len boundary, otherwise
4749                  * kernel scrub won't be able to handle it.
4750                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4751                  * it.
4752                  */
4753                 if (tmpl->metadata)
4754                         rec->crossing_stripes = check_crossing_stripes(
4755                                 rec->start, global_info->tree_root->nodesize);
4756                 check_extent_type(rec);
4757                 maybe_free_extent_rec(extent_cache, rec);
4758                 return ret;
4759         }
4760
4761         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4762
4763         return ret;
4764 }
4765
4766 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4767                             u64 parent, u64 root, int found_ref)
4768 {
4769         struct extent_record *rec;
4770         struct tree_backref *back;
4771         struct cache_extent *cache;
4772         int ret;
4773
4774         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4775         if (!cache) {
4776                 struct extent_record tmpl;
4777
4778                 memset(&tmpl, 0, sizeof(tmpl));
4779                 tmpl.start = bytenr;
4780                 tmpl.nr = 1;
4781                 tmpl.metadata = 1;
4782
4783                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4784                 if (ret)
4785                         return ret;
4786
4787                 /* really a bug in cache_extent implement now */
4788                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4789                 if (!cache)
4790                         return -ENOENT;
4791         }
4792
4793         rec = container_of(cache, struct extent_record, cache);
4794         if (rec->start != bytenr) {
4795                 /*
4796                  * Several cause, from unaligned bytenr to over lapping extents
4797                  */
4798                 return -EEXIST;
4799         }
4800
4801         back = find_tree_backref(rec, parent, root);
4802         if (!back) {
4803                 back = alloc_tree_backref(rec, parent, root);
4804                 if (!back)
4805                         return -ENOMEM;
4806         }
4807
4808         if (found_ref) {
4809                 if (back->node.found_ref) {
4810                         fprintf(stderr, "Extent back ref already exists "
4811                                 "for %llu parent %llu root %llu \n",
4812                                 (unsigned long long)bytenr,
4813                                 (unsigned long long)parent,
4814                                 (unsigned long long)root);
4815                 }
4816                 back->node.found_ref = 1;
4817         } else {
4818                 if (back->node.found_extent_tree) {
4819                         fprintf(stderr, "Extent back ref already exists "
4820                                 "for %llu parent %llu root %llu \n",
4821                                 (unsigned long long)bytenr,
4822                                 (unsigned long long)parent,
4823                                 (unsigned long long)root);
4824                 }
4825                 back->node.found_extent_tree = 1;
4826         }
4827         check_extent_type(rec);
4828         maybe_free_extent_rec(extent_cache, rec);
4829         return 0;
4830 }
4831
4832 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4833                             u64 parent, u64 root, u64 owner, u64 offset,
4834                             u32 num_refs, int found_ref, u64 max_size)
4835 {
4836         struct extent_record *rec;
4837         struct data_backref *back;
4838         struct cache_extent *cache;
4839         int ret;
4840
4841         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4842         if (!cache) {
4843                 struct extent_record tmpl;
4844
4845                 memset(&tmpl, 0, sizeof(tmpl));
4846                 tmpl.start = bytenr;
4847                 tmpl.nr = 1;
4848                 tmpl.max_size = max_size;
4849
4850                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4851                 if (ret)
4852                         return ret;
4853
4854                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4855                 if (!cache)
4856                         abort();
4857         }
4858
4859         rec = container_of(cache, struct extent_record, cache);
4860         if (rec->max_size < max_size)
4861                 rec->max_size = max_size;
4862
4863         /*
4864          * If found_ref is set then max_size is the real size and must match the
4865          * existing refs.  So if we have already found a ref then we need to
4866          * make sure that this ref matches the existing one, otherwise we need
4867          * to add a new backref so we can notice that the backrefs don't match
4868          * and we need to figure out who is telling the truth.  This is to
4869          * account for that awful fsync bug I introduced where we'd end up with
4870          * a btrfs_file_extent_item that would have its length include multiple
4871          * prealloc extents or point inside of a prealloc extent.
4872          */
4873         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4874                                  bytenr, max_size);
4875         if (!back) {
4876                 back = alloc_data_backref(rec, parent, root, owner, offset,
4877                                           max_size);
4878                 BUG_ON(!back);
4879         }
4880
4881         if (found_ref) {
4882                 BUG_ON(num_refs != 1);
4883                 if (back->node.found_ref)
4884                         BUG_ON(back->bytes != max_size);
4885                 back->node.found_ref = 1;
4886                 back->found_ref += 1;
4887                 back->bytes = max_size;
4888                 back->disk_bytenr = bytenr;
4889                 rec->refs += 1;
4890                 rec->content_checked = 1;
4891                 rec->owner_ref_checked = 1;
4892         } else {
4893                 if (back->node.found_extent_tree) {
4894                         fprintf(stderr, "Extent back ref already exists "
4895                                 "for %llu parent %llu root %llu "
4896                                 "owner %llu offset %llu num_refs %lu\n",
4897                                 (unsigned long long)bytenr,
4898                                 (unsigned long long)parent,
4899                                 (unsigned long long)root,
4900                                 (unsigned long long)owner,
4901                                 (unsigned long long)offset,
4902                                 (unsigned long)num_refs);
4903                 }
4904                 back->num_refs = num_refs;
4905                 back->node.found_extent_tree = 1;
4906         }
4907         maybe_free_extent_rec(extent_cache, rec);
4908         return 0;
4909 }
4910
4911 static int add_pending(struct cache_tree *pending,
4912                        struct cache_tree *seen, u64 bytenr, u32 size)
4913 {
4914         int ret;
4915         ret = add_cache_extent(seen, bytenr, size);
4916         if (ret)
4917                 return ret;
4918         add_cache_extent(pending, bytenr, size);
4919         return 0;
4920 }
4921
4922 static int pick_next_pending(struct cache_tree *pending,
4923                         struct cache_tree *reada,
4924                         struct cache_tree *nodes,
4925                         u64 last, struct block_info *bits, int bits_nr,
4926                         int *reada_bits)
4927 {
4928         unsigned long node_start = last;
4929         struct cache_extent *cache;
4930         int ret;
4931
4932         cache = search_cache_extent(reada, 0);
4933         if (cache) {
4934                 bits[0].start = cache->start;
4935                 bits[0].size = cache->size;
4936                 *reada_bits = 1;
4937                 return 1;
4938         }
4939         *reada_bits = 0;
4940         if (node_start > 32768)
4941                 node_start -= 32768;
4942
4943         cache = search_cache_extent(nodes, node_start);
4944         if (!cache)
4945                 cache = search_cache_extent(nodes, 0);
4946
4947         if (!cache) {
4948                  cache = search_cache_extent(pending, 0);
4949                  if (!cache)
4950                          return 0;
4951                  ret = 0;
4952                  do {
4953                          bits[ret].start = cache->start;
4954                          bits[ret].size = cache->size;
4955                          cache = next_cache_extent(cache);
4956                          ret++;
4957                  } while (cache && ret < bits_nr);
4958                  return ret;
4959         }
4960
4961         ret = 0;
4962         do {
4963                 bits[ret].start = cache->start;
4964                 bits[ret].size = cache->size;
4965                 cache = next_cache_extent(cache);
4966                 ret++;
4967         } while (cache && ret < bits_nr);
4968
4969         if (bits_nr - ret > 8) {
4970                 u64 lookup = bits[0].start + bits[0].size;
4971                 struct cache_extent *next;
4972                 next = search_cache_extent(pending, lookup);
4973                 while(next) {
4974                         if (next->start - lookup > 32768)
4975                                 break;
4976                         bits[ret].start = next->start;
4977                         bits[ret].size = next->size;
4978                         lookup = next->start + next->size;
4979                         ret++;
4980                         if (ret == bits_nr)
4981                                 break;
4982                         next = next_cache_extent(next);
4983                         if (!next)
4984                                 break;
4985                 }
4986         }
4987         return ret;
4988 }
4989
4990 static void free_chunk_record(struct cache_extent *cache)
4991 {
4992         struct chunk_record *rec;
4993
4994         rec = container_of(cache, struct chunk_record, cache);
4995         list_del_init(&rec->list);
4996         list_del_init(&rec->dextents);
4997         free(rec);
4998 }
4999
5000 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5001 {
5002         cache_tree_free_extents(chunk_cache, free_chunk_record);
5003 }
5004
5005 static void free_device_record(struct rb_node *node)
5006 {
5007         struct device_record *rec;
5008
5009         rec = container_of(node, struct device_record, node);
5010         free(rec);
5011 }
5012
5013 FREE_RB_BASED_TREE(device_cache, free_device_record);
5014
5015 int insert_block_group_record(struct block_group_tree *tree,
5016                               struct block_group_record *bg_rec)
5017 {
5018         int ret;
5019
5020         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5021         if (ret)
5022                 return ret;
5023
5024         list_add_tail(&bg_rec->list, &tree->block_groups);
5025         return 0;
5026 }
5027
5028 static void free_block_group_record(struct cache_extent *cache)
5029 {
5030         struct block_group_record *rec;
5031
5032         rec = container_of(cache, struct block_group_record, cache);
5033         list_del_init(&rec->list);
5034         free(rec);
5035 }
5036
5037 void free_block_group_tree(struct block_group_tree *tree)
5038 {
5039         cache_tree_free_extents(&tree->tree, free_block_group_record);
5040 }
5041
5042 int insert_device_extent_record(struct device_extent_tree *tree,
5043                                 struct device_extent_record *de_rec)
5044 {
5045         int ret;
5046
5047         /*
5048          * Device extent is a bit different from the other extents, because
5049          * the extents which belong to the different devices may have the
5050          * same start and size, so we need use the special extent cache
5051          * search/insert functions.
5052          */
5053         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5054         if (ret)
5055                 return ret;
5056
5057         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5058         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5059         return 0;
5060 }
5061
5062 static void free_device_extent_record(struct cache_extent *cache)
5063 {
5064         struct device_extent_record *rec;
5065
5066         rec = container_of(cache, struct device_extent_record, cache);
5067         if (!list_empty(&rec->chunk_list))
5068                 list_del_init(&rec->chunk_list);
5069         if (!list_empty(&rec->device_list))
5070                 list_del_init(&rec->device_list);
5071         free(rec);
5072 }
5073
5074 void free_device_extent_tree(struct device_extent_tree *tree)
5075 {
5076         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5077 }
5078
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * The item key supplies the extent bytenr (key.objectid) and the parent
 * (key.offset).  A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is
 * recorded as a tree backref, anything else as a data backref carrying the
 * item's ref count.  Root, owner, offset, found_ref and max_size are all
 * passed as 0.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
5099
5100 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5101                                             struct btrfs_key *key,
5102                                             int slot)
5103 {
5104         struct btrfs_chunk *ptr;
5105         struct chunk_record *rec;
5106         int num_stripes, i;
5107
5108         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5109         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5110
5111         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5112         if (!rec) {
5113                 fprintf(stderr, "memory allocation failed\n");
5114                 exit(-1);
5115         }
5116
5117         INIT_LIST_HEAD(&rec->list);
5118         INIT_LIST_HEAD(&rec->dextents);
5119         rec->bg_rec = NULL;
5120
5121         rec->cache.start = key->offset;
5122         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5123
5124         rec->generation = btrfs_header_generation(leaf);
5125
5126         rec->objectid = key->objectid;
5127         rec->type = key->type;
5128         rec->offset = key->offset;
5129
5130         rec->length = rec->cache.size;
5131         rec->owner = btrfs_chunk_owner(leaf, ptr);
5132         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5133         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5134         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5135         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5136         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5137         rec->num_stripes = num_stripes;
5138         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5139
5140         for (i = 0; i < rec->num_stripes; ++i) {
5141                 rec->stripes[i].devid =
5142                         btrfs_stripe_devid_nr(leaf, ptr, i);
5143                 rec->stripes[i].offset =
5144                         btrfs_stripe_offset_nr(leaf, ptr, i);
5145                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5146                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5147                                 BTRFS_UUID_SIZE);
5148         }
5149
5150         return rec;
5151 }
5152
5153 static int process_chunk_item(struct cache_tree *chunk_cache,
5154                               struct btrfs_key *key, struct extent_buffer *eb,
5155                               int slot)
5156 {
5157         struct chunk_record *rec;
5158         struct btrfs_chunk *chunk;
5159         int ret = 0;
5160
5161         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5162         /*
5163          * Do extra check for this chunk item,
5164          *
5165          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5166          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5167          * and owner<->key_type check.
5168          */
5169         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5170                                       key->offset);
5171         if (ret < 0) {
5172                 error("chunk(%llu, %llu) is not valid, ignore it",
5173                       key->offset, btrfs_chunk_length(eb, chunk));
5174                 return 0;
5175         }
5176         rec = btrfs_new_chunk_record(eb, key, slot);
5177         ret = insert_cache_extent(chunk_cache, &rec->cache);
5178         if (ret) {
5179                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5180                         rec->offset, rec->length);
5181                 free(rec);
5182         }
5183
5184         return ret;
5185 }
5186
5187 static int process_device_item(struct rb_root *dev_cache,
5188                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5189 {
5190         struct btrfs_dev_item *ptr;
5191         struct device_record *rec;
5192         int ret = 0;
5193
5194         ptr = btrfs_item_ptr(eb,
5195                 slot, struct btrfs_dev_item);
5196
5197         rec = malloc(sizeof(*rec));
5198         if (!rec) {
5199                 fprintf(stderr, "memory allocation failed\n");
5200                 return -ENOMEM;
5201         }
5202
5203         rec->devid = key->offset;
5204         rec->generation = btrfs_header_generation(eb);
5205
5206         rec->objectid = key->objectid;
5207         rec->type = key->type;
5208         rec->offset = key->offset;
5209
5210         rec->devid = btrfs_device_id(eb, ptr);
5211         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5212         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5213
5214         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5215         if (ret) {
5216                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5217                 free(rec);
5218         }
5219
5220         return ret;
5221 }
5222
5223 struct block_group_record *
5224 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5225                              int slot)
5226 {
5227         struct btrfs_block_group_item *ptr;
5228         struct block_group_record *rec;
5229
5230         rec = calloc(1, sizeof(*rec));
5231         if (!rec) {
5232                 fprintf(stderr, "memory allocation failed\n");
5233                 exit(-1);
5234         }
5235
5236         rec->cache.start = key->objectid;
5237         rec->cache.size = key->offset;
5238
5239         rec->generation = btrfs_header_generation(leaf);
5240
5241         rec->objectid = key->objectid;
5242         rec->type = key->type;
5243         rec->offset = key->offset;
5244
5245         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5246         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5247
5248         INIT_LIST_HEAD(&rec->list);
5249
5250         return rec;
5251 }
5252
5253 static int process_block_group_item(struct block_group_tree *block_group_cache,
5254                                     struct btrfs_key *key,
5255                                     struct extent_buffer *eb, int slot)
5256 {
5257         struct block_group_record *rec;
5258         int ret = 0;
5259
5260         rec = btrfs_new_block_group_record(eb, key, slot);
5261         ret = insert_block_group_record(block_group_cache, rec);
5262         if (ret) {
5263                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5264                         rec->objectid, rec->offset);
5265                 free(rec);
5266         }
5267
5268         return ret;
5269 }
5270
5271 struct device_extent_record *
5272 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5273                                struct btrfs_key *key, int slot)
5274 {
5275         struct device_extent_record *rec;
5276         struct btrfs_dev_extent *ptr;
5277
5278         rec = calloc(1, sizeof(*rec));
5279         if (!rec) {
5280                 fprintf(stderr, "memory allocation failed\n");
5281                 exit(-1);
5282         }
5283
5284         rec->cache.objectid = key->objectid;
5285         rec->cache.start = key->offset;
5286
5287         rec->generation = btrfs_header_generation(leaf);
5288
5289         rec->objectid = key->objectid;
5290         rec->type = key->type;
5291         rec->offset = key->offset;
5292
5293         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5294         rec->chunk_objecteid =
5295                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5296         rec->chunk_offset =
5297                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5298         rec->length = btrfs_dev_extent_length(leaf, ptr);
5299         rec->cache.size = rec->length;
5300
5301         INIT_LIST_HEAD(&rec->chunk_list);
5302         INIT_LIST_HEAD(&rec->device_list);
5303
5304         return rec;
5305 }
5306
5307 static int
5308 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5309                            struct btrfs_key *key, struct extent_buffer *eb,
5310                            int slot)
5311 {
5312         struct device_extent_record *rec;
5313         int ret;
5314
5315         rec = btrfs_new_device_extent_record(eb, key, slot);
5316         ret = insert_device_extent_record(dev_extent_cache, rec);
5317         if (ret) {
5318                 fprintf(stderr,
5319                         "Device extent[%llu, %llu, %llu] existed.\n",
5320                         rec->objectid, rec->offset, rec->length);
5321                 free(rec);
5322         }
5323
5324         return ret;
5325 }
5326
5327 static int process_extent_item(struct btrfs_root *root,
5328                                struct cache_tree *extent_cache,
5329                                struct extent_buffer *eb, int slot)
5330 {
5331         struct btrfs_extent_item *ei;
5332         struct btrfs_extent_inline_ref *iref;
5333         struct btrfs_extent_data_ref *dref;
5334         struct btrfs_shared_data_ref *sref;
5335         struct btrfs_key key;
5336         struct extent_record tmpl;
5337         unsigned long end;
5338         unsigned long ptr;
5339         int ret;
5340         int type;
5341         u32 item_size = btrfs_item_size_nr(eb, slot);
5342         u64 refs = 0;
5343         u64 offset;
5344         u64 num_bytes;
5345         int metadata = 0;
5346
5347         btrfs_item_key_to_cpu(eb, &key, slot);
5348
5349         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5350                 metadata = 1;
5351                 num_bytes = root->nodesize;
5352         } else {
5353                 num_bytes = key.offset;
5354         }
5355
5356         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5357                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5358                       key.objectid, root->sectorsize);
5359                 return -EIO;
5360         }
5361         if (item_size < sizeof(*ei)) {
5362 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5363                 struct btrfs_extent_item_v0 *ei0;
5364                 BUG_ON(item_size != sizeof(*ei0));
5365                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5366                 refs = btrfs_extent_refs_v0(eb, ei0);
5367 #else
5368                 BUG();
5369 #endif
5370                 memset(&tmpl, 0, sizeof(tmpl));
5371                 tmpl.start = key.objectid;
5372                 tmpl.nr = num_bytes;
5373                 tmpl.extent_item_refs = refs;
5374                 tmpl.metadata = metadata;
5375                 tmpl.found_rec = 1;
5376                 tmpl.max_size = num_bytes;
5377
5378                 return add_extent_rec(extent_cache, &tmpl);
5379         }
5380
5381         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5382         refs = btrfs_extent_refs(eb, ei);
5383         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5384                 metadata = 1;
5385         else
5386                 metadata = 0;
5387         if (metadata && num_bytes != root->nodesize) {
5388                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5389                       num_bytes, root->nodesize);
5390                 return -EIO;
5391         }
5392         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5393                 error("ignore invalid data extent, length %llu is not aligned to %u",
5394                       num_bytes, root->sectorsize);
5395                 return -EIO;
5396         }
5397
5398         memset(&tmpl, 0, sizeof(tmpl));
5399         tmpl.start = key.objectid;
5400         tmpl.nr = num_bytes;
5401         tmpl.extent_item_refs = refs;
5402         tmpl.metadata = metadata;
5403         tmpl.found_rec = 1;
5404         tmpl.max_size = num_bytes;
5405         add_extent_rec(extent_cache, &tmpl);
5406
5407         ptr = (unsigned long)(ei + 1);
5408         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5409             key.type == BTRFS_EXTENT_ITEM_KEY)
5410                 ptr += sizeof(struct btrfs_tree_block_info);
5411
5412         end = (unsigned long)ei + item_size;
5413         while (ptr < end) {
5414                 iref = (struct btrfs_extent_inline_ref *)ptr;
5415                 type = btrfs_extent_inline_ref_type(eb, iref);
5416                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5417                 switch (type) {
5418                 case BTRFS_TREE_BLOCK_REF_KEY:
5419                         ret = add_tree_backref(extent_cache, key.objectid,
5420                                         0, offset, 0);
5421                         if (ret < 0)
5422                                 error("add_tree_backref failed: %s",
5423                                       strerror(-ret));
5424                         break;
5425                 case BTRFS_SHARED_BLOCK_REF_KEY:
5426                         ret = add_tree_backref(extent_cache, key.objectid,
5427                                         offset, 0, 0);
5428                         if (ret < 0)
5429                                 error("add_tree_backref failed: %s",
5430                                       strerror(-ret));
5431                         break;
5432                 case BTRFS_EXTENT_DATA_REF_KEY:
5433                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5434                         add_data_backref(extent_cache, key.objectid, 0,
5435                                         btrfs_extent_data_ref_root(eb, dref),
5436                                         btrfs_extent_data_ref_objectid(eb,
5437                                                                        dref),
5438                                         btrfs_extent_data_ref_offset(eb, dref),
5439                                         btrfs_extent_data_ref_count(eb, dref),
5440                                         0, num_bytes);
5441                         break;
5442                 case BTRFS_SHARED_DATA_REF_KEY:
5443                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5444                         add_data_backref(extent_cache, key.objectid, offset,
5445                                         0, 0, 0,
5446                                         btrfs_shared_data_ref_count(eb, sref),
5447                                         0, num_bytes);
5448                         break;
5449                 default:
5450                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5451                                 key.objectid, key.type, num_bytes);
5452                         goto out;
5453                 }
5454                 ptr += btrfs_extent_inline_ref_size(type);
5455         }
5456         WARN_ON(ptr > end);
5457 out:
5458         return 0;
5459 }
5460
/*
 * Check that the free space cache of @cache has exactly one entry describing
 * the range [offset, offset + bytes), after removing any part of the range
 * that is occupied by a superblock mirror stripe.
 *
 * For every superblock mirror, btrfs_rmap_block() yields the logical
 * addresses (and stripe length) the mirror maps to inside the block group
 * starting at cache->key.objectid.  Each overlapping stripe trims the range;
 * a stripe in the middle splits it, in which case the left part is checked
 * recursively and the right part is carried on.  The logical[] array returned
 * by btrfs_rmap_block() is freed here.
 *
 * On a match the entry is unlinked from the free space ctl and freed, so the
 * caller can later detect leftover entries.
 *
 * Returns 0 if the range is fully covered by a super stripe or matches a free
 * space entry, -EINVAL (after printing a message) on a missing or mismatching
 * entry, or the error returned by btrfs_rmap_block().
 */
5461 static int check_cache_range(struct btrfs_root *root,
5462                              struct btrfs_block_group_cache *cache,
5463                              u64 offset, u64 bytes)
5464 {
5465         struct btrfs_free_space *entry;
5466         u64 *logical;
5467         u64 bytenr;
5468         int stripe_len;
5469         int i, nr, ret;
5470
5471         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5472                 bytenr = btrfs_sb_offset(i);
5473                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5474                                        cache->key.objectid, bytenr, 0,
5475                                        &logical, &nr, &stripe_len);
5476                 if (ret)
5477                         return ret;
5478
5479                 while (nr--) {
                             /* Stripe ends before the range starts: no overlap */
5480                         if (logical[nr] + stripe_len <= offset)
5481                                 continue;
                             /* Stripe starts after the range ends: no overlap */
5482                         if (offset + bytes <= logical[nr])
5483                                 continue;
5484                         if (logical[nr] == offset) {
                                     /* Stripe swallows the whole range */
5485                                 if (stripe_len >= bytes) {
5486                                         free(logical);
5487                                         return 0;
5488                                 }
                                     /* Drop the stripe from the front of the range */
5489                                 bytes -= stripe_len;
5490                                 offset += stripe_len;
5491                         } else if (logical[nr] < offset) {
                                     /* Stripe starts earlier and reaches past the end */
5492                                 if (logical[nr] + stripe_len >=
5493                                     offset + bytes) {
5494                                         free(logical);
5495                                         return 0;
5496                                 }
                                     /* Keep only what lies behind the stripe */
5497                                 bytes = (offset + bytes) -
5498                                         (logical[nr] + stripe_len);
5499                                 offset = logical[nr] + stripe_len;
5500                         } else {
5501                                 /*
5502                                  * Could be tricky, the super may land in the
5503                                  * middle of the area we're checking.  First
5504                                  * check the easiest case, it's at the end.
5505                                  */
5506                                 if (logical[nr] + stripe_len >=
5507                                     bytes + offset) {
5508                                         bytes = logical[nr] - offset;
5509                                         continue;
5510                                 }
5511
5512                                 /* Check the left side */
5513                                 ret = check_cache_range(root, cache,
5514                                                         offset,
5515                                                         logical[nr] - offset);
5516                                 if (ret) {
5517                                         free(logical);
5518                                         return ret;
5519                                 }
5520
5521                                 /* Now we continue with the right side */
5522                                 bytes = (offset + bytes) -
5523                                         (logical[nr] + stripe_len);
5524                                 offset = logical[nr] + stripe_len;
5525                         }
5526                 }
5527
5528                 free(logical);
5529         }
5530
             /* What is left of the range must match one cache entry exactly */
5531         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5532         if (!entry) {
5533                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5534                         offset, offset+bytes);
5535                 return -EINVAL;
5536         }
5537
5538         if (entry->offset != offset) {
5539                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5540                         entry->offset);
5541                 return -EINVAL;
5542         }
5543
5544         if (entry->bytes != bytes) {
5545                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5546                         bytes, entry->bytes, offset);
5547                 return -EINVAL;
5548         }
5549
             /* Consume the verified entry so leftovers can be detected later */
5550         unlink_free_space(cache->free_space_ctl, entry);
5551         free(entry);
5552         return 0;
5553 }
5554
5555 static int verify_space_cache(struct btrfs_root *root,
5556                               struct btrfs_block_group_cache *cache)
5557 {
5558         struct btrfs_path *path;
5559         struct extent_buffer *leaf;
5560         struct btrfs_key key;
5561         u64 last;
5562         int ret = 0;
5563
5564         path = btrfs_alloc_path();
5565         if (!path)
5566                 return -ENOMEM;
5567
5568         root = root->fs_info->extent_root;
5569
5570         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5571
5572         key.objectid = last;
5573         key.offset = 0;
5574         key.type = BTRFS_EXTENT_ITEM_KEY;
5575
5576         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5577         if (ret < 0)
5578                 goto out;
5579         ret = 0;
5580         while (1) {
5581                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5582                         ret = btrfs_next_leaf(root, path);
5583                         if (ret < 0)
5584                                 goto out;
5585                         if (ret > 0) {
5586                                 ret = 0;
5587                                 break;
5588                         }
5589                 }
5590                 leaf = path->nodes[0];
5591                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5592                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5593                         break;
5594                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5595                     key.type != BTRFS_METADATA_ITEM_KEY) {
5596                         path->slots[0]++;
5597                         continue;
5598                 }
5599
5600                 if (last == key.objectid) {
5601                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5602                                 last = key.objectid + key.offset;
5603                         else
5604                                 last = key.objectid + root->nodesize;
5605                         path->slots[0]++;
5606                         continue;
5607                 }
5608
5609                 ret = check_cache_range(root, cache, last,
5610                                         key.objectid - last);
5611                 if (ret)
5612                         break;
5613                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5614                         last = key.objectid + key.offset;
5615                 else
5616                         last = key.objectid + root->nodesize;
5617                 path->slots[0]++;
5618         }
5619
5620         if (last < cache->key.objectid + cache->key.offset)
5621                 ret = check_cache_range(root, cache, last,
5622                                         cache->key.objectid +
5623                                         cache->key.offset - last);
5624
5625 out:
5626         btrfs_free_path(path);
5627
5628         if (!ret &&
5629             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5630                 fprintf(stderr, "There are still entries left in the space "
5631                         "cache\n");
5632                 ret = -EINVAL;
5633         }
5634
5635         return ret;
5636 }
5637
5638 static int check_space_cache(struct btrfs_root *root)
5639 {
5640         struct btrfs_block_group_cache *cache;
5641         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5642         int ret;
5643         int error = 0;
5644
5645         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5646             btrfs_super_generation(root->fs_info->super_copy) !=
5647             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5648                 printf("cache and super generation don't match, space cache "
5649                        "will be invalidated\n");
5650                 return 0;
5651         }
5652
5653         if (ctx.progress_enabled) {
5654                 ctx.tp = TASK_FREE_SPACE;
5655                 task_start(ctx.info);
5656         }
5657
5658         while (1) {
5659                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5660                 if (!cache)
5661                         break;
5662
5663                 start = cache->key.objectid + cache->key.offset;
5664                 if (!cache->free_space_ctl) {
5665                         if (btrfs_init_free_space_ctl(cache,
5666                                                       root->sectorsize)) {
5667                                 ret = -ENOMEM;
5668                                 break;
5669                         }
5670                 } else {
5671                         btrfs_remove_free_space_cache(cache);
5672                 }
5673
5674                 if (btrfs_fs_compat_ro(root->fs_info,
5675                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5676                         ret = exclude_super_stripes(root, cache);
5677                         if (ret) {
5678                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5679                                         strerror(-ret));
5680                                 error++;
5681                                 continue;
5682                         }
5683                         ret = load_free_space_tree(root->fs_info, cache);
5684                         free_excluded_extents(root, cache);
5685                         if (ret < 0) {
5686                                 fprintf(stderr, "could not load free space tree: %s\n",
5687                                         strerror(-ret));
5688                                 error++;
5689                                 continue;
5690                         }
5691                         error += ret;
5692                 } else {
5693                         ret = load_free_space_cache(root->fs_info, cache);
5694                         if (!ret)
5695                                 continue;
5696                 }
5697
5698                 ret = verify_space_cache(root, cache);
5699                 if (ret) {
5700                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5701                                 cache->key.objectid);
5702                         error++;
5703                 }
5704         }
5705
5706         task_stop(ctx.info);
5707
5708         return error ? -EINVAL : 0;
5709 }
5710
5711 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5712                         u64 num_bytes, unsigned long leaf_offset,
5713                         struct extent_buffer *eb) {
5714
5715         u64 offset = 0;
5716         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5717         char *data;
5718         unsigned long csum_offset;
5719         u32 csum;
5720         u32 csum_expected;
5721         u64 read_len;
5722         u64 data_checked = 0;
5723         u64 tmp;
5724         int ret = 0;
5725         int mirror;
5726         int num_copies;
5727
5728         if (num_bytes % root->sectorsize)
5729                 return -EINVAL;
5730
5731         data = malloc(num_bytes);
5732         if (!data)
5733                 return -ENOMEM;
5734
5735         while (offset < num_bytes) {
5736                 mirror = 0;
5737 again:
5738                 read_len = num_bytes - offset;
5739                 /* read as much space once a time */
5740                 ret = read_extent_data(root, data + offset,
5741                                 bytenr + offset, &read_len, mirror);
5742                 if (ret)
5743                         goto out;
5744                 data_checked = 0;
5745                 /* verify every 4k data's checksum */
5746                 while (data_checked < read_len) {
5747                         csum = ~(u32)0;
5748                         tmp = offset + data_checked;
5749
5750                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5751                                                csum, root->sectorsize);
5752                         btrfs_csum_final(csum, (char *)&csum);
5753
5754                         csum_offset = leaf_offset +
5755                                  tmp / root->sectorsize * csum_size;
5756                         read_extent_buffer(eb, (char *)&csum_expected,
5757                                            csum_offset, csum_size);
5758                         /* try another mirror */
5759                         if (csum != csum_expected) {
5760                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5761                                                 mirror, bytenr + tmp,
5762                                                 csum, csum_expected);
5763                                 num_copies = btrfs_num_copies(
5764                                                 &root->fs_info->mapping_tree,
5765                                                 bytenr, num_bytes);
5766                                 if (mirror < num_copies - 1) {
5767                                         mirror += 1;
5768                                         goto again;
5769                                 }
5770                         }
5771                         data_checked += root->sectorsize;
5772                 }
5773                 offset += read_len;
5774         }
5775 out:
5776         free(data);
5777         return ret;
5778 }
5779
/*
 * Check that the extent tree has EXTENT_ITEMs covering the whole range
 * [bytenr, bytenr + num_bytes).
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1), steps back to the closest
 * preceding item and then walks forward, shrinking the range by every extent
 * item that overlaps it.  If an extent lies in the middle of the range, the
 * part behind it is checked by a recursive call and the part in front of it
 * by restarting the search.
 *
 * Returns 0 if the range is fully covered, 1 (after printing a message) if
 * some part has no extent item, or a negative error from the tree search /
 * leaf walk.
 *
 * NOTE(review): when the recursive check of the right section fails, the loop
 * is left via break and the following "ret = 0" drops that return value; since
 * num_bytes is still non-zero the function ends up returning 1.
 */
5780 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5781                                u64 num_bytes)
5782 {
5783         struct btrfs_path *path;
5784         struct extent_buffer *leaf;
5785         struct btrfs_key key;
5786         int ret;
5787
5788         path = btrfs_alloc_path();
5789         if (!path) {
5790                 fprintf(stderr, "Error allocating path\n");
5791                 return -ENOMEM;
5792         }
5793
5794         key.objectid = bytenr;
5795         key.type = BTRFS_EXTENT_ITEM_KEY;
5796         key.offset = (u64)-1;
5797
5798 again:
5799         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5800                                 0, 0);
5801         if (ret < 0) {
5802                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5803                 btrfs_free_path(path);
5804                 return ret;
5805         } else if (ret) {
                     /* No exact match: step back to the item before the slot */
5806                 if (path->slots[0] > 0) {
5807                         path->slots[0]--;
5808                 } else {
5809                         ret = btrfs_prev_leaf(root, path);
5810                         if (ret < 0) {
5811                                 goto out;
5812                         } else if (ret > 0) {
5813                                 ret = 0;
5814                                 goto out;
5815                         }
5816                 }
5817         }
5818
5819         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5820
5821         /*
5822          * Block group items come before extent items if they have the same
5823          * bytenr, so walk back one more just in case.  Dear future traveller,
5824          * first congrats on mastering time travel.  Now if it's not too much
5825          * trouble could you go back to 2006 and tell Chris to make the
5826          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5827          * EXTENT_ITEM_KEY please?
5828          */
5829         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5830                 if (path->slots[0] > 0) {
5831                         path->slots[0]--;
5832                 } else {
5833                         ret = btrfs_prev_leaf(root, path);
5834                         if (ret < 0) {
5835                                 goto out;
5836                         } else if (ret > 0) {
5837                                 ret = 0;
5838                                 goto out;
5839                         }
5840                 }
5841                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5842         }
5843
             /* Walk forward until the range is used up or we are past it */
5844         while (num_bytes) {
5845                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5846                         ret = btrfs_next_leaf(root, path);
5847                         if (ret < 0) {
5848                                 fprintf(stderr, "Error going to next leaf "
5849                                         "%d\n", ret);
5850                                 btrfs_free_path(path);
5851                                 return ret;
5852                         } else if (ret) {
5853                                 break;
5854                         }
5855                 }
5856                 leaf = path->nodes[0];
5857                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5858                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5859                         path->slots[0]++;
5860                         continue;
5861                 }
                     /* Extent ends before the range starts */
5862                 if (key.objectid + key.offset < bytenr) {
5863                         path->slots[0]++;
5864                         continue;
5865                 }
                     /* Extent starts behind the range: nothing more to find */
5866                 if (key.objectid > bytenr + num_bytes)
5867                         break;
5868
5869                 if (key.objectid == bytenr) {
                             /* Extent starts at the range start */
5870                         if (key.offset >= num_bytes) {
5871                                 num_bytes = 0;
5872                                 break;
5873                         }
5874                         num_bytes -= key.offset;
5875                         bytenr += key.offset;
5876                 } else if (key.objectid < bytenr) {
                             /* Extent starts before the range start */
5877                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5878                                 num_bytes = 0;
5879                                 break;
5880                         }
5881                         num_bytes = (bytenr + num_bytes) -
5882                                 (key.objectid + key.offset);
5883                         bytenr = key.objectid + key.offset;
5884                 } else {
5885                         if (key.objectid + key.offset < bytenr + num_bytes) {
5886                                 u64 new_start = key.objectid + key.offset;
5887                                 u64 new_bytes = bytenr + num_bytes - new_start;
5888
5889                                 /*
5890                                  * Weird case, the extent is in the middle of
5891                                  * our range, we'll have to search one side
5892                                  * and then the other.  Not sure if this happens
5893                                  * in real life, but no harm in coding it up
5894                                  * anyway just in case.
5895                                  */
5896                                 btrfs_release_path(path);
5897                                 ret = check_extent_exists(root, new_start,
5898                                                           new_bytes);
5899                                 if (ret) {
5900                                         fprintf(stderr, "Right section didn't "
5901                                                 "have a record\n");
5902                                         break;
5903                                 }
5904                                 num_bytes = key.objectid - bytenr;
5905                                 goto again;
5906                         }
                             /* Extent covers the tail: keep only the front part */
5907                         num_bytes = key.objectid - bytenr;
5908                 }
5909                 path->slots[0]++;
5910         }
5911         ret = 0;
5912
5913 out:
             /* Anything left of the range has no extent item behind it */
5914         if (num_bytes && !ret) {
5915                 fprintf(stderr, "There are no extents for csum range "
5916                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5917                 ret = 1;
5918         }
5919
5920         btrfs_free_path(path);
5921         return ret;
5922 }
5923
5924 static int check_csums(struct btrfs_root *root)
5925 {
5926         struct btrfs_path *path;
5927         struct extent_buffer *leaf;
5928         struct btrfs_key key;
5929         u64 offset = 0, num_bytes = 0;
5930         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5931         int errors = 0;
5932         int ret;
5933         u64 data_len;
5934         unsigned long leaf_offset;
5935
5936         root = root->fs_info->csum_root;
5937         if (!extent_buffer_uptodate(root->node)) {
5938                 fprintf(stderr, "No valid csum tree found\n");
5939                 return -ENOENT;
5940         }
5941
5942         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5943         key.type = BTRFS_EXTENT_CSUM_KEY;
5944         key.offset = 0;
5945
5946         path = btrfs_alloc_path();
5947         if (!path)
5948                 return -ENOMEM;
5949
5950         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5951         if (ret < 0) {
5952                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5953                 btrfs_free_path(path);
5954                 return ret;
5955         }
5956
5957         if (ret > 0 && path->slots[0])
5958                 path->slots[0]--;
5959         ret = 0;
5960
5961         while (1) {
5962                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5963                         ret = btrfs_next_leaf(root, path);
5964                         if (ret < 0) {
5965                                 fprintf(stderr, "Error going to next leaf "
5966                                         "%d\n", ret);
5967                                 break;
5968                         }
5969                         if (ret)
5970                                 break;
5971                 }
5972                 leaf = path->nodes[0];
5973
5974                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5975                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5976                         path->slots[0]++;
5977                         continue;
5978                 }
5979
5980                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5981                               csum_size) * root->sectorsize;
5982                 if (!check_data_csum)
5983                         goto skip_csum_check;
5984                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5985                 ret = check_extent_csums(root, key.offset, data_len,
5986                                          leaf_offset, leaf);
5987                 if (ret)
5988                         break;
5989 skip_csum_check:
5990                 if (!num_bytes) {
5991                         offset = key.offset;
5992                 } else if (key.offset != offset + num_bytes) {
5993                         ret = check_extent_exists(root, offset, num_bytes);
5994                         if (ret) {
5995                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5996                                         "there is no extent record\n",
5997                                         offset, offset+num_bytes);
5998                                 errors++;
5999                         }
6000                         offset = key.offset;
6001                         num_bytes = 0;
6002                 }
6003                 num_bytes += data_len;
6004                 path->slots[0]++;
6005         }
6006
6007         btrfs_free_path(path);
6008         return errors;
6009 }
6010
6011 static int is_dropped_key(struct btrfs_key *key,
6012                           struct btrfs_key *drop_key) {
6013         if (key->objectid < drop_key->objectid)
6014                 return 1;
6015         else if (key->objectid == drop_key->objectid) {
6016                 if (key->type < drop_key->type)
6017                         return 1;
6018                 else if (key->type == drop_key->type) {
6019                         if (key->offset < drop_key->offset)
6020                                 return 1;
6021                 }
6022         }
6023         return 0;
6024 }
6025
6026 /*
6027  * Here are the rules for FULL_BACKREF.
6028  *
6029  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6030  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6031  *      FULL_BACKREF set.
6032  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6033  *    if it happened after the relocation occurred since we'll have dropped the
6034  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6035  *    have no real way to know for sure.
6036  *
6037  * We process the blocks one root at a time, and we start from the lowest root
6038  * objectid and go to the highest.  So we can just lookup the owner backref for
6039  * the record and if we don't find it then we know it doesn't exist and we have
6040  * a FULL BACKREF.
6041  *
6042  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6043  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6044  * be set or not and then we can check later once we've gathered all the refs.
6045  */
6046 static int calc_extent_flag(struct btrfs_root *root,
6047                            struct cache_tree *extent_cache,
6048                            struct extent_buffer *buf,
6049                            struct root_item_record *ri,
6050                            u64 *flags)
6051 {
6052         struct extent_record *rec;
6053         struct cache_extent *cache;
6054         struct tree_backref *tback;
6055         u64 owner = 0;
6056
6057         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6058         /* we have added this extent before */
6059         if (!cache)
6060                 return -ENOENT;
6061
6062         rec = container_of(cache, struct extent_record, cache);
6063
6064         /*
6065          * Except file/reloc tree, we can not have
6066          * FULL BACKREF MODE
6067          */
6068         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6069                 goto normal;
6070         /*
6071          * root node
6072          */
6073         if (buf->start == ri->bytenr)
6074                 goto normal;
6075
6076         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6077                 goto full_backref;
6078
6079         owner = btrfs_header_owner(buf);
6080         if (owner == ri->objectid)
6081                 goto normal;
6082
6083         tback = find_tree_backref(rec, 0, owner);
6084         if (!tback)
6085                 goto full_backref;
6086 normal:
6087         *flags = 0;
6088         if (rec->flag_block_full_backref != FLAG_UNSET &&
6089             rec->flag_block_full_backref != 0)
6090                 rec->bad_full_backref = 1;
6091         return 0;
6092 full_backref:
6093         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6094         if (rec->flag_block_full_backref != FLAG_UNSET &&
6095             rec->flag_block_full_backref != 1)
6096                 rec->bad_full_backref = 1;
6097         return 0;
6098 }
6099
6100 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6101 {
6102         fprintf(stderr, "Invalid key type(");
6103         print_key_type(stderr, 0, key_type);
6104         fprintf(stderr, ") found in root(");
6105         print_objectid(stderr, rootid, 0);
6106         fprintf(stderr, ")\n");
6107 }
6108
6109 /*
6110  * Check if the key is valid with its extent buffer.
6111  *
6112  * This is a early check in case invalid key exists in a extent buffer
6113  * This is not comprehensive yet, but should prevent wrong key/item passed
6114  * further
6115  */
6116 static int check_type_with_root(u64 rootid, u8 key_type)
6117 {
6118         switch (key_type) {
6119         /* Only valid in chunk tree */
6120         case BTRFS_DEV_ITEM_KEY:
6121         case BTRFS_CHUNK_ITEM_KEY:
6122                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6123                         goto err;
6124                 break;
6125         /* valid in csum and log tree */
6126         case BTRFS_CSUM_TREE_OBJECTID:
6127                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6128                       is_fstree(rootid)))
6129                         goto err;
6130                 break;
6131         case BTRFS_EXTENT_ITEM_KEY:
6132         case BTRFS_METADATA_ITEM_KEY:
6133         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6134                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6135                         goto err;
6136                 break;
6137         case BTRFS_ROOT_ITEM_KEY:
6138                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6139                         goto err;
6140                 break;
6141         case BTRFS_DEV_EXTENT_KEY:
6142                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6143                         goto err;
6144                 break;
6145         }
6146         return 0;
6147 err:
6148         report_mismatch_key_root(key_type, rootid);
6149         return -EINVAL;
6150 }
6151
6152 static int run_next_block(struct btrfs_root *root,
6153                           struct block_info *bits,
6154                           int bits_nr,
6155                           u64 *last,
6156                           struct cache_tree *pending,
6157                           struct cache_tree *seen,
6158                           struct cache_tree *reada,
6159                           struct cache_tree *nodes,
6160                           struct cache_tree *extent_cache,
6161                           struct cache_tree *chunk_cache,
6162                           struct rb_root *dev_cache,
6163                           struct block_group_tree *block_group_cache,
6164                           struct device_extent_tree *dev_extent_cache,
6165                           struct root_item_record *ri)
6166 {
6167         struct extent_buffer *buf;
6168         struct extent_record *rec = NULL;
6169         u64 bytenr;
6170         u32 size;
6171         u64 parent;
6172         u64 owner;
6173         u64 flags;
6174         u64 ptr;
6175         u64 gen = 0;
6176         int ret = 0;
6177         int i;
6178         int nritems;
6179         struct btrfs_key key;
6180         struct cache_extent *cache;
6181         int reada_bits;
6182
6183         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6184                                     bits_nr, &reada_bits);
6185         if (nritems == 0)
6186                 return 1;
6187
6188         if (!reada_bits) {
6189                 for(i = 0; i < nritems; i++) {
6190                         ret = add_cache_extent(reada, bits[i].start,
6191                                                bits[i].size);
6192                         if (ret == -EEXIST)
6193                                 continue;
6194
6195                         /* fixme, get the parent transid */
6196                         readahead_tree_block(root, bits[i].start,
6197                                              bits[i].size, 0);
6198                 }
6199         }
6200         *last = bits[0].start;
6201         bytenr = bits[0].start;
6202         size = bits[0].size;
6203
6204         cache = lookup_cache_extent(pending, bytenr, size);
6205         if (cache) {
6206                 remove_cache_extent(pending, cache);
6207                 free(cache);
6208         }
6209         cache = lookup_cache_extent(reada, bytenr, size);
6210         if (cache) {
6211                 remove_cache_extent(reada, cache);
6212                 free(cache);
6213         }
6214         cache = lookup_cache_extent(nodes, bytenr, size);
6215         if (cache) {
6216                 remove_cache_extent(nodes, cache);
6217                 free(cache);
6218         }
6219         cache = lookup_cache_extent(extent_cache, bytenr, size);
6220         if (cache) {
6221                 rec = container_of(cache, struct extent_record, cache);
6222                 gen = rec->parent_generation;
6223         }
6224
6225         /* fixme, get the real parent transid */
6226         buf = read_tree_block(root, bytenr, size, gen);
6227         if (!extent_buffer_uptodate(buf)) {
6228                 record_bad_block_io(root->fs_info,
6229                                     extent_cache, bytenr, size);
6230                 goto out;
6231         }
6232
6233         nritems = btrfs_header_nritems(buf);
6234
6235         flags = 0;
6236         if (!init_extent_tree) {
6237                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6238                                        btrfs_header_level(buf), 1, NULL,
6239                                        &flags);
6240                 if (ret < 0) {
6241                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6242                         if (ret < 0) {
6243                                 fprintf(stderr, "Couldn't calc extent flags\n");
6244                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6245                         }
6246                 }
6247         } else {
6248                 flags = 0;
6249                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6250                 if (ret < 0) {
6251                         fprintf(stderr, "Couldn't calc extent flags\n");
6252                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6253                 }
6254         }
6255
6256         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6257                 if (ri != NULL &&
6258                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6259                     ri->objectid == btrfs_header_owner(buf)) {
6260                         /*
6261                          * Ok we got to this block from it's original owner and
6262                          * we have FULL_BACKREF set.  Relocation can leave
6263                          * converted blocks over so this is altogether possible,
6264                          * however it's not possible if the generation > the
6265                          * last snapshot, so check for this case.
6266                          */
6267                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6268                             btrfs_header_generation(buf) > ri->last_snapshot) {
6269                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6270                                 rec->bad_full_backref = 1;
6271                         }
6272                 }
6273         } else {
6274                 if (ri != NULL &&
6275                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6276                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6277                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6278                         rec->bad_full_backref = 1;
6279                 }
6280         }
6281
6282         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6283                 rec->flag_block_full_backref = 1;
6284                 parent = bytenr;
6285                 owner = 0;
6286         } else {
6287                 rec->flag_block_full_backref = 0;
6288                 parent = 0;
6289                 owner = btrfs_header_owner(buf);
6290         }
6291
6292         ret = check_block(root, extent_cache, buf, flags);
6293         if (ret)
6294                 goto out;
6295
6296         if (btrfs_is_leaf(buf)) {
6297                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6298                 for (i = 0; i < nritems; i++) {
6299                         struct btrfs_file_extent_item *fi;
6300                         btrfs_item_key_to_cpu(buf, &key, i);
6301                         /*
6302                          * Check key type against the leaf owner.
6303                          * Could filter quite a lot of early error if
6304                          * owner is correct
6305                          */
6306                         if (check_type_with_root(btrfs_header_owner(buf),
6307                                                  key.type)) {
6308                                 fprintf(stderr, "ignoring invalid key\n");
6309                                 continue;
6310                         }
6311                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6312                                 process_extent_item(root, extent_cache, buf,
6313                                                     i);
6314                                 continue;
6315                         }
6316                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6317                                 process_extent_item(root, extent_cache, buf,
6318                                                     i);
6319                                 continue;
6320                         }
6321                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6322                                 total_csum_bytes +=
6323                                         btrfs_item_size_nr(buf, i);
6324                                 continue;
6325                         }
6326                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6327                                 process_chunk_item(chunk_cache, &key, buf, i);
6328                                 continue;
6329                         }
6330                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6331                                 process_device_item(dev_cache, &key, buf, i);
6332                                 continue;
6333                         }
6334                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6335                                 process_block_group_item(block_group_cache,
6336                                         &key, buf, i);
6337                                 continue;
6338                         }
6339                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6340                                 process_device_extent_item(dev_extent_cache,
6341                                         &key, buf, i);
6342                                 continue;
6343
6344                         }
6345                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6346 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6347                                 process_extent_ref_v0(extent_cache, buf, i);
6348 #else
6349                                 BUG();
6350 #endif
6351                                 continue;
6352                         }
6353
6354                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6355                                 ret = add_tree_backref(extent_cache,
6356                                                 key.objectid, 0, key.offset, 0);
6357                                 if (ret < 0)
6358                                         error("add_tree_backref failed: %s",
6359                                               strerror(-ret));
6360                                 continue;
6361                         }
6362                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6363                                 ret = add_tree_backref(extent_cache,
6364                                                 key.objectid, key.offset, 0, 0);
6365                                 if (ret < 0)
6366                                         error("add_tree_backref failed: %s",
6367                                               strerror(-ret));
6368                                 continue;
6369                         }
6370                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6371                                 struct btrfs_extent_data_ref *ref;
6372                                 ref = btrfs_item_ptr(buf, i,
6373                                                 struct btrfs_extent_data_ref);
6374                                 add_data_backref(extent_cache,
6375                                         key.objectid, 0,
6376                                         btrfs_extent_data_ref_root(buf, ref),
6377                                         btrfs_extent_data_ref_objectid(buf,
6378                                                                        ref),
6379                                         btrfs_extent_data_ref_offset(buf, ref),
6380                                         btrfs_extent_data_ref_count(buf, ref),
6381                                         0, root->sectorsize);
6382                                 continue;
6383                         }
6384                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6385                                 struct btrfs_shared_data_ref *ref;
6386                                 ref = btrfs_item_ptr(buf, i,
6387                                                 struct btrfs_shared_data_ref);
6388                                 add_data_backref(extent_cache,
6389                                         key.objectid, key.offset, 0, 0, 0,
6390                                         btrfs_shared_data_ref_count(buf, ref),
6391                                         0, root->sectorsize);
6392                                 continue;
6393                         }
6394                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6395                                 struct bad_item *bad;
6396
6397                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6398                                         continue;
6399                                 if (!owner)
6400                                         continue;
6401                                 bad = malloc(sizeof(struct bad_item));
6402                                 if (!bad)
6403                                         continue;
6404                                 INIT_LIST_HEAD(&bad->list);
6405                                 memcpy(&bad->key, &key,
6406                                        sizeof(struct btrfs_key));
6407                                 bad->root_id = owner;
6408                                 list_add_tail(&bad->list, &delete_items);
6409                                 continue;
6410                         }
6411                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6412                                 continue;
6413                         fi = btrfs_item_ptr(buf, i,
6414                                             struct btrfs_file_extent_item);
6415                         if (btrfs_file_extent_type(buf, fi) ==
6416                             BTRFS_FILE_EXTENT_INLINE)
6417                                 continue;
6418                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6419                                 continue;
6420
6421                         data_bytes_allocated +=
6422                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6423                         if (data_bytes_allocated < root->sectorsize) {
6424                                 abort();
6425                         }
6426                         data_bytes_referenced +=
6427                                 btrfs_file_extent_num_bytes(buf, fi);
6428                         add_data_backref(extent_cache,
6429                                 btrfs_file_extent_disk_bytenr(buf, fi),
6430                                 parent, owner, key.objectid, key.offset -
6431                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6432                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6433                 }
6434         } else {
6435                 int level;
6436                 struct btrfs_key first_key;
6437
6438                 first_key.objectid = 0;
6439
6440                 if (nritems > 0)
6441                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6442                 level = btrfs_header_level(buf);
6443                 for (i = 0; i < nritems; i++) {
6444                         struct extent_record tmpl;
6445
6446                         ptr = btrfs_node_blockptr(buf, i);
6447                         size = root->nodesize;
6448                         btrfs_node_key_to_cpu(buf, &key, i);
6449                         if (ri != NULL) {
6450                                 if ((level == ri->drop_level)
6451                                     && is_dropped_key(&key, &ri->drop_key)) {
6452                                         continue;
6453                                 }
6454                         }
6455
6456                         memset(&tmpl, 0, sizeof(tmpl));
6457                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6458                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6459                         tmpl.start = ptr;
6460                         tmpl.nr = size;
6461                         tmpl.refs = 1;
6462                         tmpl.metadata = 1;
6463                         tmpl.max_size = size;
6464                         ret = add_extent_rec(extent_cache, &tmpl);
6465                         if (ret < 0)
6466                                 goto out;
6467
6468                         ret = add_tree_backref(extent_cache, ptr, parent,
6469                                         owner, 1);
6470                         if (ret < 0) {
6471                                 error("add_tree_backref failed: %s",
6472                                       strerror(-ret));
6473                                 continue;
6474                         }
6475
6476                         if (level > 1) {
6477                                 add_pending(nodes, seen, ptr, size);
6478                         } else {
6479                                 add_pending(pending, seen, ptr, size);
6480                         }
6481                 }
6482                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6483                                       nritems) * sizeof(struct btrfs_key_ptr);
6484         }
6485         total_btree_bytes += buf->len;
6486         if (fs_root_objectid(btrfs_header_owner(buf)))
6487                 total_fs_tree_bytes += buf->len;
6488         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6489                 total_extent_tree_bytes += buf->len;
6490         if (!found_old_backref &&
6491             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6492             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6493             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6494                 found_old_backref = 1;
6495 out:
6496         free_extent_buffer(buf);
6497         return ret;
6498 }
6499
6500 static int add_root_to_pending(struct extent_buffer *buf,
6501                                struct cache_tree *extent_cache,
6502                                struct cache_tree *pending,
6503                                struct cache_tree *seen,
6504                                struct cache_tree *nodes,
6505                                u64 objectid)
6506 {
6507         struct extent_record tmpl;
6508         int ret;
6509
6510         if (btrfs_header_level(buf) > 0)
6511                 add_pending(nodes, seen, buf->start, buf->len);
6512         else
6513                 add_pending(pending, seen, buf->start, buf->len);
6514
6515         memset(&tmpl, 0, sizeof(tmpl));
6516         tmpl.start = buf->start;
6517         tmpl.nr = buf->len;
6518         tmpl.is_root = 1;
6519         tmpl.refs = 1;
6520         tmpl.metadata = 1;
6521         tmpl.max_size = buf->len;
6522         add_extent_rec(extent_cache, &tmpl);
6523
6524         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6525             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6526                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6527                                 0, 1);
6528         else
6529                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6530                                 1);
6531         return ret;
6532 }
6533
6534 /* as we fix the tree, we might be deleting blocks that
6535  * we're tracking for repair.  This hook makes sure we
6536  * remove any backrefs for blocks as we are fixing them.
6537  */
6538 static int free_extent_hook(struct btrfs_trans_handle *trans,
6539                             struct btrfs_root *root,
6540                             u64 bytenr, u64 num_bytes, u64 parent,
6541                             u64 root_objectid, u64 owner, u64 offset,
6542                             int refs_to_drop)
6543 {
6544         struct extent_record *rec;
6545         struct cache_extent *cache;
6546         int is_data;
6547         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6548
6549         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6550         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6551         if (!cache)
6552                 return 0;
6553
6554         rec = container_of(cache, struct extent_record, cache);
6555         if (is_data) {
6556                 struct data_backref *back;
6557                 back = find_data_backref(rec, parent, root_objectid, owner,
6558                                          offset, 1, bytenr, num_bytes);
6559                 if (!back)
6560                         goto out;
6561                 if (back->node.found_ref) {
6562                         back->found_ref -= refs_to_drop;
6563                         if (rec->refs)
6564                                 rec->refs -= refs_to_drop;
6565                 }
6566                 if (back->node.found_extent_tree) {
6567                         back->num_refs -= refs_to_drop;
6568                         if (rec->extent_item_refs)
6569                                 rec->extent_item_refs -= refs_to_drop;
6570                 }
6571                 if (back->found_ref == 0)
6572                         back->node.found_ref = 0;
6573                 if (back->num_refs == 0)
6574                         back->node.found_extent_tree = 0;
6575
6576                 if (!back->node.found_extent_tree && back->node.found_ref) {
6577                         list_del(&back->node.list);
6578                         free(back);
6579                 }
6580         } else {
6581                 struct tree_backref *back;
6582                 back = find_tree_backref(rec, parent, root_objectid);
6583                 if (!back)
6584                         goto out;
6585                 if (back->node.found_ref) {
6586                         if (rec->refs)
6587                                 rec->refs--;
6588                         back->node.found_ref = 0;
6589                 }
6590                 if (back->node.found_extent_tree) {
6591                         if (rec->extent_item_refs)
6592                                 rec->extent_item_refs--;
6593                         back->node.found_extent_tree = 0;
6594                 }
6595                 if (!back->node.found_extent_tree && back->node.found_ref) {
6596                         list_del(&back->node.list);
6597                         free(back);
6598                 }
6599         }
6600         maybe_free_extent_rec(extent_cache, rec);
6601 out:
6602         return 0;
6603 }
6604
6605 static int delete_extent_records(struct btrfs_trans_handle *trans,
6606                                  struct btrfs_root *root,
6607                                  struct btrfs_path *path,
6608                                  u64 bytenr, u64 new_len)
6609 {
6610         struct btrfs_key key;
6611         struct btrfs_key found_key;
6612         struct extent_buffer *leaf;
6613         int ret;
6614         int slot;
6615
6616
6617         key.objectid = bytenr;
6618         key.type = (u8)-1;
6619         key.offset = (u64)-1;
6620
6621         while(1) {
6622                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6623                                         &key, path, 0, 1);
6624                 if (ret < 0)
6625                         break;
6626
6627                 if (ret > 0) {
6628                         ret = 0;
6629                         if (path->slots[0] == 0)
6630                                 break;
6631                         path->slots[0]--;
6632                 }
6633                 ret = 0;
6634
6635                 leaf = path->nodes[0];
6636                 slot = path->slots[0];
6637
6638                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6639                 if (found_key.objectid != bytenr)
6640                         break;
6641
6642                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6643                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6644                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6645                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6646                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6647                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6648                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6649                         btrfs_release_path(path);
6650                         if (found_key.type == 0) {
6651                                 if (found_key.offset == 0)
6652                                         break;
6653                                 key.offset = found_key.offset - 1;
6654                                 key.type = found_key.type;
6655                         }
6656                         key.type = found_key.type - 1;
6657                         key.offset = (u64)-1;
6658                         continue;
6659                 }
6660
6661                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6662                         found_key.objectid, found_key.type, found_key.offset);
6663
6664                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6665                 if (ret)
6666                         break;
6667                 btrfs_release_path(path);
6668
6669                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6670                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6671                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6672                                 found_key.offset : root->nodesize;
6673
6674                         ret = btrfs_update_block_group(trans, root, bytenr,
6675                                                        bytes, 0, 0);
6676                         if (ret)
6677                                 break;
6678                 }
6679         }
6680
6681         btrfs_release_path(path);
6682         return ret;
6683 }
6684
6685 /*
6686  * for a single backref, this will allocate a new extent
6687  * and add the backref to it.
6688  */
6689 static int record_extent(struct btrfs_trans_handle *trans,
6690                          struct btrfs_fs_info *info,
6691                          struct btrfs_path *path,
6692                          struct extent_record *rec,
6693                          struct extent_backref *back,
6694                          int allocated, u64 flags)
6695 {
6696         int ret;
6697         struct btrfs_root *extent_root = info->extent_root;
6698         struct extent_buffer *leaf;
6699         struct btrfs_key ins_key;
6700         struct btrfs_extent_item *ei;
6701         struct tree_backref *tback;
6702         struct data_backref *dback;
6703         struct btrfs_tree_block_info *bi;
6704
6705         if (!back->is_data)
6706                 rec->max_size = max_t(u64, rec->max_size,
6707                                     info->extent_root->nodesize);
6708
6709         if (!allocated) {
6710                 u32 item_size = sizeof(*ei);
6711
6712                 if (!back->is_data)
6713                         item_size += sizeof(*bi);
6714
6715                 ins_key.objectid = rec->start;
6716                 ins_key.offset = rec->max_size;
6717                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6718
6719                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6720                                         &ins_key, item_size);
6721                 if (ret)
6722                         goto fail;
6723
6724                 leaf = path->nodes[0];
6725                 ei = btrfs_item_ptr(leaf, path->slots[0],
6726                                     struct btrfs_extent_item);
6727
6728                 btrfs_set_extent_refs(leaf, ei, 0);
6729                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6730
6731                 if (back->is_data) {
6732                         btrfs_set_extent_flags(leaf, ei,
6733                                                BTRFS_EXTENT_FLAG_DATA);
6734                 } else {
6735                         struct btrfs_disk_key copy_key;;
6736
6737                         tback = to_tree_backref(back);
6738                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6739                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6740                                              sizeof(*bi));
6741
6742                         btrfs_set_disk_key_objectid(&copy_key,
6743                                                     rec->info_objectid);
6744                         btrfs_set_disk_key_type(&copy_key, 0);
6745                         btrfs_set_disk_key_offset(&copy_key, 0);
6746
6747                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6748                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6749
6750                         btrfs_set_extent_flags(leaf, ei,
6751                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6752                 }
6753
6754                 btrfs_mark_buffer_dirty(leaf);
6755                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6756                                                rec->max_size, 1, 0);
6757                 if (ret)
6758                         goto fail;
6759                 btrfs_release_path(path);
6760         }
6761
6762         if (back->is_data) {
6763                 u64 parent;
6764                 int i;
6765
6766                 dback = to_data_backref(back);
6767                 if (back->full_backref)
6768                         parent = dback->parent;
6769                 else
6770                         parent = 0;
6771
6772                 for (i = 0; i < dback->found_ref; i++) {
6773                         /* if parent != 0, we're doing a full backref
6774                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6775                          * just makes the backref allocator create a data
6776                          * backref
6777                          */
6778                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6779                                                    rec->start, rec->max_size,
6780                                                    parent,
6781                                                    dback->root,
6782                                                    parent ?
6783                                                    BTRFS_FIRST_FREE_OBJECTID :
6784                                                    dback->owner,
6785                                                    dback->offset);
6786                         if (ret)
6787                                 break;
6788                 }
6789                 fprintf(stderr, "adding new data backref"
6790                                 " on %llu %s %llu owner %llu"
6791                                 " offset %llu found %d\n",
6792                                 (unsigned long long)rec->start,
6793                                 back->full_backref ?
6794                                 "parent" : "root",
6795                                 back->full_backref ?
6796                                 (unsigned long long)parent :
6797                                 (unsigned long long)dback->root,
6798                                 (unsigned long long)dback->owner,
6799                                 (unsigned long long)dback->offset,
6800                                 dback->found_ref);
6801         } else {
6802                 u64 parent;
6803
6804                 tback = to_tree_backref(back);
6805                 if (back->full_backref)
6806                         parent = tback->parent;
6807                 else
6808                         parent = 0;
6809
6810                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6811                                            rec->start, rec->max_size,
6812                                            parent, tback->root, 0, 0);
6813                 fprintf(stderr, "adding new tree backref on "
6814                         "start %llu len %llu parent %llu root %llu\n",
6815                         rec->start, rec->max_size, parent, tback->root);
6816         }
6817 fail:
6818         btrfs_release_path(path);
6819         return ret;
6820 }
6821
6822 static struct extent_entry *find_entry(struct list_head *entries,
6823                                        u64 bytenr, u64 bytes)
6824 {
6825         struct extent_entry *entry = NULL;
6826
6827         list_for_each_entry(entry, entries, list) {
6828                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6829                         return entry;
6830         }
6831
6832         return NULL;
6833 }
6834
6835 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6836 {
6837         struct extent_entry *entry, *best = NULL, *prev = NULL;
6838
6839         list_for_each_entry(entry, entries, list) {
6840                 if (!prev) {
6841                         prev = entry;
6842                         continue;
6843                 }
6844
6845                 /*
6846                  * If there are as many broken entries as entries then we know
6847                  * not to trust this particular entry.
6848                  */
6849                 if (entry->broken == entry->count)
6850                         continue;
6851
6852                 /*
6853                  * If our current entry == best then we can't be sure our best
6854                  * is really the best, so we need to keep searching.
6855                  */
6856                 if (best && best->count == entry->count) {
6857                         prev = entry;
6858                         best = NULL;
6859                         continue;
6860                 }
6861
6862                 /* Prev == entry, not good enough, have to keep searching */
6863                 if (!prev->broken && prev->count == entry->count)
6864                         continue;
6865
6866                 if (!best)
6867                         best = (prev->count > entry->count) ? prev : entry;
6868                 else if (best->count < entry->count)
6869                         best = entry;
6870                 prev = entry;
6871         }
6872
6873         return best;
6874 }
6875
6876 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6877                       struct data_backref *dback, struct extent_entry *entry)
6878 {
6879         struct btrfs_trans_handle *trans;
6880         struct btrfs_root *root;
6881         struct btrfs_file_extent_item *fi;
6882         struct extent_buffer *leaf;
6883         struct btrfs_key key;
6884         u64 bytenr, bytes;
6885         int ret, err;
6886
6887         key.objectid = dback->root;
6888         key.type = BTRFS_ROOT_ITEM_KEY;
6889         key.offset = (u64)-1;
6890         root = btrfs_read_fs_root(info, &key);
6891         if (IS_ERR(root)) {
6892                 fprintf(stderr, "Couldn't find root for our ref\n");
6893                 return -EINVAL;
6894         }
6895
6896         /*
6897          * The backref points to the original offset of the extent if it was
6898          * split, so we need to search down to the offset we have and then walk
6899          * forward until we find the backref we're looking for.
6900          */
6901         key.objectid = dback->owner;
6902         key.type = BTRFS_EXTENT_DATA_KEY;
6903         key.offset = dback->offset;
6904         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6905         if (ret < 0) {
6906                 fprintf(stderr, "Error looking up ref %d\n", ret);
6907                 return ret;
6908         }
6909
6910         while (1) {
6911                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6912                         ret = btrfs_next_leaf(root, path);
6913                         if (ret) {
6914                                 fprintf(stderr, "Couldn't find our ref, next\n");
6915                                 return -EINVAL;
6916                         }
6917                 }
6918                 leaf = path->nodes[0];
6919                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6920                 if (key.objectid != dback->owner ||
6921                     key.type != BTRFS_EXTENT_DATA_KEY) {
6922                         fprintf(stderr, "Couldn't find our ref, search\n");
6923                         return -EINVAL;
6924                 }
6925                 fi = btrfs_item_ptr(leaf, path->slots[0],
6926                                     struct btrfs_file_extent_item);
6927                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6928                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6929
6930                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6931                         break;
6932                 path->slots[0]++;
6933         }
6934
6935         btrfs_release_path(path);
6936
6937         trans = btrfs_start_transaction(root, 1);
6938         if (IS_ERR(trans))
6939                 return PTR_ERR(trans);
6940
6941         /*
6942          * Ok we have the key of the file extent we want to fix, now we can cow
6943          * down to the thing and fix it.
6944          */
6945         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6946         if (ret < 0) {
6947                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6948                         key.objectid, key.type, key.offset, ret);
6949                 goto out;
6950         }
6951         if (ret > 0) {
6952                 fprintf(stderr, "Well that's odd, we just found this key "
6953                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6954                         key.offset);
6955                 ret = -EINVAL;
6956                 goto out;
6957         }
6958         leaf = path->nodes[0];
6959         fi = btrfs_item_ptr(leaf, path->slots[0],
6960                             struct btrfs_file_extent_item);
6961
6962         if (btrfs_file_extent_compression(leaf, fi) &&
6963             dback->disk_bytenr != entry->bytenr) {
6964                 fprintf(stderr, "Ref doesn't match the record start and is "
6965                         "compressed, please take a btrfs-image of this file "
6966                         "system and send it to a btrfs developer so they can "
6967                         "complete this functionality for bytenr %Lu\n",
6968                         dback->disk_bytenr);
6969                 ret = -EINVAL;
6970                 goto out;
6971         }
6972
6973         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6974                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6975         } else if (dback->disk_bytenr > entry->bytenr) {
6976                 u64 off_diff, offset;
6977
6978                 off_diff = dback->disk_bytenr - entry->bytenr;
6979                 offset = btrfs_file_extent_offset(leaf, fi);
6980                 if (dback->disk_bytenr + offset +
6981                     btrfs_file_extent_num_bytes(leaf, fi) >
6982                     entry->bytenr + entry->bytes) {
6983                         fprintf(stderr, "Ref is past the entry end, please "
6984                                 "take a btrfs-image of this file system and "
6985                                 "send it to a btrfs developer, ref %Lu\n",
6986                                 dback->disk_bytenr);
6987                         ret = -EINVAL;
6988                         goto out;
6989                 }
6990                 offset += off_diff;
6991                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6992                 btrfs_set_file_extent_offset(leaf, fi, offset);
6993         } else if (dback->disk_bytenr < entry->bytenr) {
6994                 u64 offset;
6995
6996                 offset = btrfs_file_extent_offset(leaf, fi);
6997                 if (dback->disk_bytenr + offset < entry->bytenr) {
6998                         fprintf(stderr, "Ref is before the entry start, please"
6999                                 " take a btrfs-image of this file system and "
7000                                 "send it to a btrfs developer, ref %Lu\n",
7001                                 dback->disk_bytenr);
7002                         ret = -EINVAL;
7003                         goto out;
7004                 }
7005
7006                 offset += dback->disk_bytenr;
7007                 offset -= entry->bytenr;
7008                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7009                 btrfs_set_file_extent_offset(leaf, fi, offset);
7010         }
7011
7012         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7013
7014         /*
7015          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7016          * only do this if we aren't using compression, otherwise it's a
7017          * trickier case.
7018          */
7019         if (!btrfs_file_extent_compression(leaf, fi))
7020                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7021         else
7022                 printf("ram bytes may be wrong?\n");
7023         btrfs_mark_buffer_dirty(leaf);
7024 out:
7025         err = btrfs_commit_transaction(trans, root);
7026         btrfs_release_path(path);
7027         return ret ? ret : err;
7028 }
7029
7030 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7031                            struct extent_record *rec)
7032 {
7033         struct extent_backref *back;
7034         struct data_backref *dback;
7035         struct extent_entry *entry, *best = NULL;
7036         LIST_HEAD(entries);
7037         int nr_entries = 0;
7038         int broken_entries = 0;
7039         int ret = 0;
7040         short mismatch = 0;
7041
7042         /*
7043          * Metadata is easy and the backrefs should always agree on bytenr and
7044          * size, if not we've got bigger issues.
7045          */
7046         if (rec->metadata)
7047                 return 0;
7048
7049         list_for_each_entry(back, &rec->backrefs, list) {
7050                 if (back->full_backref || !back->is_data)
7051                         continue;
7052
7053                 dback = to_data_backref(back);
7054
7055                 /*
7056                  * We only pay attention to backrefs that we found a real
7057                  * backref for.
7058                  */
7059                 if (dback->found_ref == 0)
7060                         continue;
7061
7062                 /*
7063                  * For now we only catch when the bytes don't match, not the
7064                  * bytenr.  We can easily do this at the same time, but I want
7065                  * to have a fs image to test on before we just add repair
7066                  * functionality willy-nilly so we know we won't screw up the
7067                  * repair.
7068                  */
7069
7070                 entry = find_entry(&entries, dback->disk_bytenr,
7071                                    dback->bytes);
7072                 if (!entry) {
7073                         entry = malloc(sizeof(struct extent_entry));
7074                         if (!entry) {
7075                                 ret = -ENOMEM;
7076                                 goto out;
7077                         }
7078                         memset(entry, 0, sizeof(*entry));
7079                         entry->bytenr = dback->disk_bytenr;
7080                         entry->bytes = dback->bytes;
7081                         list_add_tail(&entry->list, &entries);
7082                         nr_entries++;
7083                 }
7084
7085                 /*
7086                  * If we only have on entry we may think the entries agree when
7087                  * in reality they don't so we have to do some extra checking.
7088                  */
7089                 if (dback->disk_bytenr != rec->start ||
7090                     dback->bytes != rec->nr || back->broken)
7091                         mismatch = 1;
7092
7093                 if (back->broken) {
7094                         entry->broken++;
7095                         broken_entries++;
7096                 }
7097
7098                 entry->count++;
7099         }
7100
7101         /* Yay all the backrefs agree, carry on good sir */
7102         if (nr_entries <= 1 && !mismatch)
7103                 goto out;
7104
7105         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7106                 "%Lu\n", rec->start);
7107
7108         /*
7109          * First we want to see if the backrefs can agree amongst themselves who
7110          * is right, so figure out which one of the entries has the highest
7111          * count.
7112          */
7113         best = find_most_right_entry(&entries);
7114
7115         /*
7116          * Ok so we may have an even split between what the backrefs think, so
7117          * this is where we use the extent ref to see what it thinks.
7118          */
7119         if (!best) {
7120                 entry = find_entry(&entries, rec->start, rec->nr);
7121                 if (!entry && (!broken_entries || !rec->found_rec)) {
7122                         fprintf(stderr, "Backrefs don't agree with each other "
7123                                 "and extent record doesn't agree with anybody,"
7124                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7125                                 rec->start, rec->nr);
7126                         ret = -EINVAL;
7127                         goto out;
7128                 } else if (!entry) {
7129                         /*
7130                          * Ok our backrefs were broken, we'll assume this is the
7131                          * correct value and add an entry for this range.
7132                          */
7133                         entry = malloc(sizeof(struct extent_entry));
7134                         if (!entry) {
7135                                 ret = -ENOMEM;
7136                                 goto out;
7137                         }
7138                         memset(entry, 0, sizeof(*entry));
7139                         entry->bytenr = rec->start;
7140                         entry->bytes = rec->nr;
7141                         list_add_tail(&entry->list, &entries);
7142                         nr_entries++;
7143                 }
7144                 entry->count++;
7145                 best = find_most_right_entry(&entries);
7146                 if (!best) {
7147                         fprintf(stderr, "Backrefs and extent record evenly "
7148                                 "split on who is right, this is going to "
7149                                 "require user input to fix bytenr %Lu bytes "
7150                                 "%Lu\n", rec->start, rec->nr);
7151                         ret = -EINVAL;
7152                         goto out;
7153                 }
7154         }
7155
7156         /*
7157          * I don't think this can happen currently as we'll abort() if we catch
7158          * this case higher up, but in case somebody removes that we still can't
7159          * deal with it properly here yet, so just bail out of that's the case.
7160          */
7161         if (best->bytenr != rec->start) {
7162                 fprintf(stderr, "Extent start and backref starts don't match, "
7163                         "please use btrfs-image on this file system and send "
7164                         "it to a btrfs developer so they can make fsck fix "
7165                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7166                         rec->start, rec->nr);
7167                 ret = -EINVAL;
7168                 goto out;
7169         }
7170
7171         /*
7172          * Ok great we all agreed on an extent record, let's go find the real
7173          * references and fix up the ones that don't match.
7174          */
7175         list_for_each_entry(back, &rec->backrefs, list) {
7176                 if (back->full_backref || !back->is_data)
7177                         continue;
7178
7179                 dback = to_data_backref(back);
7180
7181                 /*
7182                  * Still ignoring backrefs that don't have a real ref attached
7183                  * to them.
7184                  */
7185                 if (dback->found_ref == 0)
7186                         continue;
7187
7188                 if (dback->bytes == best->bytes &&
7189                     dback->disk_bytenr == best->bytenr)
7190                         continue;
7191
7192                 ret = repair_ref(info, path, dback, best);
7193                 if (ret)
7194                         goto out;
7195         }
7196
7197         /*
7198          * Ok we messed with the actual refs, which means we need to drop our
7199          * entire cache and go back and rescan.  I know this is a huge pain and
7200          * adds a lot of extra work, but it's the only way to be safe.  Once all
7201          * the backrefs agree we may not need to do anything to the extent
7202          * record itself.
7203          */
7204         ret = -EAGAIN;
7205 out:
7206         while (!list_empty(&entries)) {
7207                 entry = list_entry(entries.next, struct extent_entry, list);
7208                 list_del_init(&entry->list);
7209                 free(entry);
7210         }
7211         return ret;
7212 }
7213
7214 static int process_duplicates(struct btrfs_root *root,
7215                               struct cache_tree *extent_cache,
7216                               struct extent_record *rec)
7217 {
7218         struct extent_record *good, *tmp;
7219         struct cache_extent *cache;
7220         int ret;
7221
7222         /*
7223          * If we found a extent record for this extent then return, or if we
7224          * have more than one duplicate we are likely going to need to delete
7225          * something.
7226          */
7227         if (rec->found_rec || rec->num_duplicates > 1)
7228                 return 0;
7229
7230         /* Shouldn't happen but just in case */
7231         BUG_ON(!rec->num_duplicates);
7232
7233         /*
7234          * So this happens if we end up with a backref that doesn't match the
7235          * actual extent entry.  So either the backref is bad or the extent
7236          * entry is bad.  Either way we want to have the extent_record actually
7237          * reflect what we found in the extent_tree, so we need to take the
7238          * duplicate out and use that as the extent_record since the only way we
7239          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7240          */
7241         remove_cache_extent(extent_cache, &rec->cache);
7242
7243         good = to_extent_record(rec->dups.next);
7244         list_del_init(&good->list);
7245         INIT_LIST_HEAD(&good->backrefs);
7246         INIT_LIST_HEAD(&good->dups);
7247         good->cache.start = good->start;
7248         good->cache.size = good->nr;
7249         good->content_checked = 0;
7250         good->owner_ref_checked = 0;
7251         good->num_duplicates = 0;
7252         good->refs = rec->refs;
7253         list_splice_init(&rec->backrefs, &good->backrefs);
7254         while (1) {
7255                 cache = lookup_cache_extent(extent_cache, good->start,
7256                                             good->nr);
7257                 if (!cache)
7258                         break;
7259                 tmp = container_of(cache, struct extent_record, cache);
7260
7261                 /*
7262                  * If we find another overlapping extent and it's found_rec is
7263                  * set then it's a duplicate and we need to try and delete
7264                  * something.
7265                  */
7266                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7267                         if (list_empty(&good->list))
7268                                 list_add_tail(&good->list,
7269                                               &duplicate_extents);
7270                         good->num_duplicates += tmp->num_duplicates + 1;
7271                         list_splice_init(&tmp->dups, &good->dups);
7272                         list_del_init(&tmp->list);
7273                         list_add_tail(&tmp->list, &good->dups);
7274                         remove_cache_extent(extent_cache, &tmp->cache);
7275                         continue;
7276                 }
7277
7278                 /*
7279                  * Ok we have another non extent item backed extent rec, so lets
7280                  * just add it to this extent and carry on like we did above.
7281                  */
7282                 good->refs += tmp->refs;
7283                 list_splice_init(&tmp->backrefs, &good->backrefs);
7284                 remove_cache_extent(extent_cache, &tmp->cache);
7285                 free(tmp);
7286         }
7287         ret = insert_cache_extent(extent_cache, &good->cache);
7288         BUG_ON(ret);
7289         free(rec);
7290         return good->num_duplicates ? 0 : 1;
7291 }
7292
7293 static int delete_duplicate_records(struct btrfs_root *root,
7294                                     struct extent_record *rec)
7295 {
7296         struct btrfs_trans_handle *trans;
7297         LIST_HEAD(delete_list);
7298         struct btrfs_path *path;
7299         struct extent_record *tmp, *good, *n;
7300         int nr_del = 0;
7301         int ret = 0, err;
7302         struct btrfs_key key;
7303
7304         path = btrfs_alloc_path();
7305         if (!path) {
7306                 ret = -ENOMEM;
7307                 goto out;
7308         }
7309
7310         good = rec;
7311         /* Find the record that covers all of the duplicates. */
7312         list_for_each_entry(tmp, &rec->dups, list) {
7313                 if (good->start < tmp->start)
7314                         continue;
7315                 if (good->nr > tmp->nr)
7316                         continue;
7317
7318                 if (tmp->start + tmp->nr < good->start + good->nr) {
7319                         fprintf(stderr, "Ok we have overlapping extents that "
7320                                 "aren't completely covered by each other, this "
7321                                 "is going to require more careful thought.  "
7322                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7323                                 tmp->start, tmp->nr, good->start, good->nr);
7324                         abort();
7325                 }
7326                 good = tmp;
7327         }
7328
7329         if (good != rec)
7330                 list_add_tail(&rec->list, &delete_list);
7331
7332         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7333                 if (tmp == good)
7334                         continue;
7335                 list_move_tail(&tmp->list, &delete_list);
7336         }
7337
7338         root = root->fs_info->extent_root;
7339         trans = btrfs_start_transaction(root, 1);
7340         if (IS_ERR(trans)) {
7341                 ret = PTR_ERR(trans);
7342                 goto out;
7343         }
7344
7345         list_for_each_entry(tmp, &delete_list, list) {
7346                 if (tmp->found_rec == 0)
7347                         continue;
7348                 key.objectid = tmp->start;
7349                 key.type = BTRFS_EXTENT_ITEM_KEY;
7350                 key.offset = tmp->nr;
7351
7352                 /* Shouldn't happen but just in case */
7353                 if (tmp->metadata) {
7354                         fprintf(stderr, "Well this shouldn't happen, extent "
7355                                 "record overlaps but is metadata? "
7356                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7357                         abort();
7358                 }
7359
7360                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7361                 if (ret) {
7362                         if (ret > 0)
7363                                 ret = -EINVAL;
7364                         break;
7365                 }
7366                 ret = btrfs_del_item(trans, root, path);
7367                 if (ret)
7368                         break;
7369                 btrfs_release_path(path);
7370                 nr_del++;
7371         }
7372         err = btrfs_commit_transaction(trans, root);
7373         if (err && !ret)
7374                 ret = err;
7375 out:
7376         while (!list_empty(&delete_list)) {
7377                 tmp = to_extent_record(delete_list.next);
7378                 list_del_init(&tmp->list);
7379                 if (tmp == rec)
7380                         continue;
7381                 free(tmp);
7382         }
7383
7384         while (!list_empty(&rec->dups)) {
7385                 tmp = to_extent_record(rec->dups.next);
7386                 list_del_init(&tmp->list);
7387                 free(tmp);
7388         }
7389
7390         btrfs_free_path(path);
7391
7392         if (!ret && !nr_del)
7393                 rec->num_duplicates = 0;
7394
7395         return ret ? ret : nr_del;
7396 }
7397
7398 static int find_possible_backrefs(struct btrfs_fs_info *info,
7399                                   struct btrfs_path *path,
7400                                   struct cache_tree *extent_cache,
7401                                   struct extent_record *rec)
7402 {
7403         struct btrfs_root *root;
7404         struct extent_backref *back;
7405         struct data_backref *dback;
7406         struct cache_extent *cache;
7407         struct btrfs_file_extent_item *fi;
7408         struct btrfs_key key;
7409         u64 bytenr, bytes;
7410         int ret;
7411
7412         list_for_each_entry(back, &rec->backrefs, list) {
7413                 /* Don't care about full backrefs (poor unloved backrefs) */
7414                 if (back->full_backref || !back->is_data)
7415                         continue;
7416
7417                 dback = to_data_backref(back);
7418
7419                 /* We found this one, we don't need to do a lookup */
7420                 if (dback->found_ref)
7421                         continue;
7422
7423                 key.objectid = dback->root;
7424                 key.type = BTRFS_ROOT_ITEM_KEY;
7425                 key.offset = (u64)-1;
7426
7427                 root = btrfs_read_fs_root(info, &key);
7428
7429                 /* No root, definitely a bad ref, skip */
7430                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7431                         continue;
7432                 /* Other err, exit */
7433                 if (IS_ERR(root))
7434                         return PTR_ERR(root);
7435
7436                 key.objectid = dback->owner;
7437                 key.type = BTRFS_EXTENT_DATA_KEY;
7438                 key.offset = dback->offset;
7439                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7440                 if (ret) {
7441                         btrfs_release_path(path);
7442                         if (ret < 0)
7443                                 return ret;
7444                         /* Didn't find it, we can carry on */
7445                         ret = 0;
7446                         continue;
7447                 }
7448
7449                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7450                                     struct btrfs_file_extent_item);
7451                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7452                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7453                 btrfs_release_path(path);
7454                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7455                 if (cache) {
7456                         struct extent_record *tmp;
7457                         tmp = container_of(cache, struct extent_record, cache);
7458
7459                         /*
7460                          * If we found an extent record for the bytenr for this
7461                          * particular backref then we can't add it to our
7462                          * current extent record.  We only want to add backrefs
7463                          * that don't have a corresponding extent item in the
7464                          * extent tree since they likely belong to this record
7465                          * and we need to fix it if it doesn't match bytenrs.
7466                          */
7467                         if  (tmp->found_rec)
7468                                 continue;
7469                 }
7470
7471                 dback->found_ref += 1;
7472                 dback->disk_bytenr = bytenr;
7473                 dback->bytes = bytes;
7474
7475                 /*
7476                  * Set this so the verify backref code knows not to trust the
7477                  * values in this backref.
7478                  */
7479                 back->broken = 1;
7480         }
7481
7482         return 0;
7483 }
7484
7485 /*
7486  * Record orphan data ref into corresponding root.
7487  *
7488  * Return 0 if the extent item contains data ref and recorded.
7489  * Return 1 if the extent item contains no useful data ref
7490  *   On that case, it may contains only shared_dataref or metadata backref
7491  *   or the file extent exists(this should be handled by the extent bytenr
7492  *   recovery routine)
7493  * Return <0 if something goes wrong.
7494  */
7495 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7496                                       struct extent_record *rec)
7497 {
7498         struct btrfs_key key;
7499         struct btrfs_root *dest_root;
7500         struct extent_backref *back;
7501         struct data_backref *dback;
7502         struct orphan_data_extent *orphan;
7503         struct btrfs_path *path;
7504         int recorded_data_ref = 0;
7505         int ret = 0;
7506
7507         if (rec->metadata)
7508                 return 1;
7509         path = btrfs_alloc_path();
7510         if (!path)
7511                 return -ENOMEM;
7512         list_for_each_entry(back, &rec->backrefs, list) {
7513                 if (back->full_backref || !back->is_data ||
7514                     !back->found_extent_tree)
7515                         continue;
7516                 dback = to_data_backref(back);
7517                 if (dback->found_ref)
7518                         continue;
7519                 key.objectid = dback->root;
7520                 key.type = BTRFS_ROOT_ITEM_KEY;
7521                 key.offset = (u64)-1;
7522
7523                 dest_root = btrfs_read_fs_root(fs_info, &key);
7524
7525                 /* For non-exist root we just skip it */
7526                 if (IS_ERR(dest_root) || !dest_root)
7527                         continue;
7528
7529                 key.objectid = dback->owner;
7530                 key.type = BTRFS_EXTENT_DATA_KEY;
7531                 key.offset = dback->offset;
7532
7533                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7534                 /*
7535                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7536                  * we need to record it for inode/file extent rebuild.
7537                  * For ret > 0, we record it only for file extent rebuild.
7538                  * For ret == 0, the file extent exists but only bytenr
7539                  * mismatch, let the original bytenr fix routine to handle,
7540                  * don't record it.
7541                  */
7542                 if (ret == 0)
7543                         continue;
7544                 ret = 0;
7545                 orphan = malloc(sizeof(*orphan));
7546                 if (!orphan) {
7547                         ret = -ENOMEM;
7548                         goto out;
7549                 }
7550                 INIT_LIST_HEAD(&orphan->list);
7551                 orphan->root = dback->root;
7552                 orphan->objectid = dback->owner;
7553                 orphan->offset = dback->offset;
7554                 orphan->disk_bytenr = rec->cache.start;
7555                 orphan->disk_len = rec->cache.size;
7556                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7557                 recorded_data_ref = 1;
7558         }
7559 out:
7560         btrfs_free_path(path);
7561         if (!ret)
7562                 return !recorded_data_ref;
7563         else
7564                 return ret;
7565 }
7566
7567 /*
7568  * when an incorrect extent item is found, this will delete
7569  * all of the existing entries for it and recreate them
7570  * based on what the tree scan found.
7571  */
7572 static int fixup_extent_refs(struct btrfs_fs_info *info,
7573                              struct cache_tree *extent_cache,
7574                              struct extent_record *rec)
7575 {
7576         struct btrfs_trans_handle *trans = NULL;
7577         int ret;
7578         struct btrfs_path *path;
7579         struct list_head *cur = rec->backrefs.next;
7580         struct cache_extent *cache;
7581         struct extent_backref *back;
7582         int allocated = 0;
7583         u64 flags = 0;
7584
7585         if (rec->flag_block_full_backref)
7586                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7587
7588         path = btrfs_alloc_path();
7589         if (!path)
7590                 return -ENOMEM;
7591
7592         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7593                 /*
7594                  * Sometimes the backrefs themselves are so broken they don't
7595                  * get attached to any meaningful rec, so first go back and
7596                  * check any of our backrefs that we couldn't find and throw
7597                  * them into the list if we find the backref so that
7598                  * verify_backrefs can figure out what to do.
7599                  */
7600                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7601                 if (ret < 0)
7602                         goto out;
7603         }
7604
7605         /* step one, make sure all of the backrefs agree */
7606         ret = verify_backrefs(info, path, rec);
7607         if (ret < 0)
7608                 goto out;
7609
7610         trans = btrfs_start_transaction(info->extent_root, 1);
7611         if (IS_ERR(trans)) {
7612                 ret = PTR_ERR(trans);
7613                 goto out;
7614         }
7615
7616         /* step two, delete all the existing records */
7617         ret = delete_extent_records(trans, info->extent_root, path,
7618                                     rec->start, rec->max_size);
7619
7620         if (ret < 0)
7621                 goto out;
7622
7623         /* was this block corrupt?  If so, don't add references to it */
7624         cache = lookup_cache_extent(info->corrupt_blocks,
7625                                     rec->start, rec->max_size);
7626         if (cache) {
7627                 ret = 0;
7628                 goto out;
7629         }
7630
7631         /* step three, recreate all the refs we did find */
7632         while(cur != &rec->backrefs) {
7633                 back = to_extent_backref(cur);
7634                 cur = cur->next;
7635
7636                 /*
7637                  * if we didn't find any references, don't create a
7638                  * new extent record
7639                  */
7640                 if (!back->found_ref)
7641                         continue;
7642
7643                 rec->bad_full_backref = 0;
7644                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7645                 allocated = 1;
7646
7647                 if (ret)
7648                         goto out;
7649         }
7650 out:
7651         if (trans) {
7652                 int err = btrfs_commit_transaction(trans, info->extent_root);
7653                 if (!ret)
7654                         ret = err;
7655         }
7656
7657         btrfs_free_path(path);
7658         return ret;
7659 }
7660
7661 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7662                               struct extent_record *rec)
7663 {
7664         struct btrfs_trans_handle *trans;
7665         struct btrfs_root *root = fs_info->extent_root;
7666         struct btrfs_path *path;
7667         struct btrfs_extent_item *ei;
7668         struct btrfs_key key;
7669         u64 flags;
7670         int ret = 0;
7671
7672         key.objectid = rec->start;
7673         if (rec->metadata) {
7674                 key.type = BTRFS_METADATA_ITEM_KEY;
7675                 key.offset = rec->info_level;
7676         } else {
7677                 key.type = BTRFS_EXTENT_ITEM_KEY;
7678                 key.offset = rec->max_size;
7679         }
7680
7681         path = btrfs_alloc_path();
7682         if (!path)
7683                 return -ENOMEM;
7684
7685         trans = btrfs_start_transaction(root, 0);
7686         if (IS_ERR(trans)) {
7687                 btrfs_free_path(path);
7688                 return PTR_ERR(trans);
7689         }
7690
7691         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7692         if (ret < 0) {
7693                 btrfs_free_path(path);
7694                 btrfs_commit_transaction(trans, root);
7695                 return ret;
7696         } else if (ret) {
7697                 fprintf(stderr, "Didn't find extent for %llu\n",
7698                         (unsigned long long)rec->start);
7699                 btrfs_free_path(path);
7700                 btrfs_commit_transaction(trans, root);
7701                 return -ENOENT;
7702         }
7703
7704         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7705                             struct btrfs_extent_item);
7706         flags = btrfs_extent_flags(path->nodes[0], ei);
7707         if (rec->flag_block_full_backref) {
7708                 fprintf(stderr, "setting full backref on %llu\n",
7709                         (unsigned long long)key.objectid);
7710                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7711         } else {
7712                 fprintf(stderr, "clearing full backref on %llu\n",
7713                         (unsigned long long)key.objectid);
7714                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7715         }
7716         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7717         btrfs_mark_buffer_dirty(path->nodes[0]);
7718         btrfs_free_path(path);
7719         return btrfs_commit_transaction(trans, root);
7720 }
7721
7722 /* right now we only prune from the extent allocation tree */
7723 static int prune_one_block(struct btrfs_trans_handle *trans,
7724                            struct btrfs_fs_info *info,
7725                            struct btrfs_corrupt_block *corrupt)
7726 {
7727         int ret;
7728         struct btrfs_path path;
7729         struct extent_buffer *eb;
7730         u64 found;
7731         int slot;
7732         int nritems;
7733         int level = corrupt->level + 1;
7734
7735         btrfs_init_path(&path);
7736 again:
7737         /* we want to stop at the parent to our busted block */
7738         path.lowest_level = level;
7739
7740         ret = btrfs_search_slot(trans, info->extent_root,
7741                                 &corrupt->key, &path, -1, 1);
7742
7743         if (ret < 0)
7744                 goto out;
7745
7746         eb = path.nodes[level];
7747         if (!eb) {
7748                 ret = -ENOENT;
7749                 goto out;
7750         }
7751
7752         /*
7753          * hopefully the search gave us the block we want to prune,
7754          * lets try that first
7755          */
7756         slot = path.slots[level];
7757         found =  btrfs_node_blockptr(eb, slot);
7758         if (found == corrupt->cache.start)
7759                 goto del_ptr;
7760
7761         nritems = btrfs_header_nritems(eb);
7762
7763         /* the search failed, lets scan this node and hope we find it */
7764         for (slot = 0; slot < nritems; slot++) {
7765                 found =  btrfs_node_blockptr(eb, slot);
7766                 if (found == corrupt->cache.start)
7767                         goto del_ptr;
7768         }
7769         /*
7770          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7771          * to this block
7772          */
7773         if (eb == info->extent_root->node) {
7774                 ret = -ENOENT;
7775                 goto out;
7776         } else {
7777                 level++;
7778                 btrfs_release_path(&path);
7779                 goto again;
7780         }
7781
7782 del_ptr:
7783         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7784         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7785
7786 out:
7787         btrfs_release_path(&path);
7788         return ret;
7789 }
7790
7791 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7792 {
7793         struct btrfs_trans_handle *trans = NULL;
7794         struct cache_extent *cache;
7795         struct btrfs_corrupt_block *corrupt;
7796
7797         while (1) {
7798                 cache = search_cache_extent(info->corrupt_blocks, 0);
7799                 if (!cache)
7800                         break;
7801                 if (!trans) {
7802                         trans = btrfs_start_transaction(info->extent_root, 1);
7803                         if (IS_ERR(trans))
7804                                 return PTR_ERR(trans);
7805                 }
7806                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7807                 prune_one_block(trans, info, corrupt);
7808                 remove_cache_extent(info->corrupt_blocks, cache);
7809         }
7810         if (trans)
7811                 return btrfs_commit_transaction(trans, info->extent_root);
7812         return 0;
7813 }
7814
7815 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7816 {
7817         struct btrfs_block_group_cache *cache;
7818         u64 start, end;
7819         int ret;
7820
7821         while (1) {
7822                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7823                                             &start, &end, EXTENT_DIRTY);
7824                 if (ret)
7825                         break;
7826                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7827                                    GFP_NOFS);
7828         }
7829
7830         start = 0;
7831         while (1) {
7832                 cache = btrfs_lookup_first_block_group(fs_info, start);
7833                 if (!cache)
7834                         break;
7835                 if (cache->cached)
7836                         cache->cached = 0;
7837                 start = cache->key.objectid + cache->key.offset;
7838         }
7839 }
7840
7841 static int check_extent_refs(struct btrfs_root *root,
7842                              struct cache_tree *extent_cache)
7843 {
7844         struct extent_record *rec;
7845         struct cache_extent *cache;
7846         int err = 0;
7847         int ret = 0;
7848         int fixed = 0;
7849         int had_dups = 0;
7850         int recorded = 0;
7851
7852         if (repair) {
7853                 /*
7854                  * if we're doing a repair, we have to make sure
7855                  * we don't allocate from the problem extents.
7856                  * In the worst case, this will be all the
7857                  * extents in the FS
7858                  */
7859                 cache = search_cache_extent(extent_cache, 0);
7860                 while(cache) {
7861                         rec = container_of(cache, struct extent_record, cache);
7862                         set_extent_dirty(root->fs_info->excluded_extents,
7863                                          rec->start,
7864                                          rec->start + rec->max_size - 1,
7865                                          GFP_NOFS);
7866                         cache = next_cache_extent(cache);
7867                 }
7868
7869                 /* pin down all the corrupted blocks too */
7870                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7871                 while(cache) {
7872                         set_extent_dirty(root->fs_info->excluded_extents,
7873                                          cache->start,
7874                                          cache->start + cache->size - 1,
7875                                          GFP_NOFS);
7876                         cache = next_cache_extent(cache);
7877                 }
7878                 prune_corrupt_blocks(root->fs_info);
7879                 reset_cached_block_groups(root->fs_info);
7880         }
7881
7882         reset_cached_block_groups(root->fs_info);
7883
7884         /*
7885          * We need to delete any duplicate entries we find first otherwise we
7886          * could mess up the extent tree when we have backrefs that actually
7887          * belong to a different extent item and not the weird duplicate one.
7888          */
7889         while (repair && !list_empty(&duplicate_extents)) {
7890                 rec = to_extent_record(duplicate_extents.next);
7891                 list_del_init(&rec->list);
7892
7893                 /* Sometimes we can find a backref before we find an actual
7894                  * extent, so we need to process it a little bit to see if there
7895                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7896                  * if this is a backref screwup.  If we need to delete stuff
7897                  * process_duplicates() will return 0, otherwise it will return
7898                  * 1 and we
7899                  */
7900                 if (process_duplicates(root, extent_cache, rec))
7901                         continue;
7902                 ret = delete_duplicate_records(root, rec);
7903                 if (ret < 0)
7904                         return ret;
7905                 /*
7906                  * delete_duplicate_records will return the number of entries
7907                  * deleted, so if it's greater than 0 then we know we actually
7908                  * did something and we need to remove.
7909                  */
7910                 if (ret)
7911                         had_dups = 1;
7912         }
7913
7914         if (had_dups)
7915                 return -EAGAIN;
7916
7917         while(1) {
7918                 int cur_err = 0;
7919
7920                 fixed = 0;
7921                 recorded = 0;
7922                 cache = search_cache_extent(extent_cache, 0);
7923                 if (!cache)
7924                         break;
7925                 rec = container_of(cache, struct extent_record, cache);
7926                 if (rec->num_duplicates) {
7927                         fprintf(stderr, "extent item %llu has multiple extent "
7928                                 "items\n", (unsigned long long)rec->start);
7929                         err = 1;
7930                         cur_err = 1;
7931                 }
7932
7933                 if (rec->refs != rec->extent_item_refs) {
7934                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7935                                 (unsigned long long)rec->start,
7936                                 (unsigned long long)rec->nr);
7937                         fprintf(stderr, "extent item %llu, found %llu\n",
7938                                 (unsigned long long)rec->extent_item_refs,
7939                                 (unsigned long long)rec->refs);
7940                         ret = record_orphan_data_extents(root->fs_info, rec);
7941                         if (ret < 0)
7942                                 goto repair_abort;
7943                         if (ret == 0) {
7944                                 recorded = 1;
7945                         } else {
7946                                 /*
7947                                  * we can't use the extent to repair file
7948                                  * extent, let the fallback method handle it.
7949                                  */
7950                                 if (!fixed && repair) {
7951                                         ret = fixup_extent_refs(
7952                                                         root->fs_info,
7953                                                         extent_cache, rec);
7954                                         if (ret)
7955                                                 goto repair_abort;
7956                                         fixed = 1;
7957                                 }
7958                         }
7959                         err = 1;
7960                         cur_err = 1;
7961                 }
7962                 if (all_backpointers_checked(rec, 1)) {
7963                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7964                                 (unsigned long long)rec->start,
7965                                 (unsigned long long)rec->nr);
7966
7967                         if (!fixed && !recorded && repair) {
7968                                 ret = fixup_extent_refs(root->fs_info,
7969                                                         extent_cache, rec);
7970                                 if (ret)
7971                                         goto repair_abort;
7972                                 fixed = 1;
7973                         }
7974                         cur_err = 1;
7975                         err = 1;
7976                 }
7977                 if (!rec->owner_ref_checked) {
7978                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7979                                 (unsigned long long)rec->start,
7980                                 (unsigned long long)rec->nr);
7981                         if (!fixed && !recorded && repair) {
7982                                 ret = fixup_extent_refs(root->fs_info,
7983                                                         extent_cache, rec);
7984                                 if (ret)
7985                                         goto repair_abort;
7986                                 fixed = 1;
7987                         }
7988                         err = 1;
7989                         cur_err = 1;
7990                 }
7991                 if (rec->bad_full_backref) {
7992                         fprintf(stderr, "bad full backref, on [%llu]\n",
7993                                 (unsigned long long)rec->start);
7994                         if (repair) {
7995                                 ret = fixup_extent_flags(root->fs_info, rec);
7996                                 if (ret)
7997                                         goto repair_abort;
7998                                 fixed = 1;
7999                         }
8000                         err = 1;
8001                         cur_err = 1;
8002                 }
8003                 /*
8004                  * Although it's not a extent ref's problem, we reuse this
8005                  * routine for error reporting.
8006                  * No repair function yet.
8007                  */
8008                 if (rec->crossing_stripes) {
8009                         fprintf(stderr,
8010                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8011                                 rec->start, rec->start + rec->max_size);
8012                         err = 1;
8013                         cur_err = 1;
8014                 }
8015
8016                 if (rec->wrong_chunk_type) {
8017                         fprintf(stderr,
8018                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8019                                 rec->start, rec->start + rec->max_size);
8020                         err = 1;
8021                         cur_err = 1;
8022                 }
8023
8024                 remove_cache_extent(extent_cache, cache);
8025                 free_all_extent_backrefs(rec);
8026                 if (!init_extent_tree && repair && (!cur_err || fixed))
8027                         clear_extent_dirty(root->fs_info->excluded_extents,
8028                                            rec->start,
8029                                            rec->start + rec->max_size - 1,
8030                                            GFP_NOFS);
8031                 free(rec);
8032         }
8033 repair_abort:
8034         if (repair) {
8035                 if (ret && ret != -EAGAIN) {
8036                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8037                         exit(1);
8038                 } else if (!ret) {
8039                         struct btrfs_trans_handle *trans;
8040
8041                         root = root->fs_info->extent_root;
8042                         trans = btrfs_start_transaction(root, 1);
8043                         if (IS_ERR(trans)) {
8044                                 ret = PTR_ERR(trans);
8045                                 goto repair_abort;
8046                         }
8047
8048                         btrfs_fix_block_accounting(trans, root);
8049                         ret = btrfs_commit_transaction(trans, root);
8050                         if (ret)
8051                                 goto repair_abort;
8052                 }
8053                 if (err)
8054                         fprintf(stderr, "repaired damaged extent references\n");
8055                 return ret;
8056         }
8057         return err;
8058 }
8059
8060 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8061 {
8062         u64 stripe_size;
8063
8064         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8065                 stripe_size = length;
8066                 stripe_size /= num_stripes;
8067         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8068                 stripe_size = length * 2;
8069                 stripe_size /= num_stripes;
8070         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8071                 stripe_size = length;
8072                 stripe_size /= (num_stripes - 1);
8073         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8074                 stripe_size = length;
8075                 stripe_size /= (num_stripes - 2);
8076         } else {
8077                 stripe_size = length;
8078         }
8079         return stripe_size;
8080 }
8081
8082 /*
8083  * Check the chunk with its block group/dev list ref:
8084  * Return 0 if all refs seems valid.
8085  * Return 1 if part of refs seems valid, need later check for rebuild ref
8086  * like missing block group and needs to search extent tree to rebuild them.
8087  * Return -1 if essential refs are missing and unable to rebuild.
8088  */
8089 static int check_chunk_refs(struct chunk_record *chunk_rec,
8090                             struct block_group_tree *block_group_cache,
8091                             struct device_extent_tree *dev_extent_cache,
8092                             int silent)
8093 {
8094         struct cache_extent *block_group_item;
8095         struct block_group_record *block_group_rec;
8096         struct cache_extent *dev_extent_item;
8097         struct device_extent_record *dev_extent_rec;
8098         u64 devid;
8099         u64 offset;
8100         u64 length;
8101         int metadump_v2 = 0;
8102         int i;
8103         int ret = 0;
8104
8105         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8106                                                chunk_rec->offset,
8107                                                chunk_rec->length);
8108         if (block_group_item) {
8109                 block_group_rec = container_of(block_group_item,
8110                                                struct block_group_record,
8111                                                cache);
8112                 if (chunk_rec->length != block_group_rec->offset ||
8113                     chunk_rec->offset != block_group_rec->objectid ||
8114                     (!metadump_v2 &&
8115                      chunk_rec->type_flags != block_group_rec->flags)) {
8116                         if (!silent)
8117                                 fprintf(stderr,
8118                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8119                                         chunk_rec->objectid,
8120                                         chunk_rec->type,
8121                                         chunk_rec->offset,
8122                                         chunk_rec->length,
8123                                         chunk_rec->offset,
8124                                         chunk_rec->type_flags,
8125                                         block_group_rec->objectid,
8126                                         block_group_rec->type,
8127                                         block_group_rec->offset,
8128                                         block_group_rec->offset,
8129                                         block_group_rec->objectid,
8130                                         block_group_rec->flags);
8131                         ret = -1;
8132                 } else {
8133                         list_del_init(&block_group_rec->list);
8134                         chunk_rec->bg_rec = block_group_rec;
8135                 }
8136         } else {
8137                 if (!silent)
8138                         fprintf(stderr,
8139                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8140                                 chunk_rec->objectid,
8141                                 chunk_rec->type,
8142                                 chunk_rec->offset,
8143                                 chunk_rec->length,
8144                                 chunk_rec->offset,
8145                                 chunk_rec->type_flags);
8146                 ret = 1;
8147         }
8148
8149         if (metadump_v2)
8150                 return ret;
8151
8152         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8153                                     chunk_rec->num_stripes);
8154         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8155                 devid = chunk_rec->stripes[i].devid;
8156                 offset = chunk_rec->stripes[i].offset;
8157                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8158                                                        devid, offset, length);
8159                 if (dev_extent_item) {
8160                         dev_extent_rec = container_of(dev_extent_item,
8161                                                 struct device_extent_record,
8162                                                 cache);
8163                         if (dev_extent_rec->objectid != devid ||
8164                             dev_extent_rec->offset != offset ||
8165                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8166                             dev_extent_rec->length != length) {
8167                                 if (!silent)
8168                                         fprintf(stderr,
8169                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8170                                                 chunk_rec->objectid,
8171                                                 chunk_rec->type,
8172                                                 chunk_rec->offset,
8173                                                 chunk_rec->stripes[i].devid,
8174                                                 chunk_rec->stripes[i].offset,
8175                                                 dev_extent_rec->objectid,
8176                                                 dev_extent_rec->offset,
8177                                                 dev_extent_rec->length);
8178                                 ret = -1;
8179                         } else {
8180                                 list_move(&dev_extent_rec->chunk_list,
8181                                           &chunk_rec->dextents);
8182                         }
8183                 } else {
8184                         if (!silent)
8185                                 fprintf(stderr,
8186                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8187                                         chunk_rec->objectid,
8188                                         chunk_rec->type,
8189                                         chunk_rec->offset,
8190                                         chunk_rec->stripes[i].devid,
8191                                         chunk_rec->stripes[i].offset);
8192                         ret = -1;
8193                 }
8194         }
8195         return ret;
8196 }
8197
8198 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8199 int check_chunks(struct cache_tree *chunk_cache,
8200                  struct block_group_tree *block_group_cache,
8201                  struct device_extent_tree *dev_extent_cache,
8202                  struct list_head *good, struct list_head *bad,
8203                  struct list_head *rebuild, int silent)
8204 {
8205         struct cache_extent *chunk_item;
8206         struct chunk_record *chunk_rec;
8207         struct block_group_record *bg_rec;
8208         struct device_extent_record *dext_rec;
8209         int err;
8210         int ret = 0;
8211
8212         chunk_item = first_cache_extent(chunk_cache);
8213         while (chunk_item) {
8214                 chunk_rec = container_of(chunk_item, struct chunk_record,
8215                                          cache);
8216                 err = check_chunk_refs(chunk_rec, block_group_cache,
8217                                        dev_extent_cache, silent);
8218                 if (err < 0)
8219                         ret = err;
8220                 if (err == 0 && good)
8221                         list_add_tail(&chunk_rec->list, good);
8222                 if (err > 0 && rebuild)
8223                         list_add_tail(&chunk_rec->list, rebuild);
8224                 if (err < 0 && bad)
8225                         list_add_tail(&chunk_rec->list, bad);
8226                 chunk_item = next_cache_extent(chunk_item);
8227         }
8228
8229         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8230                 if (!silent)
8231                         fprintf(stderr,
8232                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8233                                 bg_rec->objectid,
8234                                 bg_rec->offset,
8235                                 bg_rec->flags);
8236                 if (!ret)
8237                         ret = 1;
8238         }
8239
8240         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8241                             chunk_list) {
8242                 if (!silent)
8243                         fprintf(stderr,
8244                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8245                                 dext_rec->objectid,
8246                                 dext_rec->offset,
8247                                 dext_rec->length);
8248                 if (!ret)
8249                         ret = 1;
8250         }
8251         return ret;
8252 }
8253
8254
8255 static int check_device_used(struct device_record *dev_rec,
8256                              struct device_extent_tree *dext_cache)
8257 {
8258         struct cache_extent *cache;
8259         struct device_extent_record *dev_extent_rec;
8260         u64 total_byte = 0;
8261
8262         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8263         while (cache) {
8264                 dev_extent_rec = container_of(cache,
8265                                               struct device_extent_record,
8266                                               cache);
8267                 if (dev_extent_rec->objectid != dev_rec->devid)
8268                         break;
8269
8270                 list_del_init(&dev_extent_rec->device_list);
8271                 total_byte += dev_extent_rec->length;
8272                 cache = next_cache_extent(cache);
8273         }
8274
8275         if (total_byte != dev_rec->byte_used) {
8276                 fprintf(stderr,
8277                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8278                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8279                         dev_rec->type, dev_rec->offset);
8280                 return -1;
8281         } else {
8282                 return 0;
8283         }
8284 }
8285
8286 /* check btrfs_dev_item -> btrfs_dev_extent */
8287 static int check_devices(struct rb_root *dev_cache,
8288                          struct device_extent_tree *dev_extent_cache)
8289 {
8290         struct rb_node *dev_node;
8291         struct device_record *dev_rec;
8292         struct device_extent_record *dext_rec;
8293         int err;
8294         int ret = 0;
8295
8296         dev_node = rb_first(dev_cache);
8297         while (dev_node) {
8298                 dev_rec = container_of(dev_node, struct device_record, node);
8299                 err = check_device_used(dev_rec, dev_extent_cache);
8300                 if (err)
8301                         ret = err;
8302
8303                 dev_node = rb_next(dev_node);
8304         }
8305         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8306                             device_list) {
8307                 fprintf(stderr,
8308                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8309                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8310                 if (!ret)
8311                         ret = 1;
8312         }
8313         return ret;
8314 }
8315
8316 static int add_root_item_to_list(struct list_head *head,
8317                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8318                                   u8 level, u8 drop_level,
8319                                   int level_size, struct btrfs_key *drop_key)
8320 {
8321
8322         struct root_item_record *ri_rec;
8323         ri_rec = malloc(sizeof(*ri_rec));
8324         if (!ri_rec)
8325                 return -ENOMEM;
8326         ri_rec->bytenr = bytenr;
8327         ri_rec->objectid = objectid;
8328         ri_rec->level = level;
8329         ri_rec->level_size = level_size;
8330         ri_rec->drop_level = drop_level;
8331         ri_rec->last_snapshot = last_snapshot;
8332         if (drop_key)
8333                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8334         list_add_tail(&ri_rec->list, head);
8335
8336         return 0;
8337 }
8338
8339 static void free_root_item_list(struct list_head *list)
8340 {
8341         struct root_item_record *ri_rec;
8342
8343         while (!list_empty(list)) {
8344                 ri_rec = list_first_entry(list, struct root_item_record,
8345                                           list);
8346                 list_del_init(&ri_rec->list);
8347                 free(ri_rec);
8348         }
8349 }
8350
8351 static int deal_root_from_list(struct list_head *list,
8352                                struct btrfs_root *root,
8353                                struct block_info *bits,
8354                                int bits_nr,
8355                                struct cache_tree *pending,
8356                                struct cache_tree *seen,
8357                                struct cache_tree *reada,
8358                                struct cache_tree *nodes,
8359                                struct cache_tree *extent_cache,
8360                                struct cache_tree *chunk_cache,
8361                                struct rb_root *dev_cache,
8362                                struct block_group_tree *block_group_cache,
8363                                struct device_extent_tree *dev_extent_cache)
8364 {
8365         int ret = 0;
8366         u64 last;
8367
8368         while (!list_empty(list)) {
8369                 struct root_item_record *rec;
8370                 struct extent_buffer *buf;
8371                 rec = list_entry(list->next,
8372                                  struct root_item_record, list);
8373                 last = 0;
8374                 buf = read_tree_block(root->fs_info->tree_root,
8375                                       rec->bytenr, rec->level_size, 0);
8376                 if (!extent_buffer_uptodate(buf)) {
8377                         free_extent_buffer(buf);
8378                         ret = -EIO;
8379                         break;
8380                 }
8381                 ret = add_root_to_pending(buf, extent_cache, pending,
8382                                     seen, nodes, rec->objectid);
8383                 if (ret < 0)
8384                         break;
8385                 /*
8386                  * To rebuild extent tree, we need deal with snapshot
8387                  * one by one, otherwise we deal with node firstly which
8388                  * can maximize readahead.
8389                  */
8390                 while (1) {
8391                         ret = run_next_block(root, bits, bits_nr, &last,
8392                                              pending, seen, reada, nodes,
8393                                              extent_cache, chunk_cache,
8394                                              dev_cache, block_group_cache,
8395                                              dev_extent_cache, rec);
8396                         if (ret != 0)
8397                                 break;
8398                 }
8399                 free_extent_buffer(buf);
8400                 list_del(&rec->list);
8401                 free(rec);
8402                 if (ret < 0)
8403                         break;
8404         }
8405         while (ret >= 0) {
8406                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8407                                      reada, nodes, extent_cache, chunk_cache,
8408                                      dev_cache, block_group_cache,
8409                                      dev_extent_cache, NULL);
8410                 if (ret != 0) {
8411                         if (ret > 0)
8412                                 ret = 0;
8413                         break;
8414                 }
8415         }
8416         return ret;
8417 }
8418
8419 static int check_chunks_and_extents(struct btrfs_root *root)
8420 {
8421         struct rb_root dev_cache;
8422         struct cache_tree chunk_cache;
8423         struct block_group_tree block_group_cache;
8424         struct device_extent_tree dev_extent_cache;
8425         struct cache_tree extent_cache;
8426         struct cache_tree seen;
8427         struct cache_tree pending;
8428         struct cache_tree reada;
8429         struct cache_tree nodes;
8430         struct extent_io_tree excluded_extents;
8431         struct cache_tree corrupt_blocks;
8432         struct btrfs_path path;
8433         struct btrfs_key key;
8434         struct btrfs_key found_key;
8435         int ret, err = 0;
8436         struct block_info *bits;
8437         int bits_nr;
8438         struct extent_buffer *leaf;
8439         int slot;
8440         struct btrfs_root_item ri;
8441         struct list_head dropping_trees;
8442         struct list_head normal_trees;
8443         struct btrfs_root *root1;
8444         u64 objectid;
8445         u32 level_size;
8446         u8 level;
8447
8448         dev_cache = RB_ROOT;
8449         cache_tree_init(&chunk_cache);
8450         block_group_tree_init(&block_group_cache);
8451         device_extent_tree_init(&dev_extent_cache);
8452
8453         cache_tree_init(&extent_cache);
8454         cache_tree_init(&seen);
8455         cache_tree_init(&pending);
8456         cache_tree_init(&nodes);
8457         cache_tree_init(&reada);
8458         cache_tree_init(&corrupt_blocks);
8459         extent_io_tree_init(&excluded_extents);
8460         INIT_LIST_HEAD(&dropping_trees);
8461         INIT_LIST_HEAD(&normal_trees);
8462
8463         if (repair) {
8464                 root->fs_info->excluded_extents = &excluded_extents;
8465                 root->fs_info->fsck_extent_cache = &extent_cache;
8466                 root->fs_info->free_extent_hook = free_extent_hook;
8467                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8468         }
8469
8470         bits_nr = 1024;
8471         bits = malloc(bits_nr * sizeof(struct block_info));
8472         if (!bits) {
8473                 perror("malloc");
8474                 exit(1);
8475         }
8476
8477         if (ctx.progress_enabled) {
8478                 ctx.tp = TASK_EXTENTS;
8479                 task_start(ctx.info);
8480         }
8481
8482 again:
8483         root1 = root->fs_info->tree_root;
8484         level = btrfs_header_level(root1->node);
8485         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8486                                     root1->node->start, 0, level, 0,
8487                                     root1->nodesize, NULL);
8488         if (ret < 0)
8489                 goto out;
8490         root1 = root->fs_info->chunk_root;
8491         level = btrfs_header_level(root1->node);
8492         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8493                                     root1->node->start, 0, level, 0,
8494                                     root1->nodesize, NULL);
8495         if (ret < 0)
8496                 goto out;
8497         btrfs_init_path(&path);
8498         key.offset = 0;
8499         key.objectid = 0;
8500         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8501         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8502                                         &key, &path, 0, 0);
8503         if (ret < 0)
8504                 goto out;
8505         while(1) {
8506                 leaf = path.nodes[0];
8507                 slot = path.slots[0];
8508                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8509                         ret = btrfs_next_leaf(root, &path);
8510                         if (ret != 0)
8511                                 break;
8512                         leaf = path.nodes[0];
8513                         slot = path.slots[0];
8514                 }
8515                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8516                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8517                         unsigned long offset;
8518                         u64 last_snapshot;
8519
8520                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8521                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8522                         last_snapshot = btrfs_root_last_snapshot(&ri);
8523                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8524                                 level = btrfs_root_level(&ri);
8525                                 level_size = root->nodesize;
8526                                 ret = add_root_item_to_list(&normal_trees,
8527                                                 found_key.objectid,
8528                                                 btrfs_root_bytenr(&ri),
8529                                                 last_snapshot, level,
8530                                                 0, level_size, NULL);
8531                                 if (ret < 0)
8532                                         goto out;
8533                         } else {
8534                                 level = btrfs_root_level(&ri);
8535                                 level_size = root->nodesize;
8536                                 objectid = found_key.objectid;
8537                                 btrfs_disk_key_to_cpu(&found_key,
8538                                                       &ri.drop_progress);
8539                                 ret = add_root_item_to_list(&dropping_trees,
8540                                                 objectid,
8541                                                 btrfs_root_bytenr(&ri),
8542                                                 last_snapshot, level,
8543                                                 ri.drop_level,
8544                                                 level_size, &found_key);
8545                                 if (ret < 0)
8546                                         goto out;
8547                         }
8548                 }
8549                 path.slots[0]++;
8550         }
8551         btrfs_release_path(&path);
8552
8553         /*
8554          * check_block can return -EAGAIN if it fixes something, please keep
8555          * this in mind when dealing with return values from these functions, if
8556          * we get -EAGAIN we want to fall through and restart the loop.
8557          */
8558         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8559                                   &seen, &reada, &nodes, &extent_cache,
8560                                   &chunk_cache, &dev_cache, &block_group_cache,
8561                                   &dev_extent_cache);
8562         if (ret < 0) {
8563                 if (ret == -EAGAIN)
8564                         goto loop;
8565                 goto out;
8566         }
8567         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8568                                   &pending, &seen, &reada, &nodes,
8569                                   &extent_cache, &chunk_cache, &dev_cache,
8570                                   &block_group_cache, &dev_extent_cache);
8571         if (ret < 0) {
8572                 if (ret == -EAGAIN)
8573                         goto loop;
8574                 goto out;
8575         }
8576
8577         ret = check_chunks(&chunk_cache, &block_group_cache,
8578                            &dev_extent_cache, NULL, NULL, NULL, 0);
8579         if (ret) {
8580                 if (ret == -EAGAIN)
8581                         goto loop;
8582                 err = ret;
8583         }
8584
8585         ret = check_extent_refs(root, &extent_cache);
8586         if (ret < 0) {
8587                 if (ret == -EAGAIN)
8588                         goto loop;
8589                 goto out;
8590         }
8591
8592         ret = check_devices(&dev_cache, &dev_extent_cache);
8593         if (ret && err)
8594                 ret = err;
8595
8596 out:
8597         task_stop(ctx.info);
8598         if (repair) {
8599                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8600                 extent_io_tree_cleanup(&excluded_extents);
8601                 root->fs_info->fsck_extent_cache = NULL;
8602                 root->fs_info->free_extent_hook = NULL;
8603                 root->fs_info->corrupt_blocks = NULL;
8604                 root->fs_info->excluded_extents = NULL;
8605         }
8606         free(bits);
8607         free_chunk_cache_tree(&chunk_cache);
8608         free_device_cache_tree(&dev_cache);
8609         free_block_group_tree(&block_group_cache);
8610         free_device_extent_tree(&dev_extent_cache);
8611         free_extent_cache_tree(&seen);
8612         free_extent_cache_tree(&pending);
8613         free_extent_cache_tree(&reada);
8614         free_extent_cache_tree(&nodes);
8615         return ret;
8616 loop:
8617         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8618         free_extent_cache_tree(&seen);
8619         free_extent_cache_tree(&pending);
8620         free_extent_cache_tree(&reada);
8621         free_extent_cache_tree(&nodes);
8622         free_chunk_cache_tree(&chunk_cache);
8623         free_block_group_tree(&block_group_cache);
8624         free_device_cache_tree(&dev_cache);
8625         free_device_extent_tree(&dev_extent_cache);
8626         free_extent_record_cache(root->fs_info, &extent_cache);
8627         free_root_item_list(&normal_trees);
8628         free_root_item_list(&dropping_trees);
8629         extent_io_tree_cleanup(&excluded_extents);
8630         goto again;
8631 }
8632
8633 /*
8634  * Check backrefs of a tree block given by @bytenr or @eb.
8635  *
8636  * @root:       the root containing the @bytenr or @eb
8637  * @eb:         tree block extent buffer, can be NULL
8638  * @bytenr:     bytenr of the tree block to search
8639  * @level:      tree level of the tree block
8640  * @owner:      owner of the tree block
8641  *
8642  * Return >0 for any error found and output error message
8643  * Return 0 for no error found
8644  */
8645 static int check_tree_block_ref(struct btrfs_root *root,
8646                                 struct extent_buffer *eb, u64 bytenr,
8647                                 int level, u64 owner)
8648 {
8649         struct btrfs_key key;
8650         struct btrfs_root *extent_root = root->fs_info->extent_root;
8651         struct btrfs_path path;
8652         struct btrfs_extent_item *ei;
8653         struct btrfs_extent_inline_ref *iref;
8654         struct extent_buffer *leaf;
8655         unsigned long end;
8656         unsigned long ptr;
8657         int slot;
8658         int skinny_level;
8659         int type;
8660         u32 nodesize = root->nodesize;
8661         u32 item_size;
8662         u64 offset;
8663         int found_ref = 0;
8664         int err = 0;
8665         int ret;
8666
8667         btrfs_init_path(&path);
8668         key.objectid = bytenr;
8669         if (btrfs_fs_incompat(root->fs_info,
8670                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8671                 key.type = BTRFS_METADATA_ITEM_KEY;
8672         else
8673                 key.type = BTRFS_EXTENT_ITEM_KEY;
8674         key.offset = (u64)-1;
8675
8676         /* Search for the backref in extent tree */
8677         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8678         if (ret < 0) {
8679                 err |= BACKREF_MISSING;
8680                 goto out;
8681         }
8682         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8683         if (ret) {
8684                 err |= BACKREF_MISSING;
8685                 goto out;
8686         }
8687
8688         leaf = path.nodes[0];
8689         slot = path.slots[0];
8690         btrfs_item_key_to_cpu(leaf, &key, slot);
8691
8692         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8693
8694         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8695                 skinny_level = (int)key.offset;
8696                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8697         } else {
8698                 struct btrfs_tree_block_info *info;
8699
8700                 info = (struct btrfs_tree_block_info *)(ei + 1);
8701                 skinny_level = btrfs_tree_block_level(leaf, info);
8702                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8703         }
8704
8705         if (eb) {
8706                 u64 header_gen;
8707                 u64 extent_gen;
8708
8709                 if (!(btrfs_extent_flags(leaf, ei) &
8710                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8711                         error(
8712                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8713                                 key.objectid, nodesize,
8714                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8715                         err = BACKREF_MISMATCH;
8716                 }
8717                 header_gen = btrfs_header_generation(eb);
8718                 extent_gen = btrfs_extent_generation(leaf, ei);
8719                 if (header_gen != extent_gen) {
8720                         error(
8721         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8722                                 key.objectid, nodesize, header_gen,
8723                                 extent_gen);
8724                         err = BACKREF_MISMATCH;
8725                 }
8726                 if (level != skinny_level) {
8727                         error(
8728                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8729                                 key.objectid, nodesize, level, skinny_level);
8730                         err = BACKREF_MISMATCH;
8731                 }
8732                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8733                         error(
8734                         "extent[%llu %u] is referred by other roots than %llu",
8735                                 key.objectid, nodesize, root->objectid);
8736                         err = BACKREF_MISMATCH;
8737                 }
8738         }
8739
8740         /*
8741          * Iterate the extent/metadata item to find the exact backref
8742          */
8743         item_size = btrfs_item_size_nr(leaf, slot);
8744         ptr = (unsigned long)iref;
8745         end = (unsigned long)ei + item_size;
8746         while (ptr < end) {
8747                 iref = (struct btrfs_extent_inline_ref *)ptr;
8748                 type = btrfs_extent_inline_ref_type(leaf, iref);
8749                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8750
8751                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8752                         (offset == root->objectid || offset == owner)) {
8753                         found_ref = 1;
8754                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8755                         /* Check if the backref points to valid referencer */
8756                         found_ref = !check_tree_block_ref(root, NULL, offset,
8757                                                           level + 1, owner);
8758                 }
8759
8760                 if (found_ref)
8761                         break;
8762                 ptr += btrfs_extent_inline_ref_size(type);
8763         }
8764
8765         /*
8766          * Inlined extent item doesn't have what we need, check
8767          * TREE_BLOCK_REF_KEY
8768          */
8769         if (!found_ref) {
8770                 btrfs_release_path(&path);
8771                 key.objectid = bytenr;
8772                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8773                 key.offset = root->objectid;
8774
8775                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8776                 if (!ret)
8777                         found_ref = 1;
8778         }
8779         if (!found_ref)
8780                 err |= BACKREF_MISSING;
8781 out:
8782         btrfs_release_path(&path);
8783         if (eb && (err & BACKREF_MISSING))
8784                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8785                         bytenr, nodesize, owner, level);
8786         return err;
8787 }
8788
8789 /*
8790  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8791  *
8792  * Return >0 any error found and output error message
8793  * Return 0 for no error found
8794  */
8795 static int check_extent_data_item(struct btrfs_root *root,
8796                                   struct extent_buffer *eb, int slot)
8797 {
8798         struct btrfs_file_extent_item *fi;
8799         struct btrfs_path path;
8800         struct btrfs_root *extent_root = root->fs_info->extent_root;
8801         struct btrfs_key fi_key;
8802         struct btrfs_key dbref_key;
8803         struct extent_buffer *leaf;
8804         struct btrfs_extent_item *ei;
8805         struct btrfs_extent_inline_ref *iref;
8806         struct btrfs_extent_data_ref *dref;
8807         u64 owner;
8808         u64 file_extent_gen;
8809         u64 disk_bytenr;
8810         u64 disk_num_bytes;
8811         u64 extent_num_bytes;
8812         u64 extent_flags;
8813         u64 extent_gen;
8814         u32 item_size;
8815         unsigned long end;
8816         unsigned long ptr;
8817         int type;
8818         u64 ref_root;
8819         int found_dbackref = 0;
8820         int err = 0;
8821         int ret;
8822
8823         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8824         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8825         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8826
8827         /* Nothing to check for hole and inline data extents */
8828         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8829             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8830                 return 0;
8831
8832         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8833         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8834         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8835
8836         /* Check unaligned disk_num_bytes and num_bytes */
8837         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8838                 error(
8839 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8840                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8841                         root->sectorsize);
8842                 err |= BYTES_UNALIGNED;
8843         } else {
8844                 data_bytes_allocated += disk_num_bytes;
8845         }
8846         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8847                 error(
8848 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8849                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8850                         root->sectorsize);
8851                 err |= BYTES_UNALIGNED;
8852         } else {
8853                 data_bytes_referenced += extent_num_bytes;
8854         }
8855         owner = btrfs_header_owner(eb);
8856
8857         /* Check the extent item of the file extent in extent tree */
8858         btrfs_init_path(&path);
8859         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8860         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8861         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8862
8863         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8864         if (ret) {
8865                 err |= BACKREF_MISSING;
8866                 goto error;
8867         }
8868
8869         leaf = path.nodes[0];
8870         slot = path.slots[0];
8871         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8872
8873         extent_flags = btrfs_extent_flags(leaf, ei);
8874         extent_gen = btrfs_extent_generation(leaf, ei);
8875
8876         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8877                 error(
8878                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8879                     disk_bytenr, disk_num_bytes,
8880                     BTRFS_EXTENT_FLAG_DATA);
8881                 err |= BACKREF_MISMATCH;
8882         }
8883
8884         if (file_extent_gen < extent_gen) {
8885                 error(
8886 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8887                         disk_bytenr, disk_num_bytes, file_extent_gen,
8888                         extent_gen);
8889                 err |= BACKREF_MISMATCH;
8890         }
8891
8892         /* Check data backref inside that extent item */
8893         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8894         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8895         ptr = (unsigned long)iref;
8896         end = (unsigned long)ei + item_size;
8897         while (ptr < end) {
8898                 iref = (struct btrfs_extent_inline_ref *)ptr;
8899                 type = btrfs_extent_inline_ref_type(leaf, iref);
8900                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8901
8902                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8903                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8904                         if (ref_root == owner || ref_root == root->objectid)
8905                                 found_dbackref = 1;
8906                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8907                         found_dbackref = !check_tree_block_ref(root, NULL,
8908                                 btrfs_extent_inline_ref_offset(leaf, iref),
8909                                 0, owner);
8910                 }
8911
8912                 if (found_dbackref)
8913                         break;
8914                 ptr += btrfs_extent_inline_ref_size(type);
8915         }
8916
8917         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8918         if (!found_dbackref) {
8919                 btrfs_release_path(&path);
8920
8921                 btrfs_init_path(&path);
8922                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8923                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8924                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8925                                 fi_key.objectid, fi_key.offset);
8926
8927                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8928                                         &dbref_key, &path, 0, 0);
8929                 if (!ret)
8930                         found_dbackref = 1;
8931         }
8932
8933         if (!found_dbackref)
8934                 err |= BACKREF_MISSING;
8935 error:
8936         btrfs_release_path(&path);
8937         if (err & BACKREF_MISSING) {
8938                 error("data extent[%llu %llu] backref lost",
8939                       disk_bytenr, disk_num_bytes);
8940         }
8941         return err;
8942 }
8943
8944 /*
8945  * Get real tree block level for the case like shared block
8946  * Return >= 0 as tree level
8947  * Return <0 for error
8948  */
8949 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8950 {
8951         struct extent_buffer *eb;
8952         struct btrfs_path path;
8953         struct btrfs_key key;
8954         struct btrfs_extent_item *ei;
8955         u64 flags;
8956         u64 transid;
8957         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8958         u8 backref_level;
8959         u8 header_level;
8960         int ret;
8961
8962         /* Search extent tree for extent generation and level */
8963         key.objectid = bytenr;
8964         key.type = BTRFS_METADATA_ITEM_KEY;
8965         key.offset = (u64)-1;
8966
8967         btrfs_init_path(&path);
8968         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8969         if (ret < 0)
8970                 goto release_out;
8971         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8972         if (ret < 0)
8973                 goto release_out;
8974         if (ret > 0) {
8975                 ret = -ENOENT;
8976                 goto release_out;
8977         }
8978
8979         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8980         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8981                             struct btrfs_extent_item);
8982         flags = btrfs_extent_flags(path.nodes[0], ei);
8983         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8984                 ret = -ENOENT;
8985                 goto release_out;
8986         }
8987
8988         /* Get transid for later read_tree_block() check */
8989         transid = btrfs_extent_generation(path.nodes[0], ei);
8990
8991         /* Get backref level as one source */
8992         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8993                 backref_level = key.offset;
8994         } else {
8995                 struct btrfs_tree_block_info *info;
8996
8997                 info = (struct btrfs_tree_block_info *)(ei + 1);
8998                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8999         }
9000         btrfs_release_path(&path);
9001
9002         /* Get level from tree block as an alternative source */
9003         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9004         if (!extent_buffer_uptodate(eb)) {
9005                 free_extent_buffer(eb);
9006                 return -EIO;
9007         }
9008         header_level = btrfs_header_level(eb);
9009         free_extent_buffer(eb);
9010
9011         if (header_level != backref_level)
9012                 return -EIO;
9013         return header_level;
9014
9015 release_out:
9016         btrfs_release_path(&path);
9017         return ret;
9018 }
9019
9020 /*
9021  * Check if a tree block backref is valid (points to a valid tree block)
9022  * if level == -1, level will be resolved
9023  * Return >0 for any error found and print error message
9024  */
9025 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9026                                     u64 bytenr, int level)
9027 {
9028         struct btrfs_root *root;
9029         struct btrfs_key key;
9030         struct btrfs_path path;
9031         struct extent_buffer *eb;
9032         struct extent_buffer *node;
9033         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9034         int err = 0;
9035         int ret;
9036
9037         /* Query level for level == -1 special case */
9038         if (level == -1)
9039                 level = query_tree_block_level(fs_info, bytenr);
9040         if (level < 0) {
9041                 err |= REFERENCER_MISSING;
9042                 goto out;
9043         }
9044
9045         key.objectid = root_id;
9046         key.type = BTRFS_ROOT_ITEM_KEY;
9047         key.offset = (u64)-1;
9048
9049         root = btrfs_read_fs_root(fs_info, &key);
9050         if (IS_ERR(root)) {
9051                 err |= REFERENCER_MISSING;
9052                 goto out;
9053         }
9054
9055         /* Read out the tree block to get item/node key */
9056         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9057         if (!extent_buffer_uptodate(eb)) {
9058                 err |= REFERENCER_MISSING;
9059                 free_extent_buffer(eb);
9060                 goto out;
9061         }
9062
9063         /* Empty tree, no need to check key */
9064         if (!btrfs_header_nritems(eb) && !level) {
9065                 free_extent_buffer(eb);
9066                 goto out;
9067         }
9068
9069         if (level)
9070                 btrfs_node_key_to_cpu(eb, &key, 0);
9071         else
9072                 btrfs_item_key_to_cpu(eb, &key, 0);
9073
9074         free_extent_buffer(eb);
9075
9076         btrfs_init_path(&path);
9077         path.lowest_level = level;
9078         /* Search with the first key, to ensure we can reach it */
9079         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9080         if (ret < 0) {
9081                 err |= REFERENCER_MISSING;
9082                 goto release_out;
9083         }
9084
9085         node = path.nodes[level];
9086         if (btrfs_header_bytenr(node) != bytenr) {
9087                 error(
9088         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9089                         bytenr, nodesize, bytenr,
9090                         btrfs_header_bytenr(node));
9091                 err |= REFERENCER_MISMATCH;
9092         }
9093         if (btrfs_header_level(node) != level) {
9094                 error(
9095         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9096                         bytenr, nodesize, level,
9097                         btrfs_header_level(node));
9098                 err |= REFERENCER_MISMATCH;
9099         }
9100
9101 release_out:
9102         btrfs_release_path(&path);
9103 out:
9104         if (err & REFERENCER_MISSING) {
9105                 if (level < 0)
9106                         error("extent [%llu %d] lost referencer (owner: %llu)",
9107                                 bytenr, nodesize, root_id);
9108                 else
9109                         error(
9110                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9111                                 bytenr, nodesize, root_id, level);
9112         }
9113
9114         return err;
9115 }
9116
9117 /*
9118  * Check referencer for shared block backref
9119  * If level == -1, this function will resolve the level.
9120  */
9121 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9122                                      u64 parent, u64 bytenr, int level)
9123 {
9124         struct extent_buffer *eb;
9125         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9126         u32 nr;
9127         int found_parent = 0;
9128         int i;
9129
9130         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9131         if (!extent_buffer_uptodate(eb))
9132                 goto out;
9133
9134         if (level == -1)
9135                 level = query_tree_block_level(fs_info, bytenr);
9136         if (level < 0)
9137                 goto out;
9138
9139         if (level + 1 != btrfs_header_level(eb))
9140                 goto out;
9141
9142         nr = btrfs_header_nritems(eb);
9143         for (i = 0; i < nr; i++) {
9144                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9145                         found_parent = 1;
9146                         break;
9147                 }
9148         }
9149 out:
9150         free_extent_buffer(eb);
9151         if (!found_parent) {
9152                 error(
9153         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9154                         bytenr, nodesize, parent, level);
9155                 return REFERENCER_MISSING;
9156         }
9157         return 0;
9158 }
9159
9160 /*
9161  * Check referencer for normal (inlined) data ref
9162  * If len == 0, it will be resolved by searching in extent tree
9163  */
9164 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9165                                      u64 root_id, u64 objectid, u64 offset,
9166                                      u64 bytenr, u64 len, u32 count)
9167 {
9168         struct btrfs_root *root;
9169         struct btrfs_root *extent_root = fs_info->extent_root;
9170         struct btrfs_key key;
9171         struct btrfs_path path;
9172         struct extent_buffer *leaf;
9173         struct btrfs_file_extent_item *fi;
9174         u32 found_count = 0;
9175         int slot;
9176         int ret = 0;
9177
9178         if (!len) {
9179                 key.objectid = bytenr;
9180                 key.type = BTRFS_EXTENT_ITEM_KEY;
9181                 key.offset = (u64)-1;
9182
9183                 btrfs_init_path(&path);
9184                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9185                 if (ret < 0)
9186                         goto out;
9187                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9188                 if (ret)
9189                         goto out;
9190                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9191                 if (key.objectid != bytenr ||
9192                     key.type != BTRFS_EXTENT_ITEM_KEY)
9193                         goto out;
9194                 len = key.offset;
9195                 btrfs_release_path(&path);
9196         }
9197         key.objectid = root_id;
9198         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9199         key.offset = (u64)-1;
9200         btrfs_init_path(&path);
9201
9202         root = btrfs_read_fs_root(fs_info, &key);
9203         if (IS_ERR(root))
9204                 goto out;
9205
9206         key.objectid = objectid;
9207         key.type = BTRFS_EXTENT_DATA_KEY;
9208         /*
9209          * It can be nasty as data backref offset is
9210          * file offset - file extent offset, which is smaller or
9211          * equal to original backref offset.  The only special case is
9212          * overflow.  So we need to special check and do further search.
9213          */
9214         key.offset = offset & (1ULL << 63) ? 0 : offset;
9215
9216         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9217         if (ret < 0)
9218                 goto out;
9219
9220         /*
9221          * Search afterwards to get correct one
9222          * NOTE: As we must do a comprehensive check on the data backref to
9223          * make sure the dref count also matches, we must iterate all file
9224          * extents for that inode.
9225          */
9226         while (1) {
9227                 leaf = path.nodes[0];
9228                 slot = path.slots[0];
9229
9230                 btrfs_item_key_to_cpu(leaf, &key, slot);
9231                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9232                         break;
9233                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9234                 /*
9235                  * Except normal disk bytenr and disk num bytes, we still
9236                  * need to do extra check on dbackref offset as
9237                  * dbackref offset = file_offset - file_extent_offset
9238                  */
9239                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9240                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9241                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9242                     offset)
9243                         found_count++;
9244
9245                 ret = btrfs_next_item(root, &path);
9246                 if (ret)
9247                         break;
9248         }
9249 out:
9250         btrfs_release_path(&path);
9251         if (found_count != count) {
9252                 error(
9253 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9254                         bytenr, len, root_id, objectid, offset, count, found_count);
9255                 return REFERENCER_MISSING;
9256         }
9257         return 0;
9258 }
9259
9260 /*
9261  * Check if the referencer of a shared data backref exists
9262  */
9263 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9264                                      u64 parent, u64 bytenr)
9265 {
9266         struct extent_buffer *eb;
9267         struct btrfs_key key;
9268         struct btrfs_file_extent_item *fi;
9269         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9270         u32 nr;
9271         int found_parent = 0;
9272         int i;
9273
9274         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9275         if (!extent_buffer_uptodate(eb))
9276                 goto out;
9277
9278         nr = btrfs_header_nritems(eb);
9279         for (i = 0; i < nr; i++) {
9280                 btrfs_item_key_to_cpu(eb, &key, i);
9281                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9282                         continue;
9283
9284                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9285                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9286                         continue;
9287
9288                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9289                         found_parent = 1;
9290                         break;
9291                 }
9292         }
9293
9294 out:
9295         free_extent_buffer(eb);
9296         if (!found_parent) {
9297                 error("shared extent %llu referencer lost (parent: %llu)",
9298                         bytenr, parent);
9299                 return REFERENCER_MISSING;
9300         }
9301         return 0;
9302 }
9303
9304 /*
9305  * This function will check a given extent item, including its backref and
9306  * itself (like crossing stripe boundary and type)
9307  *
9308  * Since we don't use extent_record anymore, introduce new error bit
9309  */
9310 static int check_extent_item(struct btrfs_fs_info *fs_info,
9311                              struct extent_buffer *eb, int slot)
9312 {
9313         struct btrfs_extent_item *ei;
9314         struct btrfs_extent_inline_ref *iref;
9315         struct btrfs_extent_data_ref *dref;
9316         unsigned long end;
9317         unsigned long ptr;
9318         int type;
9319         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9320         u32 item_size = btrfs_item_size_nr(eb, slot);
9321         u64 flags;
9322         u64 offset;
9323         int metadata = 0;
9324         int level;
9325         struct btrfs_key key;
9326         int ret;
9327         int err = 0;
9328
9329         btrfs_item_key_to_cpu(eb, &key, slot);
9330         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9331                 bytes_used += key.offset;
9332         else
9333                 bytes_used += nodesize;
9334
9335         if (item_size < sizeof(*ei)) {
9336                 /*
9337                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9338                  * old thing when on disk format is still un-determined.
9339                  * No need to care about it anymore
9340                  */
9341                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9342                 return -ENOTTY;
9343         }
9344
9345         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9346         flags = btrfs_extent_flags(eb, ei);
9347
9348         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9349                 metadata = 1;
9350         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9351                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9352                       key.objectid, key.objectid + nodesize);
9353                 err |= CROSSING_STRIPE_BOUNDARY;
9354         }
9355
9356         ptr = (unsigned long)(ei + 1);
9357
9358         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9359                 /* Old EXTENT_ITEM metadata */
9360                 struct btrfs_tree_block_info *info;
9361
9362                 info = (struct btrfs_tree_block_info *)ptr;
9363                 level = btrfs_tree_block_level(eb, info);
9364                 ptr += sizeof(struct btrfs_tree_block_info);
9365         } else {
9366                 /* New METADATA_ITEM */
9367                 level = key.offset;
9368         }
9369         end = (unsigned long)ei + item_size;
9370
9371         if (ptr >= end) {
9372                 err |= ITEM_SIZE_MISMATCH;
9373                 goto out;
9374         }
9375
9376         /* Now check every backref in this extent item */
9377 next:
9378         iref = (struct btrfs_extent_inline_ref *)ptr;
9379         type = btrfs_extent_inline_ref_type(eb, iref);
9380         offset = btrfs_extent_inline_ref_offset(eb, iref);
9381         switch (type) {
9382         case BTRFS_TREE_BLOCK_REF_KEY:
9383                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9384                                                level);
9385                 err |= ret;
9386                 break;
9387         case BTRFS_SHARED_BLOCK_REF_KEY:
9388                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9389                                                  level);
9390                 err |= ret;
9391                 break;
9392         case BTRFS_EXTENT_DATA_REF_KEY:
9393                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9394                 ret = check_extent_data_backref(fs_info,
9395                                 btrfs_extent_data_ref_root(eb, dref),
9396                                 btrfs_extent_data_ref_objectid(eb, dref),
9397                                 btrfs_extent_data_ref_offset(eb, dref),
9398                                 key.objectid, key.offset,
9399                                 btrfs_extent_data_ref_count(eb, dref));
9400                 err |= ret;
9401                 break;
9402         case BTRFS_SHARED_DATA_REF_KEY:
9403                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9404                 err |= ret;
9405                 break;
9406         default:
9407                 error("extent[%llu %d %llu] has unknown ref type: %d",
9408                         key.objectid, key.type, key.offset, type);
9409                 err |= UNKNOWN_TYPE;
9410                 goto out;
9411         }
9412
9413         ptr += btrfs_extent_inline_ref_size(type);
9414         if (ptr < end)
9415                 goto next;
9416
9417 out:
9418         return err;
9419 }
9420
9421 /*
9422  * Check if a dev extent item is referred correctly by its chunk
9423  */
9424 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9425                                  struct extent_buffer *eb, int slot)
9426 {
9427         struct btrfs_root *chunk_root = fs_info->chunk_root;
9428         struct btrfs_dev_extent *ptr;
9429         struct btrfs_path path;
9430         struct btrfs_key chunk_key;
9431         struct btrfs_key devext_key;
9432         struct btrfs_chunk *chunk;
9433         struct extent_buffer *l;
9434         int num_stripes;
9435         u64 length;
9436         int i;
9437         int found_chunk = 0;
9438         int ret;
9439
9440         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9441         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9442         length = btrfs_dev_extent_length(eb, ptr);
9443
9444         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9445         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9446         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9447
9448         btrfs_init_path(&path);
9449         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9450         if (ret)
9451                 goto out;
9452
9453         l = path.nodes[0];
9454         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9455         if (btrfs_chunk_length(l, chunk) != length)
9456                 goto out;
9457
9458         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9459         for (i = 0; i < num_stripes; i++) {
9460                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9461                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9462
9463                 if (devid == devext_key.objectid &&
9464                     offset == devext_key.offset) {
9465                         found_chunk = 1;
9466                         break;
9467                 }
9468         }
9469 out:
9470         btrfs_release_path(&path);
9471         if (!found_chunk) {
9472                 error(
9473                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9474                         devext_key.objectid, devext_key.offset, length);
9475                 return REFERENCER_MISSING;
9476         }
9477         return 0;
9478 }
9479
9480 /*
9481  * Check if the used space is correct with the dev item
9482  */
9483 static int check_dev_item(struct btrfs_fs_info *fs_info,
9484                           struct extent_buffer *eb, int slot)
9485 {
9486         struct btrfs_root *dev_root = fs_info->dev_root;
9487         struct btrfs_dev_item *dev_item;
9488         struct btrfs_path path;
9489         struct btrfs_key key;
9490         struct btrfs_dev_extent *ptr;
9491         u64 dev_id;
9492         u64 used;
9493         u64 total = 0;
9494         int ret;
9495
9496         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9497         dev_id = btrfs_device_id(eb, dev_item);
9498         used = btrfs_device_bytes_used(eb, dev_item);
9499
9500         key.objectid = dev_id;
9501         key.type = BTRFS_DEV_EXTENT_KEY;
9502         key.offset = 0;
9503
9504         btrfs_init_path(&path);
9505         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9506         if (ret < 0) {
9507                 btrfs_item_key_to_cpu(eb, &key, slot);
9508                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9509                         key.objectid, key.type, key.offset);
9510                 btrfs_release_path(&path);
9511                 return REFERENCER_MISSING;
9512         }
9513
9514         /* Iterate dev_extents to calculate the used space of a device */
9515         while (1) {
9516                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9517
9518                 if (key.objectid > dev_id)
9519                         break;
9520                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9521                         goto next;
9522
9523                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9524                                      struct btrfs_dev_extent);
9525                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9526 next:
9527                 ret = btrfs_next_item(dev_root, &path);
9528                 if (ret)
9529                         break;
9530         }
9531         btrfs_release_path(&path);
9532
9533         if (used != total) {
9534                 btrfs_item_key_to_cpu(eb, &key, slot);
9535                 error(
9536 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9537                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9538                         BTRFS_DEV_EXTENT_KEY, dev_id);
9539                 return ACCOUNTING_MISMATCH;
9540         }
9541         return 0;
9542 }
9543
9544 /*
9545  * Check a block group item with its referener (chunk) and its used space
9546  * with extent/metadata item
9547  */
9548 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9549                                   struct extent_buffer *eb, int slot)
9550 {
9551         struct btrfs_root *extent_root = fs_info->extent_root;
9552         struct btrfs_root *chunk_root = fs_info->chunk_root;
9553         struct btrfs_block_group_item *bi;
9554         struct btrfs_block_group_item bg_item;
9555         struct btrfs_path path;
9556         struct btrfs_key bg_key;
9557         struct btrfs_key chunk_key;
9558         struct btrfs_key extent_key;
9559         struct btrfs_chunk *chunk;
9560         struct extent_buffer *leaf;
9561         struct btrfs_extent_item *ei;
9562         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9563         u64 flags;
9564         u64 bg_flags;
9565         u64 used;
9566         u64 total = 0;
9567         int ret;
9568         int err = 0;
9569
9570         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9571         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9572         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9573         used = btrfs_block_group_used(&bg_item);
9574         bg_flags = btrfs_block_group_flags(&bg_item);
9575
9576         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9577         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9578         chunk_key.offset = bg_key.objectid;
9579
9580         btrfs_init_path(&path);
9581         /* Search for the referencer chunk */
9582         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9583         if (ret) {
9584                 error(
9585                 "block group[%llu %llu] did not find the related chunk item",
9586                         bg_key.objectid, bg_key.offset);
9587                 err |= REFERENCER_MISSING;
9588         } else {
9589                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9590                                         struct btrfs_chunk);
9591                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9592                                                 bg_key.offset) {
9593                         error(
9594         "block group[%llu %llu] related chunk item length does not match",
9595                                 bg_key.objectid, bg_key.offset);
9596                         err |= REFERENCER_MISMATCH;
9597                 }
9598         }
9599         btrfs_release_path(&path);
9600
9601         /* Search from the block group bytenr */
9602         extent_key.objectid = bg_key.objectid;
9603         extent_key.type = 0;
9604         extent_key.offset = 0;
9605
9606         btrfs_init_path(&path);
9607         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9608         if (ret < 0)
9609                 goto out;
9610
9611         /* Iterate extent tree to account used space */
9612         while (1) {
9613                 leaf = path.nodes[0];
9614                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9615                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9616                         break;
9617
9618                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9619                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9620                         goto next;
9621                 if (extent_key.objectid < bg_key.objectid)
9622                         goto next;
9623
9624                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9625                         total += nodesize;
9626                 else
9627                         total += extent_key.offset;
9628
9629                 ei = btrfs_item_ptr(leaf, path.slots[0],
9630                                     struct btrfs_extent_item);
9631                 flags = btrfs_extent_flags(leaf, ei);
9632                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9633                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9634                                 error(
9635                         "bad extent[%llu, %llu) type mismatch with chunk",
9636                                         extent_key.objectid,
9637                                         extent_key.objectid + extent_key.offset);
9638                                 err |= CHUNK_TYPE_MISMATCH;
9639                         }
9640                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9641                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9642                                     BTRFS_BLOCK_GROUP_METADATA))) {
9643                                 error(
9644                         "bad extent[%llu, %llu) type mismatch with chunk",
9645                                         extent_key.objectid,
9646                                         extent_key.objectid + nodesize);
9647                                 err |= CHUNK_TYPE_MISMATCH;
9648                         }
9649                 }
9650 next:
9651                 ret = btrfs_next_item(extent_root, &path);
9652                 if (ret)
9653                         break;
9654         }
9655
9656 out:
9657         btrfs_release_path(&path);
9658
9659         if (total != used) {
9660                 error(
9661                 "block group[%llu %llu] used %llu but extent items used %llu",
9662                         bg_key.objectid, bg_key.offset, used, total);
9663                 err |= ACCOUNTING_MISMATCH;
9664         }
9665         return err;
9666 }
9667
9668 /*
9669  * Check a chunk item.
9670  * Including checking all referred dev_extents and block group
9671  */
9672 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9673                             struct extent_buffer *eb, int slot)
9674 {
9675         struct btrfs_root *extent_root = fs_info->extent_root;
9676         struct btrfs_root *dev_root = fs_info->dev_root;
9677         struct btrfs_path path;
9678         struct btrfs_key chunk_key;
9679         struct btrfs_key bg_key;
9680         struct btrfs_key devext_key;
9681         struct btrfs_chunk *chunk;
9682         struct extent_buffer *leaf;
9683         struct btrfs_block_group_item *bi;
9684         struct btrfs_block_group_item bg_item;
9685         struct btrfs_dev_extent *ptr;
9686         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9687         u64 length;
9688         u64 chunk_end;
9689         u64 type;
9690         u64 profile;
9691         int num_stripes;
9692         u64 offset;
9693         u64 objectid;
9694         int i;
9695         int ret;
9696         int err = 0;
9697
9698         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9699         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9700         length = btrfs_chunk_length(eb, chunk);
9701         chunk_end = chunk_key.offset + length;
9702         if (!IS_ALIGNED(length, sectorsize)) {
9703                 error("chunk[%llu %llu) not aligned to %u",
9704                         chunk_key.offset, chunk_end, sectorsize);
9705                 err |= BYTES_UNALIGNED;
9706                 goto out;
9707         }
9708
9709         type = btrfs_chunk_type(eb, chunk);
9710         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9711         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9712                 error("chunk[%llu %llu) has no chunk type",
9713                         chunk_key.offset, chunk_end);
9714                 err |= UNKNOWN_TYPE;
9715         }
9716         if (profile && (profile & (profile - 1))) {
9717                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9718                         chunk_key.offset, chunk_end, profile);
9719                 err |= UNKNOWN_TYPE;
9720         }
9721
9722         bg_key.objectid = chunk_key.offset;
9723         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9724         bg_key.offset = length;
9725
9726         btrfs_init_path(&path);
9727         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9728         if (ret) {
9729                 error(
9730                 "chunk[%llu %llu) did not find the related block group item",
9731                         chunk_key.offset, chunk_end);
9732                 err |= REFERENCER_MISSING;
9733         } else{
9734                 leaf = path.nodes[0];
9735                 bi = btrfs_item_ptr(leaf, path.slots[0],
9736                                     struct btrfs_block_group_item);
9737                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9738                                    sizeof(bg_item));
9739                 if (btrfs_block_group_flags(&bg_item) != type) {
9740                         error(
9741 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9742                                 chunk_key.offset, chunk_end, type,
9743                                 btrfs_block_group_flags(&bg_item));
9744                         err |= REFERENCER_MISSING;
9745                 }
9746         }
9747
9748         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9749         for (i = 0; i < num_stripes; i++) {
9750                 btrfs_release_path(&path);
9751                 btrfs_init_path(&path);
9752                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9753                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9754                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9755
9756                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9757                                         0, 0);
9758                 if (ret)
9759                         goto not_match_dev;
9760
9761                 leaf = path.nodes[0];
9762                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9763                                      struct btrfs_dev_extent);
9764                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9765                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9766                 if (objectid != chunk_key.objectid ||
9767                     offset != chunk_key.offset ||
9768                     btrfs_dev_extent_length(leaf, ptr) != length)
9769                         goto not_match_dev;
9770                 continue;
9771 not_match_dev:
9772                 err |= BACKREF_MISSING;
9773                 error(
9774                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9775                         chunk_key.objectid, chunk_end, i);
9776                 continue;
9777         }
9778         btrfs_release_path(&path);
9779 out:
9780         return err;
9781 }
9782
9783 /*
9784  * Main entry function to check known items and update related accounting info
9785  */
9786 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9787 {
9788         struct btrfs_fs_info *fs_info = root->fs_info;
9789         struct btrfs_key key;
9790         int slot = 0;
9791         int type;
9792         struct btrfs_extent_data_ref *dref;
9793         int ret;
9794         int err = 0;
9795
9796 next:
9797         btrfs_item_key_to_cpu(eb, &key, slot);
9798         type = btrfs_key_type(&key);
9799
9800         switch (type) {
9801         case BTRFS_EXTENT_DATA_KEY:
9802                 ret = check_extent_data_item(root, eb, slot);
9803                 err |= ret;
9804                 break;
9805         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9806                 ret = check_block_group_item(fs_info, eb, slot);
9807                 err |= ret;
9808                 break;
9809         case BTRFS_DEV_ITEM_KEY:
9810                 ret = check_dev_item(fs_info, eb, slot);
9811                 err |= ret;
9812                 break;
9813         case BTRFS_CHUNK_ITEM_KEY:
9814                 ret = check_chunk_item(fs_info, eb, slot);
9815                 err |= ret;
9816                 break;
9817         case BTRFS_DEV_EXTENT_KEY:
9818                 ret = check_dev_extent_item(fs_info, eb, slot);
9819                 err |= ret;
9820                 break;
9821         case BTRFS_EXTENT_ITEM_KEY:
9822         case BTRFS_METADATA_ITEM_KEY:
9823                 ret = check_extent_item(fs_info, eb, slot);
9824                 err |= ret;
9825                 break;
9826         case BTRFS_EXTENT_CSUM_KEY:
9827                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9828                 break;
9829         case BTRFS_TREE_BLOCK_REF_KEY:
9830                 ret = check_tree_block_backref(fs_info, key.offset,
9831                                                key.objectid, -1);
9832                 err |= ret;
9833                 break;
9834         case BTRFS_EXTENT_DATA_REF_KEY:
9835                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9836                 ret = check_extent_data_backref(fs_info,
9837                                 btrfs_extent_data_ref_root(eb, dref),
9838                                 btrfs_extent_data_ref_objectid(eb, dref),
9839                                 btrfs_extent_data_ref_offset(eb, dref),
9840                                 key.objectid, 0,
9841                                 btrfs_extent_data_ref_count(eb, dref));
9842                 err |= ret;
9843                 break;
9844         case BTRFS_SHARED_BLOCK_REF_KEY:
9845                 ret = check_shared_block_backref(fs_info, key.offset,
9846                                                  key.objectid, -1);
9847                 err |= ret;
9848                 break;
9849         case BTRFS_SHARED_DATA_REF_KEY:
9850                 ret = check_shared_data_backref(fs_info, key.offset,
9851                                                 key.objectid);
9852                 err |= ret;
9853                 break;
9854         default:
9855                 break;
9856         }
9857
9858         if (++slot < btrfs_header_nritems(eb))
9859                 goto next;
9860
9861         return err;
9862 }
9863
9864 /*
9865  * Helper function for later fs/subvol tree check.  To determine if a tree
9866  * block should be checked.
9867  * This function will ensure only the direct referencer with lowest rootid to
9868  * check a fs/subvolume tree block.
9869  *
9870  * Backref check at extent tree would detect errors like missing subvolume
9871  * tree, so we can do aggressive check to reduce duplicated checks.
9872  */
9873 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9874 {
9875         struct btrfs_root *extent_root = root->fs_info->extent_root;
9876         struct btrfs_key key;
9877         struct btrfs_path path;
9878         struct extent_buffer *leaf;
9879         int slot;
9880         struct btrfs_extent_item *ei;
9881         unsigned long ptr;
9882         unsigned long end;
9883         int type;
9884         u32 item_size;
9885         u64 offset;
9886         struct btrfs_extent_inline_ref *iref;
9887         int ret;
9888
9889         btrfs_init_path(&path);
9890         key.objectid = btrfs_header_bytenr(eb);
9891         key.type = BTRFS_METADATA_ITEM_KEY;
9892         key.offset = (u64)-1;
9893
9894         /*
9895          * Any failure in backref resolving means we can't determine
9896          * whom the tree block belongs to.
9897          * So in that case, we need to check that tree block
9898          */
9899         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9900         if (ret < 0)
9901                 goto need_check;
9902
9903         ret = btrfs_previous_extent_item(extent_root, &path,
9904                                          btrfs_header_bytenr(eb));
9905         if (ret)
9906                 goto need_check;
9907
9908         leaf = path.nodes[0];
9909         slot = path.slots[0];
9910         btrfs_item_key_to_cpu(leaf, &key, slot);
9911         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9912
9913         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9914                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9915         } else {
9916                 struct btrfs_tree_block_info *info;
9917
9918                 info = (struct btrfs_tree_block_info *)(ei + 1);
9919                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9920         }
9921
9922         item_size = btrfs_item_size_nr(leaf, slot);
9923         ptr = (unsigned long)iref;
9924         end = (unsigned long)ei + item_size;
9925         while (ptr < end) {
9926                 iref = (struct btrfs_extent_inline_ref *)ptr;
9927                 type = btrfs_extent_inline_ref_type(leaf, iref);
9928                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9929
9930                 /*
9931                  * We only check the tree block if current root is
9932                  * the lowest referencer of it.
9933                  */
9934                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9935                     offset < root->objectid) {
9936                         btrfs_release_path(&path);
9937                         return 0;
9938                 }
9939
9940                 ptr += btrfs_extent_inline_ref_size(type);
9941         }
9942         /*
9943          * Normally we should also check keyed tree block ref, but that may be
9944          * very time consuming.  Inlined ref should already make us skip a lot
9945          * of refs now.  So skip search keyed tree block ref.
9946          */
9947
9948 need_check:
9949         btrfs_release_path(&path);
9950         return 1;
9951 }
9952
9953 /*
9954  * Traversal function for tree block. We will do:
9955  * 1) Skip shared fs/subvolume tree blocks
9956  * 2) Update related bytes accounting
9957  * 3) Pre-order traversal
9958  */
9959 static int traverse_tree_block(struct btrfs_root *root,
9960                                 struct extent_buffer *node)
9961 {
9962         struct extent_buffer *eb;
9963         struct btrfs_key key;
9964         struct btrfs_key drop_key;
9965         int level;
9966         u64 nr;
9967         int i;
9968         int err = 0;
9969         int ret;
9970
9971         /*
9972          * Skip shared fs/subvolume tree block, in that case they will
9973          * be checked by referencer with lowest rootid
9974          */
9975         if (is_fstree(root->objectid) && !should_check(root, node))
9976                 return 0;
9977
9978         /* Update bytes accounting */
9979         total_btree_bytes += node->len;
9980         if (fs_root_objectid(btrfs_header_owner(node)))
9981                 total_fs_tree_bytes += node->len;
9982         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9983                 total_extent_tree_bytes += node->len;
9984         if (!found_old_backref &&
9985             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9986             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9987             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9988                 found_old_backref = 1;
9989
9990         /* pre-order tranversal, check itself first */
9991         level = btrfs_header_level(node);
9992         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9993                                    btrfs_header_level(node),
9994                                    btrfs_header_owner(node));
9995         err |= ret;
9996         if (err)
9997                 error(
9998         "check %s failed root %llu bytenr %llu level %d, force continue check",
9999                         level ? "node":"leaf", root->objectid,
10000                         btrfs_header_bytenr(node), btrfs_header_level(node));
10001
10002         if (!level) {
10003                 btree_space_waste += btrfs_leaf_free_space(root, node);
10004                 ret = check_leaf_items(root, node);
10005                 err |= ret;
10006                 return err;
10007         }
10008
10009         nr = btrfs_header_nritems(node);
10010         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10011         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10012                 sizeof(struct btrfs_key_ptr);
10013
10014         /* Then check all its children */
10015         for (i = 0; i < nr; i++) {
10016                 u64 blocknr = btrfs_node_blockptr(node, i);
10017
10018                 btrfs_node_key_to_cpu(node, &key, i);
10019                 if (level == root->root_item.drop_level &&
10020                     is_dropped_key(&key, &drop_key))
10021                         continue;
10022
10023                 /*
10024                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10025                  * to call the function itself.
10026                  */
10027                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10028                 if (extent_buffer_uptodate(eb)) {
10029                         ret = traverse_tree_block(root, eb);
10030                         err |= ret;
10031                 }
10032                 free_extent_buffer(eb);
10033         }
10034
10035         return err;
10036 }
10037
10038 /*
10039  * Low memory usage version check_chunks_and_extents.
10040  */
10041 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10042 {
10043         struct btrfs_path path;
10044         struct btrfs_key key;
10045         struct btrfs_root *root1;
10046         struct btrfs_root *cur_root;
10047         int err = 0;
10048         int ret;
10049
10050         root1 = root->fs_info->chunk_root;
10051         ret = traverse_tree_block(root1, root1->node);
10052         err |= ret;
10053
10054         root1 = root->fs_info->tree_root;
10055         ret = traverse_tree_block(root1, root1->node);
10056         err |= ret;
10057
10058         btrfs_init_path(&path);
10059         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10060         key.offset = 0;
10061         key.type = BTRFS_ROOT_ITEM_KEY;
10062
10063         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10064         if (ret) {
10065                 error("cannot find extent treet in tree_root");
10066                 goto out;
10067         }
10068
10069         while (1) {
10070                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10071                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10072                         goto next;
10073                 key.offset = (u64)-1;
10074
10075                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10076                 if (IS_ERR(cur_root) || !cur_root) {
10077                         error("failed to read tree: %lld", key.objectid);
10078                         goto next;
10079                 }
10080
10081                 ret = traverse_tree_block(cur_root, cur_root->node);
10082                 err |= ret;
10083
10084 next:
10085                 ret = btrfs_next_item(root1, &path);
10086                 if (ret)
10087                         goto out;
10088         }
10089
10090 out:
10091         btrfs_release_path(&path);
10092         return err;
10093 }
10094
10095 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10096                            struct btrfs_root *root, int overwrite)
10097 {
10098         struct extent_buffer *c;
10099         struct extent_buffer *old = root->node;
10100         int level;
10101         int ret;
10102         struct btrfs_disk_key disk_key = {0,0,0};
10103
10104         level = 0;
10105
10106         if (overwrite) {
10107                 c = old;
10108                 extent_buffer_get(c);
10109                 goto init;
10110         }
10111         c = btrfs_alloc_free_block(trans, root,
10112                                    root->nodesize,
10113                                    root->root_key.objectid,
10114                                    &disk_key, level, 0, 0);
10115         if (IS_ERR(c)) {
10116                 c = old;
10117                 extent_buffer_get(c);
10118                 overwrite = 1;
10119         }
10120 init:
10121         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10122         btrfs_set_header_level(c, level);
10123         btrfs_set_header_bytenr(c, c->start);
10124         btrfs_set_header_generation(c, trans->transid);
10125         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10126         btrfs_set_header_owner(c, root->root_key.objectid);
10127
10128         write_extent_buffer(c, root->fs_info->fsid,
10129                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10130
10131         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10132                             btrfs_header_chunk_tree_uuid(c),
10133                             BTRFS_UUID_SIZE);
10134
10135         btrfs_mark_buffer_dirty(c);
10136         /*
10137          * this case can happen in the following case:
10138          *
10139          * 1.overwrite previous root.
10140          *
10141          * 2.reinit reloc data root, this is because we skip pin
10142          * down reloc data tree before which means we can allocate
10143          * same block bytenr here.
10144          */
10145         if (old->start == c->start) {
10146                 btrfs_set_root_generation(&root->root_item,
10147                                           trans->transid);
10148                 root->root_item.level = btrfs_header_level(root->node);
10149                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10150                                         &root->root_key, &root->root_item);
10151                 if (ret) {
10152                         free_extent_buffer(c);
10153                         return ret;
10154                 }
10155         }
10156         free_extent_buffer(old);
10157         root->node = c;
10158         add_root_to_dirty_list(root);
10159         return 0;
10160 }
10161
10162 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10163                                 struct extent_buffer *eb, int tree_root)
10164 {
10165         struct extent_buffer *tmp;
10166         struct btrfs_root_item *ri;
10167         struct btrfs_key key;
10168         u64 bytenr;
10169         u32 nodesize;
10170         int level = btrfs_header_level(eb);
10171         int nritems;
10172         int ret;
10173         int i;
10174
10175         /*
10176          * If we have pinned this block before, don't pin it again.
10177          * This can not only avoid forever loop with broken filesystem
10178          * but also give us some speedups.
10179          */
10180         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10181                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10182                 return 0;
10183
10184         btrfs_pin_extent(fs_info, eb->start, eb->len);
10185
10186         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10187         nritems = btrfs_header_nritems(eb);
10188         for (i = 0; i < nritems; i++) {
10189                 if (level == 0) {
10190                         btrfs_item_key_to_cpu(eb, &key, i);
10191                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10192                                 continue;
10193                         /* Skip the extent root and reloc roots */
10194                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10195                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10196                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10197                                 continue;
10198                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10199                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10200
10201                         /*
10202                          * If at any point we start needing the real root we
10203                          * will have to build a stump root for the root we are
10204                          * in, but for now this doesn't actually use the root so
10205                          * just pass in extent_root.
10206                          */
10207                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10208                                               nodesize, 0);
10209                         if (!extent_buffer_uptodate(tmp)) {
10210                                 fprintf(stderr, "Error reading root block\n");
10211                                 return -EIO;
10212                         }
10213                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10214                         free_extent_buffer(tmp);
10215                         if (ret)
10216                                 return ret;
10217                 } else {
10218                         bytenr = btrfs_node_blockptr(eb, i);
10219
10220                         /* If we aren't the tree root don't read the block */
10221                         if (level == 1 && !tree_root) {
10222                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10223                                 continue;
10224                         }
10225
10226                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10227                                               nodesize, 0);
10228                         if (!extent_buffer_uptodate(tmp)) {
10229                                 fprintf(stderr, "Error reading tree block\n");
10230                                 return -EIO;
10231                         }
10232                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10233                         free_extent_buffer(tmp);
10234                         if (ret)
10235                                 return ret;
10236                 }
10237         }
10238
10239         return 0;
10240 }
10241
10242 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10243 {
10244         int ret;
10245
10246         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10247         if (ret)
10248                 return ret;
10249
10250         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10251 }
10252
10253 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10254 {
10255         struct btrfs_block_group_cache *cache;
10256         struct btrfs_path *path;
10257         struct extent_buffer *leaf;
10258         struct btrfs_chunk *chunk;
10259         struct btrfs_key key;
10260         int ret;
10261         u64 start;
10262
10263         path = btrfs_alloc_path();
10264         if (!path)
10265                 return -ENOMEM;
10266
10267         key.objectid = 0;
10268         key.type = BTRFS_CHUNK_ITEM_KEY;
10269         key.offset = 0;
10270
10271         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10272         if (ret < 0) {
10273                 btrfs_free_path(path);
10274                 return ret;
10275         }
10276
10277         /*
10278          * We do this in case the block groups were screwed up and had alloc
10279          * bits that aren't actually set on the chunks.  This happens with
10280          * restored images every time and could happen in real life I guess.
10281          */
10282         fs_info->avail_data_alloc_bits = 0;
10283         fs_info->avail_metadata_alloc_bits = 0;
10284         fs_info->avail_system_alloc_bits = 0;
10285
10286         /* First we need to create the in-memory block groups */
10287         while (1) {
10288                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10289                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10290                         if (ret < 0) {
10291                                 btrfs_free_path(path);
10292                                 return ret;
10293                         }
10294                         if (ret) {
10295                                 ret = 0;
10296                                 break;
10297                         }
10298                 }
10299                 leaf = path->nodes[0];
10300                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10301                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10302                         path->slots[0]++;
10303                         continue;
10304                 }
10305
10306                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10307                                        struct btrfs_chunk);
10308                 btrfs_add_block_group(fs_info, 0,
10309                                       btrfs_chunk_type(leaf, chunk),
10310                                       key.objectid, key.offset,
10311                                       btrfs_chunk_length(leaf, chunk));
10312                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10313                                  key.offset + btrfs_chunk_length(leaf, chunk),
10314                                  GFP_NOFS);
10315                 path->slots[0]++;
10316         }
10317         start = 0;
10318         while (1) {
10319                 cache = btrfs_lookup_first_block_group(fs_info, start);
10320                 if (!cache)
10321                         break;
10322                 cache->cached = 1;
10323                 start = cache->key.objectid + cache->key.offset;
10324         }
10325
10326         btrfs_free_path(path);
10327         return 0;
10328 }
10329
10330 static int reset_balance(struct btrfs_trans_handle *trans,
10331                          struct btrfs_fs_info *fs_info)
10332 {
10333         struct btrfs_root *root = fs_info->tree_root;
10334         struct btrfs_path *path;
10335         struct extent_buffer *leaf;
10336         struct btrfs_key key;
10337         int del_slot, del_nr = 0;
10338         int ret;
10339         int found = 0;
10340
10341         path = btrfs_alloc_path();
10342         if (!path)
10343                 return -ENOMEM;
10344
10345         key.objectid = BTRFS_BALANCE_OBJECTID;
10346         key.type = BTRFS_BALANCE_ITEM_KEY;
10347         key.offset = 0;
10348
10349         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10350         if (ret) {
10351                 if (ret > 0)
10352                         ret = 0;
10353                 if (!ret)
10354                         goto reinit_data_reloc;
10355                 else
10356                         goto out;
10357         }
10358
10359         ret = btrfs_del_item(trans, root, path);
10360         if (ret)
10361                 goto out;
10362         btrfs_release_path(path);
10363
10364         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10365         key.type = BTRFS_ROOT_ITEM_KEY;
10366         key.offset = 0;
10367
10368         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10369         if (ret < 0)
10370                 goto out;
10371         while (1) {
10372                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10373                         if (!found)
10374                                 break;
10375
10376                         if (del_nr) {
10377                                 ret = btrfs_del_items(trans, root, path,
10378                                                       del_slot, del_nr);
10379                                 del_nr = 0;
10380                                 if (ret)
10381                                         goto out;
10382                         }
10383                         key.offset++;
10384                         btrfs_release_path(path);
10385
10386                         found = 0;
10387                         ret = btrfs_search_slot(trans, root, &key, path,
10388                                                 -1, 1);
10389                         if (ret < 0)
10390                                 goto out;
10391                         continue;
10392                 }
10393                 found = 1;
10394                 leaf = path->nodes[0];
10395                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10396                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10397                         break;
10398                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10399                         path->slots[0]++;
10400                         continue;
10401                 }
10402                 if (!del_nr) {
10403                         del_slot = path->slots[0];
10404                         del_nr = 1;
10405                 } else {
10406                         del_nr++;
10407                 }
10408                 path->slots[0]++;
10409         }
10410
10411         if (del_nr) {
10412                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10413                 if (ret)
10414                         goto out;
10415         }
10416         btrfs_release_path(path);
10417
10418 reinit_data_reloc:
10419         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10420         key.type = BTRFS_ROOT_ITEM_KEY;
10421         key.offset = (u64)-1;
10422         root = btrfs_read_fs_root(fs_info, &key);
10423         if (IS_ERR(root)) {
10424                 fprintf(stderr, "Error reading data reloc tree\n");
10425                 ret = PTR_ERR(root);
10426                 goto out;
10427         }
10428         record_root_in_trans(trans, root);
10429         ret = btrfs_fsck_reinit_root(trans, root, 0);
10430         if (ret)
10431                 goto out;
10432         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10433 out:
10434         btrfs_free_path(path);
10435         return ret;
10436 }
10437
10438 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10439                               struct btrfs_fs_info *fs_info)
10440 {
10441         u64 start = 0;
10442         int ret;
10443
10444         /*
10445          * The only reason we don't do this is because right now we're just
10446          * walking the trees we find and pinning down their bytes, we don't look
10447          * at any of the leaves.  In order to do mixed groups we'd have to check
10448          * the leaves of any fs roots and pin down the bytes for any file
10449          * extents we find.  Not hard but why do it if we don't have to?
10450          */
10451         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10452                 fprintf(stderr, "We don't support re-initing the extent tree "
10453                         "for mixed block groups yet, please notify a btrfs "
10454                         "developer you want to do this so they can add this "
10455                         "functionality.\n");
10456                 return -EINVAL;
10457         }
10458
10459         /*
10460          * first we need to walk all of the trees except the extent tree and pin
10461          * down the bytes that are in use so we don't overwrite any existing
10462          * metadata.
10463          */
10464         ret = pin_metadata_blocks(fs_info);
10465         if (ret) {
10466                 fprintf(stderr, "error pinning down used bytes\n");
10467                 return ret;
10468         }
10469
10470         /*
10471          * Need to drop all the block groups since we're going to recreate all
10472          * of them again.
10473          */
10474         btrfs_free_block_groups(fs_info);
10475         ret = reset_block_groups(fs_info);
10476         if (ret) {
10477                 fprintf(stderr, "error resetting the block groups\n");
10478                 return ret;
10479         }
10480
10481         /* Ok we can allocate now, reinit the extent root */
10482         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10483         if (ret) {
10484                 fprintf(stderr, "extent root initialization failed\n");
10485                 /*
10486                  * When the transaction code is updated we should end the
10487                  * transaction, but for now progs only knows about commit so
10488                  * just return an error.
10489                  */
10490                 return ret;
10491         }
10492
10493         /*
10494          * Now we have all the in-memory block groups setup so we can make
10495          * allocations properly, and the metadata we care about is safe since we
10496          * pinned all of it above.
10497          */
10498         while (1) {
10499                 struct btrfs_block_group_cache *cache;
10500
10501                 cache = btrfs_lookup_first_block_group(fs_info, start);
10502                 if (!cache)
10503                         break;
10504                 start = cache->key.objectid + cache->key.offset;
10505                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10506                                         &cache->key, &cache->item,
10507                                         sizeof(cache->item));
10508                 if (ret) {
10509                         fprintf(stderr, "Error adding block group\n");
10510                         return ret;
10511                 }
10512                 btrfs_extent_post_op(trans, fs_info->extent_root);
10513         }
10514
10515         ret = reset_balance(trans, fs_info);
10516         if (ret)
10517                 fprintf(stderr, "error resetting the pending balance\n");
10518
10519         return ret;
10520 }
10521
10522 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10523 {
10524         struct btrfs_path *path;
10525         struct btrfs_trans_handle *trans;
10526         struct btrfs_key key;
10527         int ret;
10528
10529         printf("Recowing metadata block %llu\n", eb->start);
10530         key.objectid = btrfs_header_owner(eb);
10531         key.type = BTRFS_ROOT_ITEM_KEY;
10532         key.offset = (u64)-1;
10533
10534         root = btrfs_read_fs_root(root->fs_info, &key);
10535         if (IS_ERR(root)) {
10536                 fprintf(stderr, "Couldn't find owner root %llu\n",
10537                         key.objectid);
10538                 return PTR_ERR(root);
10539         }
10540
10541         path = btrfs_alloc_path();
10542         if (!path)
10543                 return -ENOMEM;
10544
10545         trans = btrfs_start_transaction(root, 1);
10546         if (IS_ERR(trans)) {
10547                 btrfs_free_path(path);
10548                 return PTR_ERR(trans);
10549         }
10550
10551         path->lowest_level = btrfs_header_level(eb);
10552         if (path->lowest_level)
10553                 btrfs_node_key_to_cpu(eb, &key, 0);
10554         else
10555                 btrfs_item_key_to_cpu(eb, &key, 0);
10556
10557         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10558         btrfs_commit_transaction(trans, root);
10559         btrfs_free_path(path);
10560         return ret;
10561 }
10562
10563 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10564 {
10565         struct btrfs_path *path;
10566         struct btrfs_trans_handle *trans;
10567         struct btrfs_key key;
10568         int ret;
10569
10570         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10571                bad->key.type, bad->key.offset);
10572         key.objectid = bad->root_id;
10573         key.type = BTRFS_ROOT_ITEM_KEY;
10574         key.offset = (u64)-1;
10575
10576         root = btrfs_read_fs_root(root->fs_info, &key);
10577         if (IS_ERR(root)) {
10578                 fprintf(stderr, "Couldn't find owner root %llu\n",
10579                         key.objectid);
10580                 return PTR_ERR(root);
10581         }
10582
10583         path = btrfs_alloc_path();
10584         if (!path)
10585                 return -ENOMEM;
10586
10587         trans = btrfs_start_transaction(root, 1);
10588         if (IS_ERR(trans)) {
10589                 btrfs_free_path(path);
10590                 return PTR_ERR(trans);
10591         }
10592
10593         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10594         if (ret) {
10595                 if (ret > 0)
10596                         ret = 0;
10597                 goto out;
10598         }
10599         ret = btrfs_del_item(trans, root, path);
10600 out:
10601         btrfs_commit_transaction(trans, root);
10602         btrfs_free_path(path);
10603         return ret;
10604 }
10605
10606 static int zero_log_tree(struct btrfs_root *root)
10607 {
10608         struct btrfs_trans_handle *trans;
10609         int ret;
10610
10611         trans = btrfs_start_transaction(root, 1);
10612         if (IS_ERR(trans)) {
10613                 ret = PTR_ERR(trans);
10614                 return ret;
10615         }
10616         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10617         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10618         ret = btrfs_commit_transaction(trans, root);
10619         return ret;
10620 }
10621
10622 static int populate_csum(struct btrfs_trans_handle *trans,
10623                          struct btrfs_root *csum_root, char *buf, u64 start,
10624                          u64 len)
10625 {
10626         u64 offset = 0;
10627         u64 sectorsize;
10628         int ret = 0;
10629
10630         while (offset < len) {
10631                 sectorsize = csum_root->sectorsize;
10632                 ret = read_extent_data(csum_root, buf, start + offset,
10633                                        &sectorsize, 0);
10634                 if (ret)
10635                         break;
10636                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10637                                             start + offset, buf, sectorsize);
10638                 if (ret)
10639                         break;
10640                 offset += sectorsize;
10641         }
10642         return ret;
10643 }
10644
10645 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10646                                       struct btrfs_root *csum_root,
10647                                       struct btrfs_root *cur_root)
10648 {
10649         struct btrfs_path *path;
10650         struct btrfs_key key;
10651         struct extent_buffer *node;
10652         struct btrfs_file_extent_item *fi;
10653         char *buf = NULL;
10654         u64 start = 0;
10655         u64 len = 0;
10656         int slot = 0;
10657         int ret = 0;
10658
10659         path = btrfs_alloc_path();
10660         if (!path)
10661                 return -ENOMEM;
10662         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10663         if (!buf) {
10664                 ret = -ENOMEM;
10665                 goto out;
10666         }
10667
10668         key.objectid = 0;
10669         key.offset = 0;
10670         key.type = 0;
10671
10672         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10673         if (ret < 0)
10674                 goto out;
10675         /* Iterate all regular file extents and fill its csum */
10676         while (1) {
10677                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10678
10679                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10680                         goto next;
10681                 node = path->nodes[0];
10682                 slot = path->slots[0];
10683                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10684                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10685                         goto next;
10686                 start = btrfs_file_extent_disk_bytenr(node, fi);
10687                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10688
10689                 ret = populate_csum(trans, csum_root, buf, start, len);
10690                 if (ret == -EEXIST)
10691                         ret = 0;
10692                 if (ret < 0)
10693                         goto out;
10694 next:
10695                 /*
10696                  * TODO: if next leaf is corrupted, jump to nearest next valid
10697                  * leaf.
10698                  */
10699                 ret = btrfs_next_item(cur_root, path);
10700                 if (ret < 0)
10701                         goto out;
10702                 if (ret > 0) {
10703                         ret = 0;
10704                         goto out;
10705                 }
10706         }
10707
10708 out:
10709         btrfs_free_path(path);
10710         free(buf);
10711         return ret;
10712 }
10713
10714 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10715                                   struct btrfs_root *csum_root)
10716 {
10717         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10718         struct btrfs_path *path;
10719         struct btrfs_root *tree_root = fs_info->tree_root;
10720         struct btrfs_root *cur_root;
10721         struct extent_buffer *node;
10722         struct btrfs_key key;
10723         int slot = 0;
10724         int ret = 0;
10725
10726         path = btrfs_alloc_path();
10727         if (!path)
10728                 return -ENOMEM;
10729
10730         key.objectid = BTRFS_FS_TREE_OBJECTID;
10731         key.offset = 0;
10732         key.type = BTRFS_ROOT_ITEM_KEY;
10733
10734         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10735         if (ret < 0)
10736                 goto out;
10737         if (ret > 0) {
10738                 ret = -ENOENT;
10739                 goto out;
10740         }
10741
10742         while (1) {
10743                 node = path->nodes[0];
10744                 slot = path->slots[0];
10745                 btrfs_item_key_to_cpu(node, &key, slot);
10746                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10747                         goto out;
10748                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10749                         goto next;
10750                 if (!is_fstree(key.objectid))
10751                         goto next;
10752                 key.offset = (u64)-1;
10753
10754                 cur_root = btrfs_read_fs_root(fs_info, &key);
10755                 if (IS_ERR(cur_root) || !cur_root) {
10756                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10757                                 key.objectid);
10758                         goto out;
10759                 }
10760                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10761                                 cur_root);
10762                 if (ret < 0)
10763                         goto out;
10764 next:
10765                 ret = btrfs_next_item(tree_root, path);
10766                 if (ret > 0) {
10767                         ret = 0;
10768                         goto out;
10769                 }
10770                 if (ret < 0)
10771                         goto out;
10772         }
10773
10774 out:
10775         btrfs_free_path(path);
10776         return ret;
10777 }
10778
10779 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10780                                       struct btrfs_root *csum_root)
10781 {
10782         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10783         struct btrfs_path *path;
10784         struct btrfs_extent_item *ei;
10785         struct extent_buffer *leaf;
10786         char *buf;
10787         struct btrfs_key key;
10788         int ret;
10789
10790         path = btrfs_alloc_path();
10791         if (!path)
10792                 return -ENOMEM;
10793
10794         key.objectid = 0;
10795         key.type = BTRFS_EXTENT_ITEM_KEY;
10796         key.offset = 0;
10797
10798         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10799         if (ret < 0) {
10800                 btrfs_free_path(path);
10801                 return ret;
10802         }
10803
10804         buf = malloc(csum_root->sectorsize);
10805         if (!buf) {
10806                 btrfs_free_path(path);
10807                 return -ENOMEM;
10808         }
10809
10810         while (1) {
10811                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10812                         ret = btrfs_next_leaf(extent_root, path);
10813                         if (ret < 0)
10814                                 break;
10815                         if (ret) {
10816                                 ret = 0;
10817                                 break;
10818                         }
10819                 }
10820                 leaf = path->nodes[0];
10821
10822                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10823                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10824                         path->slots[0]++;
10825                         continue;
10826                 }
10827
10828                 ei = btrfs_item_ptr(leaf, path->slots[0],
10829                                     struct btrfs_extent_item);
10830                 if (!(btrfs_extent_flags(leaf, ei) &
10831                       BTRFS_EXTENT_FLAG_DATA)) {
10832                         path->slots[0]++;
10833                         continue;
10834                 }
10835
10836                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10837                                     key.offset);
10838                 if (ret)
10839                         break;
10840                 path->slots[0]++;
10841         }
10842
10843         btrfs_free_path(path);
10844         free(buf);
10845         return ret;
10846 }
10847
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * After an extent tree init the extent tree no longer describes the data
 * extents, so it cannot be used as the source.  When @search_fs_tree is
 * non-zero the fs/subvolume trees are walked instead; otherwise the extent
 * tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
10864
10865 static void free_roots_info_cache(void)
10866 {
10867         if (!roots_info_cache)
10868                 return;
10869
10870         while (!cache_tree_empty(roots_info_cache)) {
10871                 struct cache_extent *entry;
10872                 struct root_item_info *rii;
10873
10874                 entry = first_cache_extent(roots_info_cache);
10875                 if (!entry)
10876                         break;
10877                 remove_cache_extent(roots_info_cache, entry);
10878                 rii = container_of(entry, struct root_item_info, cache_extent);
10879                 free(rii);
10880         }
10881
10882         free(roots_info_cache);
10883         roots_info_cache = NULL;
10884 }
10885
10886 static int build_roots_info_cache(struct btrfs_fs_info *info)
10887 {
10888         int ret = 0;
10889         struct btrfs_key key;
10890         struct extent_buffer *leaf;
10891         struct btrfs_path *path;
10892
10893         if (!roots_info_cache) {
10894                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10895                 if (!roots_info_cache)
10896                         return -ENOMEM;
10897                 cache_tree_init(roots_info_cache);
10898         }
10899
10900         path = btrfs_alloc_path();
10901         if (!path)
10902                 return -ENOMEM;
10903
10904         key.objectid = 0;
10905         key.type = BTRFS_EXTENT_ITEM_KEY;
10906         key.offset = 0;
10907
10908         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10909         if (ret < 0)
10910                 goto out;
10911         leaf = path->nodes[0];
10912
10913         while (1) {
10914                 struct btrfs_key found_key;
10915                 struct btrfs_extent_item *ei;
10916                 struct btrfs_extent_inline_ref *iref;
10917                 int slot = path->slots[0];
10918                 int type;
10919                 u64 flags;
10920                 u64 root_id;
10921                 u8 level;
10922                 struct cache_extent *entry;
10923                 struct root_item_info *rii;
10924
10925                 if (slot >= btrfs_header_nritems(leaf)) {
10926                         ret = btrfs_next_leaf(info->extent_root, path);
10927                         if (ret < 0) {
10928                                 break;
10929                         } else if (ret) {
10930                                 ret = 0;
10931                                 break;
10932                         }
10933                         leaf = path->nodes[0];
10934                         slot = path->slots[0];
10935                 }
10936
10937                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10938
10939                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10940                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10941                         goto next;
10942
10943                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10944                 flags = btrfs_extent_flags(leaf, ei);
10945
10946                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10947                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10948                         goto next;
10949
10950                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10951                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10952                         level = found_key.offset;
10953                 } else {
10954                         struct btrfs_tree_block_info *binfo;
10955
10956                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10957                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10958                         level = btrfs_tree_block_level(leaf, binfo);
10959                 }
10960
10961                 /*
10962                  * For a root extent, it must be of the following type and the
10963                  * first (and only one) iref in the item.
10964                  */
10965                 type = btrfs_extent_inline_ref_type(leaf, iref);
10966                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10967                         goto next;
10968
10969                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10970                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10971                 if (!entry) {
10972                         rii = malloc(sizeof(struct root_item_info));
10973                         if (!rii) {
10974                                 ret = -ENOMEM;
10975                                 goto out;
10976                         }
10977                         rii->cache_extent.start = root_id;
10978                         rii->cache_extent.size = 1;
10979                         rii->level = (u8)-1;
10980                         entry = &rii->cache_extent;
10981                         ret = insert_cache_extent(roots_info_cache, entry);
10982                         ASSERT(ret == 0);
10983                 } else {
10984                         rii = container_of(entry, struct root_item_info,
10985                                            cache_extent);
10986                 }
10987
10988                 ASSERT(rii->cache_extent.start == root_id);
10989                 ASSERT(rii->cache_extent.size == 1);
10990
10991                 if (level > rii->level || rii->level == (u8)-1) {
10992                         rii->level = level;
10993                         rii->bytenr = found_key.objectid;
10994                         rii->gen = btrfs_extent_generation(leaf, ei);
10995                         rii->node_count = 1;
10996                 } else if (level == rii->level) {
10997                         rii->node_count++;
10998                 }
10999 next:
11000                 path->slots[0]++;
11001         }
11002
11003 out:
11004         btrfs_free_path(path);
11005
11006         return ret;
11007 }
11008
11009 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11010                                   struct btrfs_path *path,
11011                                   const struct btrfs_key *root_key,
11012                                   const int read_only_mode)
11013 {
11014         const u64 root_id = root_key->objectid;
11015         struct cache_extent *entry;
11016         struct root_item_info *rii;
11017         struct btrfs_root_item ri;
11018         unsigned long offset;
11019
11020         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11021         if (!entry) {
11022                 fprintf(stderr,
11023                         "Error: could not find extent items for root %llu\n",
11024                         root_key->objectid);
11025                 return -ENOENT;
11026         }
11027
11028         rii = container_of(entry, struct root_item_info, cache_extent);
11029         ASSERT(rii->cache_extent.start == root_id);
11030         ASSERT(rii->cache_extent.size == 1);
11031
11032         if (rii->node_count != 1) {
11033                 fprintf(stderr,
11034                         "Error: could not find btree root extent for root %llu\n",
11035                         root_id);
11036                 return -ENOENT;
11037         }
11038
11039         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11040         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11041
11042         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11043             btrfs_root_level(&ri) != rii->level ||
11044             btrfs_root_generation(&ri) != rii->gen) {
11045
11046                 /*
11047                  * If we're in repair mode but our caller told us to not update
11048                  * the root item, i.e. just check if it needs to be updated, don't
11049                  * print this message, since the caller will call us again shortly
11050                  * for the same root item without read only mode (the caller will
11051                  * open a transaction first).
11052                  */
11053                 if (!(read_only_mode && repair))
11054                         fprintf(stderr,
11055                                 "%sroot item for root %llu,"
11056                                 " current bytenr %llu, current gen %llu, current level %u,"
11057                                 " new bytenr %llu, new gen %llu, new level %u\n",
11058                                 (read_only_mode ? "" : "fixing "),
11059                                 root_id,
11060                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11061                                 btrfs_root_level(&ri),
11062                                 rii->bytenr, rii->gen, rii->level);
11063
11064                 if (btrfs_root_generation(&ri) > rii->gen) {
11065                         fprintf(stderr,
11066                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11067                                 root_id, btrfs_root_generation(&ri), rii->gen);
11068                         return -EINVAL;
11069                 }
11070
11071                 if (!read_only_mode) {
11072                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11073                         btrfs_set_root_level(&ri, rii->level);
11074                         btrfs_set_root_generation(&ri, rii->gen);
11075                         write_extent_buffer(path->nodes[0], &ri,
11076                                             offset, sizeof(ri));
11077                 }
11078
11079                 return 1;
11080         }
11081
11082         return 0;
11083 }
11084
11085 /*
11086  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11087  * caused read-only snapshots to be corrupted if they were created at a moment
11088  * when the source subvolume/snapshot had orphan items. The issue was that the
11089  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11090  * node instead of the post orphan cleanup root node.
11091  * So this function, and its callees, just detects and fixes those cases. Even
11092  * though the regression was for read-only snapshots, this function applies to
11093  * any snapshot/subvolume root.
11094  * This must be run before any other repair code - not doing it so, makes other
11095  * repair code delete or modify backrefs in the extent tree for example, which
11096  * will result in an inconsistent fs after repairing the root items.
11097  */
11098 static int repair_root_items(struct btrfs_fs_info *info)
11099 {
11100         struct btrfs_path *path = NULL;
11101         struct btrfs_key key;
11102         struct extent_buffer *leaf;
11103         struct btrfs_trans_handle *trans = NULL;
11104         int ret = 0;
11105         int bad_roots = 0;
11106         int need_trans = 0;
11107
11108         ret = build_roots_info_cache(info);
11109         if (ret)
11110                 goto out;
11111
11112         path = btrfs_alloc_path();
11113         if (!path) {
11114                 ret = -ENOMEM;
11115                 goto out;
11116         }
11117
11118         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11119         key.type = BTRFS_ROOT_ITEM_KEY;
11120         key.offset = 0;
11121
11122 again:
11123         /*
11124          * Avoid opening and committing transactions if a leaf doesn't have
11125          * any root items that need to be fixed, so that we avoid rotating
11126          * backup roots unnecessarily.
11127          */
11128         if (need_trans) {
11129                 trans = btrfs_start_transaction(info->tree_root, 1);
11130                 if (IS_ERR(trans)) {
11131                         ret = PTR_ERR(trans);
11132                         goto out;
11133                 }
11134         }
11135
11136         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11137                                 0, trans ? 1 : 0);
11138         if (ret < 0)
11139                 goto out;
11140         leaf = path->nodes[0];
11141
11142         while (1) {
11143                 struct btrfs_key found_key;
11144
11145                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11146                         int no_more_keys = find_next_key(path, &key);
11147
11148                         btrfs_release_path(path);
11149                         if (trans) {
11150                                 ret = btrfs_commit_transaction(trans,
11151                                                                info->tree_root);
11152                                 trans = NULL;
11153                                 if (ret < 0)
11154                                         goto out;
11155                         }
11156                         need_trans = 0;
11157                         if (no_more_keys)
11158                                 break;
11159                         goto again;
11160                 }
11161
11162                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11163
11164                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11165                         goto next;
11166                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11167                         goto next;
11168
11169                 ret = maybe_repair_root_item(info, path, &found_key,
11170                                              trans ? 0 : 1);
11171                 if (ret < 0)
11172                         goto out;
11173                 if (ret) {
11174                         if (!trans && repair) {
11175                                 need_trans = 1;
11176                                 key = found_key;
11177                                 btrfs_release_path(path);
11178                                 goto again;
11179                         }
11180                         bad_roots++;
11181                 }
11182 next:
11183                 path->slots[0]++;
11184         }
11185         ret = 0;
11186 out:
11187         free_roots_info_cache();
11188         btrfs_free_path(path);
11189         if (trans)
11190                 btrfs_commit_transaction(trans, info->tree_root);
11191         if (ret < 0)
11192                 return ret;
11193
11194         return bad_roots;
11195 }
11196
/*
 * Usage text for "btrfs check": synopsis, short description, long
 * description, an empty separator entry, then one entry per option line.
 * Option descriptions start at column 28 of each entry.  The array is
 * NULL-terminated.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               select mode, allows to make some memory/IO",
        "                            trade-offs, where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        NULL
};
11226
11227 int cmd_check(int argc, char **argv)
11228 {
11229         struct cache_tree root_cache;
11230         struct btrfs_root *root;
11231         struct btrfs_fs_info *info;
11232         u64 bytenr = 0;
11233         u64 subvolid = 0;
11234         u64 tree_root_bytenr = 0;
11235         u64 chunk_root_bytenr = 0;
11236         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11237         int ret;
11238         u64 num;
11239         int init_csum_tree = 0;
11240         int readonly = 0;
11241         int qgroup_report = 0;
11242         int qgroups_repaired = 0;
11243         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11244
11245         while(1) {
11246                 int c;
11247                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11248                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11249                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11250                         GETOPT_VAL_MODE };
11251                 static const struct option long_options[] = {
11252                         { "super", required_argument, NULL, 's' },
11253                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11254                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11255                         { "init-csum-tree", no_argument, NULL,
11256                                 GETOPT_VAL_INIT_CSUM },
11257                         { "init-extent-tree", no_argument, NULL,
11258                                 GETOPT_VAL_INIT_EXTENT },
11259                         { "check-data-csum", no_argument, NULL,
11260                                 GETOPT_VAL_CHECK_CSUM },
11261                         { "backup", no_argument, NULL, 'b' },
11262                         { "subvol-extents", required_argument, NULL, 'E' },
11263                         { "qgroup-report", no_argument, NULL, 'Q' },
11264                         { "tree-root", required_argument, NULL, 'r' },
11265                         { "chunk-root", required_argument, NULL,
11266                                 GETOPT_VAL_CHUNK_TREE },
11267                         { "progress", no_argument, NULL, 'p' },
11268                         { "mode", required_argument, NULL,
11269                                 GETOPT_VAL_MODE },
11270                         { NULL, 0, NULL, 0}
11271                 };
11272
11273                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11274                 if (c < 0)
11275                         break;
11276                 switch(c) {
11277                         case 'a': /* ignored */ break;
11278                         case 'b':
11279                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11280                                 break;
11281                         case 's':
11282                                 num = arg_strtou64(optarg);
11283                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11284                                         error(
11285                                         "super mirror should be less than %d",
11286                                                 BTRFS_SUPER_MIRROR_MAX);
11287                                         exit(1);
11288                                 }
11289                                 bytenr = btrfs_sb_offset(((int)num));
11290                                 printf("using SB copy %llu, bytenr %llu\n", num,
11291                                        (unsigned long long)bytenr);
11292                                 break;
11293                         case 'Q':
11294                                 qgroup_report = 1;
11295                                 break;
11296                         case 'E':
11297                                 subvolid = arg_strtou64(optarg);
11298                                 break;
11299                         case 'r':
11300                                 tree_root_bytenr = arg_strtou64(optarg);
11301                                 break;
11302                         case GETOPT_VAL_CHUNK_TREE:
11303                                 chunk_root_bytenr = arg_strtou64(optarg);
11304                                 break;
11305                         case 'p':
11306                                 ctx.progress_enabled = true;
11307                                 break;
11308                         case '?':
11309                         case 'h':
11310                                 usage(cmd_check_usage);
11311                         case GETOPT_VAL_REPAIR:
11312                                 printf("enabling repair mode\n");
11313                                 repair = 1;
11314                                 ctree_flags |= OPEN_CTREE_WRITES;
11315                                 break;
11316                         case GETOPT_VAL_READONLY:
11317                                 readonly = 1;
11318                                 break;
11319                         case GETOPT_VAL_INIT_CSUM:
11320                                 printf("Creating a new CRC tree\n");
11321                                 init_csum_tree = 1;
11322                                 repair = 1;
11323                                 ctree_flags |= OPEN_CTREE_WRITES;
11324                                 break;
11325                         case GETOPT_VAL_INIT_EXTENT:
11326                                 init_extent_tree = 1;
11327                                 ctree_flags |= (OPEN_CTREE_WRITES |
11328                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11329                                 repair = 1;
11330                                 break;
11331                         case GETOPT_VAL_CHECK_CSUM:
11332                                 check_data_csum = 1;
11333                                 break;
11334                         case GETOPT_VAL_MODE:
11335                                 check_mode = parse_check_mode(optarg);
11336                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11337                                         error("unknown mode: %s", optarg);
11338                                         exit(1);
11339                                 }
11340                                 break;
11341                 }
11342         }
11343
11344         if (check_argc_exact(argc - optind, 1))
11345                 usage(cmd_check_usage);
11346
11347         if (ctx.progress_enabled) {
11348                 ctx.tp = TASK_NOTHING;
11349                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11350         }
11351
11352         /* This check is the only reason for --readonly to exist */
11353         if (readonly && repair) {
11354                 error("repair options are not compatible with --readonly");
11355                 exit(1);
11356         }
11357
11358         /*
11359          * Not supported yet
11360          */
11361         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11362                 error("low memory mode doesn't support repair yet");
11363                 exit(1);
11364         }
11365
11366         radix_tree_init();
11367         cache_tree_init(&root_cache);
11368
11369         if((ret = check_mounted(argv[optind])) < 0) {
11370                 error("could not check mount status: %s", strerror(-ret));
11371                 goto err_out;
11372         } else if(ret) {
11373                 error("%s is currently mounted, aborting", argv[optind]);
11374                 ret = -EBUSY;
11375                 goto err_out;
11376         }
11377
11378         /* only allow partial opening under repair mode */
11379         if (repair)
11380                 ctree_flags |= OPEN_CTREE_PARTIAL;
11381
11382         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11383                                   chunk_root_bytenr, ctree_flags);
11384         if (!info) {
11385                 error("cannot open file system");
11386                 ret = -EIO;
11387                 goto err_out;
11388         }
11389
11390         global_info = info;
11391         root = info->fs_root;
11392
11393         /*
11394          * repair mode will force us to commit transaction which
11395          * will make us fail to load log tree when mounting.
11396          */
11397         if (repair && btrfs_super_log_root(info->super_copy)) {
11398                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11399                 if (!ret) {
11400                         ret = 1;
11401                         goto close_out;
11402                 }
11403                 ret = zero_log_tree(root);
11404                 if (ret) {
11405                         error("failed to zero log tree: %d", ret);
11406                         goto close_out;
11407                 }
11408         }
11409
11410         uuid_unparse(info->super_copy->fsid, uuidbuf);
11411         if (qgroup_report) {
11412                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11413                        uuidbuf);
11414                 ret = qgroup_verify_all(info);
11415                 if (ret == 0)
11416                         report_qgroups(1);
11417                 goto close_out;
11418         }
11419         if (subvolid) {
11420                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11421                        subvolid, argv[optind], uuidbuf);
11422                 ret = print_extent_state(info, subvolid);
11423                 goto close_out;
11424         }
11425         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11426
11427         if (!extent_buffer_uptodate(info->tree_root->node) ||
11428             !extent_buffer_uptodate(info->dev_root->node) ||
11429             !extent_buffer_uptodate(info->chunk_root->node)) {
11430                 error("critical roots corrupted, unable to check the filesystem");
11431                 ret = -EIO;
11432                 goto close_out;
11433         }
11434
11435         if (init_extent_tree || init_csum_tree) {
11436                 struct btrfs_trans_handle *trans;
11437
11438                 trans = btrfs_start_transaction(info->extent_root, 0);
11439                 if (IS_ERR(trans)) {
11440                         error("error starting transaction");
11441                         ret = PTR_ERR(trans);
11442                         goto close_out;
11443                 }
11444
11445                 if (init_extent_tree) {
11446                         printf("Creating a new extent tree\n");
11447                         ret = reinit_extent_tree(trans, info);
11448                         if (ret)
11449                                 goto close_out;
11450                 }
11451
11452                 if (init_csum_tree) {
11453                         printf("Reinitialize checksum tree\n");
11454                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11455                         if (ret) {
11456                                 error("checksum tree initialization failed: %d",
11457                                                 ret);
11458                                 ret = -EIO;
11459                                 goto close_out;
11460                         }
11461
11462                         ret = fill_csum_tree(trans, info->csum_root,
11463                                              init_extent_tree);
11464                         if (ret) {
11465                                 error("checksum tree refilling failed: %d", ret);
11466                                 return -EIO;
11467                         }
11468                 }
11469                 /*
11470                  * Ok now we commit and run the normal fsck, which will add
11471                  * extent entries for all of the items it finds.
11472                  */
11473                 ret = btrfs_commit_transaction(trans, info->extent_root);
11474                 if (ret)
11475                         goto close_out;
11476         }
11477         if (!extent_buffer_uptodate(info->extent_root->node)) {
11478                 error("critical: extent_root, unable to check the filesystem");
11479                 ret = -EIO;
11480                 goto close_out;
11481         }
11482         if (!extent_buffer_uptodate(info->csum_root->node)) {
11483                 error("critical: csum_root, unable to check the filesystem");
11484                 ret = -EIO;
11485                 goto close_out;
11486         }
11487
11488         if (!ctx.progress_enabled)
11489                 printf("checking extents");
11490         if (check_mode == CHECK_MODE_LOWMEM)
11491                 ret = check_chunks_and_extents_v2(root);
11492         else
11493                 ret = check_chunks_and_extents(root);
11494         if (ret)
11495                 printf("Errors found in extent allocation tree or chunk allocation");
11496
11497         ret = repair_root_items(info);
11498         if (ret < 0)
11499                 goto close_out;
11500         if (repair) {
11501                 fprintf(stderr, "Fixed %d roots.\n", ret);
11502                 ret = 0;
11503         } else if (ret > 0) {
11504                 fprintf(stderr,
11505                        "Found %d roots with an outdated root item.\n",
11506                        ret);
11507                 fprintf(stderr,
11508                         "Please run a filesystem check with the option --repair to fix them.\n");
11509                 ret = 1;
11510                 goto close_out;
11511         }
11512
11513         if (!ctx.progress_enabled) {
11514                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11515                         fprintf(stderr, "checking free space tree\n");
11516                 else
11517                         fprintf(stderr, "checking free space cache\n");
11518         }
11519         ret = check_space_cache(root);
11520         if (ret)
11521                 goto out;
11522
11523         /*
11524          * We used to have to have these hole extents in between our real
11525          * extents so if we don't have this flag set we need to make sure there
11526          * are no gaps in the file extents for inodes, otherwise we can just
11527          * ignore it when this happens.
11528          */
11529         no_holes = btrfs_fs_incompat(root->fs_info,
11530                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11531         if (!ctx.progress_enabled)
11532                 fprintf(stderr, "checking fs roots\n");
11533         ret = check_fs_roots(root, &root_cache);
11534         if (ret)
11535                 goto out;
11536
11537         fprintf(stderr, "checking csums\n");
11538         ret = check_csums(root);
11539         if (ret)
11540                 goto out;
11541
11542         fprintf(stderr, "checking root refs\n");
11543         ret = check_root_refs(root, &root_cache);
11544         if (ret)
11545                 goto out;
11546
11547         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11548                 struct extent_buffer *eb;
11549
11550                 eb = list_first_entry(&root->fs_info->recow_ebs,
11551                                       struct extent_buffer, recow);
11552                 list_del_init(&eb->recow);
11553                 ret = recow_extent_buffer(root, eb);
11554                 if (ret)
11555                         break;
11556         }
11557
11558         while (!list_empty(&delete_items)) {
11559                 struct bad_item *bad;
11560
11561                 bad = list_first_entry(&delete_items, struct bad_item, list);
11562                 list_del_init(&bad->list);
11563                 if (repair)
11564                         ret = delete_bad_item(root, bad);
11565                 free(bad);
11566         }
11567
11568         if (info->quota_enabled) {
11569                 int err;
11570                 fprintf(stderr, "checking quota groups\n");
11571                 err = qgroup_verify_all(info);
11572                 if (err)
11573                         goto out;
11574                 report_qgroups(0);
11575                 err = repair_qgroups(info, &qgroups_repaired);
11576                 if (err)
11577                         goto out;
11578         }
11579
11580         if (!list_empty(&root->fs_info->recow_ebs)) {
11581                 error("transid errors in file system");
11582                 ret = 1;
11583         }
11584 out:
11585         /* Don't override original ret */
11586         if (!ret && qgroups_repaired)
11587                 ret = qgroups_repaired;
11588
11589         if (found_old_backref) { /*
11590                  * there was a disk format change when mixed
11591                  * backref was in testing tree. The old format
11592                  * existed about one week.
11593                  */
11594                 printf("\n * Found old mixed backref format. "
11595                        "The old format is not supported! *"
11596                        "\n * Please mount the FS in readonly mode, "
11597                        "backup data and re-format the FS. *\n\n");
11598                 ret = 1;
11599         }
11600         printf("found %llu bytes used err is %d\n",
11601                (unsigned long long)bytes_used, ret);
11602         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11603         printf("total tree bytes: %llu\n",
11604                (unsigned long long)total_btree_bytes);
11605         printf("total fs tree bytes: %llu\n",
11606                (unsigned long long)total_fs_tree_bytes);
11607         printf("total extent tree bytes: %llu\n",
11608                (unsigned long long)total_extent_tree_bytes);
11609         printf("btree space waste bytes: %llu\n",
11610                (unsigned long long)btree_space_waste);
11611         printf("file data blocks allocated: %llu\n referenced %llu\n",
11612                 (unsigned long long)data_bytes_allocated,
11613                 (unsigned long long)data_bytes_referenced);
11614
11615         free_qgroup_counts();
11616         free_root_recs_tree(&root_cache);
11617 close_out:
11618         close_ctree(root);
11619 err_out:
11620         if (ctx.progress_enabled)
11621                 task_deinit(ctx.info);
11622
11623         return ret;
11624 }