btrfs-progs: check: handle errors returned by add_extent_rec_nolookup
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phase of the check currently running.  print_status_check() uses the
 * value as an index into its table of progress messages.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        /* Sentinel: must stay the last enumerator */
        TASK_NOTHING,
};
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Check implementation selected by the user, see parse_check_mode().
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for a string it
 * does not recognize.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        /* Alias, not a distinct mode */
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
86 struct extent_backref {
87         struct list_head list;
88         unsigned int is_data:1;
89         unsigned int found_extent_tree:1;
90         unsigned int full_backref:1;
91         unsigned int found_ref:1;
92         unsigned int broken:1;
93 };
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
100 struct data_backref {
101         struct extent_backref node;
102         union {
103                 u64 parent;
104                 u64 root;
105         };
106         u64 owner;
107         u64 offset;
108         u64 disk_bytenr;
109         u64 bytes;
110         u64 ram_bytes;
111         u32 num_refs;
112         u32 found_ref;
113 };
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
120 /*
121  * Much like data_backref, just removed the undetermined members
122  * and change it to use list_head.
123  * During extent scan, it is stored in root->orphan_data_extent.
124  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
125  */
126 struct orphan_data_extent {
127         struct list_head list;
128         u64 root;
129         u64 objectid;
130         u64 offset;
131         u64 disk_bytenr;
132         u64 disk_len;
133 };
134
135 struct tree_backref {
136         struct extent_backref node;
137         union {
138                 u64 parent;
139                 u64 root;
140         };
141 };
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
/*
 * Value used to explicitly initialize
 * extent_record::flag_block_full_backref.
 */
enum {
        FLAG_UNSET = 2
};
150
151 struct extent_record {
152         struct list_head backrefs;
153         struct list_head dups;
154         struct list_head list;
155         struct cache_extent cache;
156         struct btrfs_disk_key parent_key;
157         u64 start;
158         u64 max_size;
159         u64 nr;
160         u64 refs;
161         u64 extent_item_refs;
162         u64 generation;
163         u64 parent_generation;
164         u64 info_objectid;
165         u32 num_duplicates;
166         u8 info_level;
167         unsigned int flag_block_full_backref:2;
168         unsigned int found_rec:1;
169         unsigned int content_checked:1;
170         unsigned int owner_ref_checked:1;
171         unsigned int is_root:1;
172         unsigned int metadata:1;
173         unsigned int bad_full_backref:1;
174         unsigned int crossing_stripes:1;
175         unsigned int wrong_chunk_type:1;
176 };
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
183 struct inode_backref {
184         struct list_head list;
185         unsigned int found_dir_item:1;
186         unsigned int found_dir_index:1;
187         unsigned int found_inode_ref:1;
188         unsigned int filetype:8;
189         int errors;
190         unsigned int ref_type;
191         u64 dir;
192         u64 index;
193         u16 namelen;
194         char name[0];
195 };
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
202 struct root_item_record {
203         struct list_head list;
204         u64 objectid;
205         u64 bytenr;
206         u64 last_snapshot;
207         u8 level;
208         u8 drop_level;
209         int level_size;
210         struct btrfs_key drop_key;
211 };
212
/*
 * Error bits for backref records (inode_backref::errors is tested against
 * them in this file); print_ref_error() turns them into text.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)        /* 0x0001 */
#define REF_ERR_NO_DIR_INDEX            (1 << 1)        /* 0x0002 */
#define REF_ERR_NO_INODE_REF            (1 << 2)        /* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)        /* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)        /* 0x0010 */
#define REF_ERR_DUP_INODE_REF           (1 << 5)        /* 0x0020 */
#define REF_ERR_INDEX_UNMATCH           (1 << 6)        /* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)        /* 0x0080 */
#define REF_ERR_NAME_TOO_LONG           (1 << 8)        /* 0x0100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)        /* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)       /* 0x0400 */
#define REF_ERR_DUP_ROOT_REF            (1 << 11)       /* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)       /* 0x1000 */
226
227 struct file_extent_hole {
228         struct rb_node node;
229         u64 start;
230         u64 len;
231 };
232
233 struct inode_record {
234         struct list_head backrefs;
235         unsigned int checked:1;
236         unsigned int merging:1;
237         unsigned int found_inode_item:1;
238         unsigned int found_dir_item:1;
239         unsigned int found_file_extent:1;
240         unsigned int found_csum_item:1;
241         unsigned int some_csum_missing:1;
242         unsigned int nodatasum:1;
243         int errors;
244
245         u64 ino;
246         u32 nlink;
247         u32 imode;
248         u64 isize;
249         u64 nbytes;
250
251         u32 found_link;
252         u64 found_size;
253         u64 extent_start;
254         u64 extent_end;
255         struct rb_root holes;
256         struct list_head orphan_extents;
257
258         u32 refs;
259 };
260
/*
 * Error bits stored in inode_record::errors; print_inode_error() turns
 * them into text.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)        /* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)        /* 0x0002 */
#define I_ERR_DUP_INODE_ITEM            (1 << 2)        /* 0x0004 */
#define I_ERR_DUP_DIR_INDEX             (1 << 3)        /* 0x0008 */
#define I_ERR_ODD_DIR_ITEM              (1 << 4)        /* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)        /* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)        /* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)        /* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8)        /* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)        /* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10)       /* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)       /* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)       /* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)       /* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)       /* 0x4000 */
276
277 struct root_backref {
278         struct list_head list;
279         unsigned int found_dir_item:1;
280         unsigned int found_dir_index:1;
281         unsigned int found_back_ref:1;
282         unsigned int found_forward_ref:1;
283         unsigned int reachable:1;
284         int errors;
285         u64 ref_root;
286         u64 dir;
287         u64 index;
288         u16 namelen;
289         char name[0];
290 };
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
297 struct root_record {
298         struct list_head backrefs;
299         struct cache_extent cache;
300         unsigned int found_root_item:1;
301         u64 objectid;
302         u32 found_ref;
303 };
304
305 struct ptr_node {
306         struct cache_extent cache;
307         void *data;
308 };
309
310 struct shared_node {
311         struct cache_extent cache;
312         struct cache_tree root_cache;
313         struct cache_tree inode_cache;
314         struct inode_record *current;
315         u32 refs;
316 };
317
318 struct block_info {
319         u64 start;
320         u32 size;
321 };
322
323 struct walk_control {
324         struct cache_tree shared;
325         struct shared_node *nodes[BTRFS_MAX_LEVEL];
326         int active_node;
327         int root_level;
328 };
329
330 struct bad_item {
331         struct btrfs_key key;
332         u64 root_id;
333         struct list_head list;
334 };
335
336 struct extent_entry {
337         u64 bytenr;
338         u64 bytes;
339         int count;
340         int broken;
341         struct list_head list;
342 };
343
344 struct root_item_info {
345         /* level of the root */
346         u8 level;
347         /* number of nodes at this level, must be 1 for a root */
348         int node_count;
349         u64 bytenr;
350         u64 gen;
351         struct cache_extent cache_extent;
352 };
353
/*
 * Error bits for the low memory mode check.
 *
 * No caller depends on the numeric values yet; they are only used
 * internally to classify errors.  Every flag must occupy its own bit so
 * that two different error classes can be told apart after being OR-ed
 * together.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
/* Used to share bit 4 with REFERENCER_MISMATCH; moved to a free bit */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        putchar('\n');
        fflush(stdout);

        return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         size_t size;
663         int ret;
664
665         rec = malloc(sizeof(*rec));
666         if (!rec)
667                 return ERR_PTR(-ENOMEM);
668         memcpy(rec, orig_rec, sizeof(*rec));
669         rec->refs = 1;
670         INIT_LIST_HEAD(&rec->backrefs);
671         INIT_LIST_HEAD(&rec->orphan_extents);
672         rec->holes = RB_ROOT;
673
674         list_for_each_entry(orig, &orig_rec->backrefs, list) {
675                 size = sizeof(*orig) + orig->namelen + 1;
676                 backref = malloc(size);
677                 if (!backref) {
678                         ret = -ENOMEM;
679                         goto cleanup;
680                 }
681                 memcpy(backref, orig, size);
682                 list_add_tail(&backref->list, &rec->backrefs);
683         }
684         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
685                 dst_orphan = malloc(sizeof(*dst_orphan));
686                 if (!dst_orphan) {
687                         ret = -ENOMEM;
688                         goto cleanup;
689                 }
690                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
691                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
692         }
693         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
694         BUG_ON(ret < 0);
695
696         return rec;
697
698 cleanup:
699         if (!list_empty(&rec->backrefs))
700                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
701                         list_del(&orig->list);
702                         free(orig);
703                 }
704
705         if (!list_empty(&rec->orphan_extents))
706                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
707                         list_del(&orig->list);
708                         free(orig);
709                 }
710
711         free(rec);
712
713         return ERR_PTR(ret);
714 }
715
716 static void print_orphan_data_extents(struct list_head *orphan_extents,
717                                       u64 objectid)
718 {
719         struct orphan_data_extent *orphan;
720
721         if (list_empty(orphan_extents))
722                 return;
723         printf("The following data extent is lost in tree %llu:\n",
724                objectid);
725         list_for_each_entry(orphan, orphan_extents, list) {
726                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
727                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
728                        orphan->disk_len);
729         }
730 }
731
732 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
733 {
734         u64 root_objectid = root->root_key.objectid;
735         int errors = rec->errors;
736
737         if (!errors)
738                 return;
739         /* reloc root errors, we print its corresponding fs root objectid*/
740         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
741                 root_objectid = root->root_key.offset;
742                 fprintf(stderr, "reloc");
743         }
744         fprintf(stderr, "root %llu inode %llu errors %x",
745                 (unsigned long long) root_objectid,
746                 (unsigned long long) rec->ino, rec->errors);
747
748         if (errors & I_ERR_NO_INODE_ITEM)
749                 fprintf(stderr, ", no inode item");
750         if (errors & I_ERR_NO_ORPHAN_ITEM)
751                 fprintf(stderr, ", no orphan item");
752         if (errors & I_ERR_DUP_INODE_ITEM)
753                 fprintf(stderr, ", dup inode item");
754         if (errors & I_ERR_DUP_DIR_INDEX)
755                 fprintf(stderr, ", dup dir index");
756         if (errors & I_ERR_ODD_DIR_ITEM)
757                 fprintf(stderr, ", odd dir item");
758         if (errors & I_ERR_ODD_FILE_EXTENT)
759                 fprintf(stderr, ", odd file extent");
760         if (errors & I_ERR_BAD_FILE_EXTENT)
761                 fprintf(stderr, ", bad file extent");
762         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
763                 fprintf(stderr, ", file extent overlap");
764         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
765                 fprintf(stderr, ", file extent discount");
766         if (errors & I_ERR_DIR_ISIZE_WRONG)
767                 fprintf(stderr, ", dir isize wrong");
768         if (errors & I_ERR_FILE_NBYTES_WRONG)
769                 fprintf(stderr, ", nbytes wrong");
770         if (errors & I_ERR_ODD_CSUM_ITEM)
771                 fprintf(stderr, ", odd csum item");
772         if (errors & I_ERR_SOME_CSUM_MISSING)
773                 fprintf(stderr, ", some csum missing");
774         if (errors & I_ERR_LINK_COUNT_WRONG)
775                 fprintf(stderr, ", link count wrong");
776         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
777                 fprintf(stderr, ", orphan file extent");
778         fprintf(stderr, "\n");
779         /* Print the orphan extents if needed */
780         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
781                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
782
783         /* Print the holes if needed */
784         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
785                 struct file_extent_hole *hole;
786                 struct rb_node *node;
787                 int found = 0;
788
789                 node = rb_first(&rec->holes);
790                 fprintf(stderr, "Found file extent holes:\n");
791                 while (node) {
792                         found = 1;
793                         hole = rb_entry(node, struct file_extent_hole, node);
794                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
795                                 hole->start, hole->len);
796                         node = rb_next(node);
797                 }
798                 if (!found)
799                         fprintf(stderr, "\tstart: 0, len: %llu\n",
800                                 round_up(rec->isize, root->sectorsize));
801         }
802 }
803
804 static void print_ref_error(int errors)
805 {
806         if (errors & REF_ERR_NO_DIR_ITEM)
807                 fprintf(stderr, ", no dir item");
808         if (errors & REF_ERR_NO_DIR_INDEX)
809                 fprintf(stderr, ", no dir index");
810         if (errors & REF_ERR_NO_INODE_REF)
811                 fprintf(stderr, ", no inode ref");
812         if (errors & REF_ERR_DUP_DIR_ITEM)
813                 fprintf(stderr, ", dup dir item");
814         if (errors & REF_ERR_DUP_DIR_INDEX)
815                 fprintf(stderr, ", dup dir index");
816         if (errors & REF_ERR_DUP_INODE_REF)
817                 fprintf(stderr, ", dup inode ref");
818         if (errors & REF_ERR_INDEX_UNMATCH)
819                 fprintf(stderr, ", index mismatch");
820         if (errors & REF_ERR_FILETYPE_UNMATCH)
821                 fprintf(stderr, ", filetype mismatch");
822         if (errors & REF_ERR_NAME_TOO_LONG)
823                 fprintf(stderr, ", name too long");
824         if (errors & REF_ERR_NO_ROOT_REF)
825                 fprintf(stderr, ", no root ref");
826         if (errors & REF_ERR_NO_ROOT_BACKREF)
827                 fprintf(stderr, ", no root backref");
828         if (errors & REF_ERR_DUP_ROOT_REF)
829                 fprintf(stderr, ", dup root ref");
830         if (errors & REF_ERR_DUP_ROOT_BACKREF)
831                 fprintf(stderr, ", dup root backref");
832         fprintf(stderr, "\n");
833 }
834
835 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
836                                           u64 ino, int mod)
837 {
838         struct ptr_node *node;
839         struct cache_extent *cache;
840         struct inode_record *rec = NULL;
841         int ret;
842
843         cache = lookup_cache_extent(inode_cache, ino, 1);
844         if (cache) {
845                 node = container_of(cache, struct ptr_node, cache);
846                 rec = node->data;
847                 if (mod && rec->refs > 1) {
848                         node->data = clone_inode_rec(rec);
849                         if (IS_ERR(node->data))
850                                 return node->data;
851                         rec->refs--;
852                         rec = node->data;
853                 }
854         } else if (mod) {
855                 rec = calloc(1, sizeof(*rec));
856                 if (!rec)
857                         return ERR_PTR(-ENOMEM);
858                 rec->ino = ino;
859                 rec->extent_start = (u64)-1;
860                 rec->refs = 1;
861                 INIT_LIST_HEAD(&rec->backrefs);
862                 INIT_LIST_HEAD(&rec->orphan_extents);
863                 rec->holes = RB_ROOT;
864
865                 node = malloc(sizeof(*node));
866                 if (!node) {
867                         free(rec);
868                         return ERR_PTR(-ENOMEM);
869                 }
870                 node->cache.start = ino;
871                 node->cache.size = 1;
872                 node->data = rec;
873
874                 if (ino == BTRFS_FREE_INO_OBJECTID)
875                         rec->found_link = 1;
876
877                 ret = insert_cache_extent(inode_cache, &node->cache);
878                 if (ret)
879                         return ERR_PTR(-EEXIST);
880         }
881         return rec;
882 }
883
884 static void free_orphan_data_extents(struct list_head *orphan_extents)
885 {
886         struct orphan_data_extent *orphan;
887
888         while (!list_empty(orphan_extents)) {
889                 orphan = list_entry(orphan_extents->next,
890                                     struct orphan_data_extent, list);
891                 list_del(&orphan->list);
892                 free(orphan);
893         }
894 }
895
896 static void free_inode_rec(struct inode_record *rec)
897 {
898         struct inode_backref *backref;
899
900         if (--rec->refs > 0)
901                 return;
902
903         while (!list_empty(&rec->backrefs)) {
904                 backref = to_inode_backref(rec->backrefs.next);
905                 list_del(&backref->list);
906                 free(backref);
907         }
908         free_orphan_data_extents(&rec->orphan_extents);
909         free_file_extent_holes(&rec->holes);
910         free(rec);
911 }
912
913 static int can_free_inode_rec(struct inode_record *rec)
914 {
915         if (!rec->errors && rec->checked && rec->found_inode_item &&
916             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
917                 return 1;
918         return 0;
919 }
920
921 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
922                                  struct inode_record *rec)
923 {
924         struct cache_extent *cache;
925         struct inode_backref *tmp, *backref;
926         struct ptr_node *node;
927         unsigned char filetype;
928
929         if (!rec->found_inode_item)
930                 return;
931
932         filetype = imode_to_type(rec->imode);
933         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
934                 if (backref->found_dir_item && backref->found_dir_index) {
935                         if (backref->filetype != filetype)
936                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
937                         if (!backref->errors && backref->found_inode_ref &&
938                             rec->nlink == rec->found_link) {
939                                 list_del(&backref->list);
940                                 free(backref);
941                         }
942                 }
943         }
944
945         if (!rec->checked || rec->merging)
946                 return;
947
948         if (S_ISDIR(rec->imode)) {
949                 if (rec->found_size != rec->isize)
950                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
951                 if (rec->found_file_extent)
952                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
953         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
954                 if (rec->found_dir_item)
955                         rec->errors |= I_ERR_ODD_DIR_ITEM;
956                 if (rec->found_size != rec->nbytes)
957                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
958                 if (rec->nlink > 0 && !no_holes &&
959                     (rec->extent_end < rec->isize ||
960                      first_extent_gap(&rec->holes) < rec->isize))
961                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
962         }
963
964         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
965                 if (rec->found_csum_item && rec->nodatasum)
966                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
967                 if (rec->some_csum_missing && !rec->nodatasum)
968                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
969         }
970
971         BUG_ON(rec->refs != 1);
972         if (can_free_inode_rec(rec)) {
973                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
974                 node = container_of(cache, struct ptr_node, cache);
975                 BUG_ON(node->data != rec);
976                 remove_cache_extent(inode_cache, &node->cache);
977                 free(node);
978                 free_inode_rec(rec);
979         }
980 }
981
982 static int check_orphan_item(struct btrfs_root *root, u64 ino)
983 {
984         struct btrfs_path path;
985         struct btrfs_key key;
986         int ret;
987
988         key.objectid = BTRFS_ORPHAN_OBJECTID;
989         key.type = BTRFS_ORPHAN_ITEM_KEY;
990         key.offset = ino;
991
992         btrfs_init_path(&path);
993         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
994         btrfs_release_path(&path);
995         if (ret > 0)
996                 ret = -ENOENT;
997         return ret;
998 }
999
1000 static int process_inode_item(struct extent_buffer *eb,
1001                               int slot, struct btrfs_key *key,
1002                               struct shared_node *active_node)
1003 {
1004         struct inode_record *rec;
1005         struct btrfs_inode_item *item;
1006
1007         rec = active_node->current;
1008         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1009         if (rec->found_inode_item) {
1010                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1011                 return 1;
1012         }
1013         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1014         rec->nlink = btrfs_inode_nlink(eb, item);
1015         rec->isize = btrfs_inode_size(eb, item);
1016         rec->nbytes = btrfs_inode_nbytes(eb, item);
1017         rec->imode = btrfs_inode_mode(eb, item);
1018         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1019                 rec->nodatasum = 1;
1020         rec->found_inode_item = 1;
1021         if (rec->nlink == 0)
1022                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1023         maybe_free_inode_rec(&active_node->inode_cache, rec);
1024         return 0;
1025 }
1026
1027 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1028                                                 const char *name,
1029                                                 int namelen, u64 dir)
1030 {
1031         struct inode_backref *backref;
1032
1033         list_for_each_entry(backref, &rec->backrefs, list) {
1034                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1035                         break;
1036                 if (backref->dir != dir || backref->namelen != namelen)
1037                         continue;
1038                 if (memcmp(name, backref->name, namelen))
1039                         continue;
1040                 return backref;
1041         }
1042
1043         backref = malloc(sizeof(*backref) + namelen + 1);
1044         if (!backref)
1045                 return NULL;
1046         memset(backref, 0, sizeof(*backref));
1047         backref->dir = dir;
1048         backref->namelen = namelen;
1049         memcpy(backref->name, name, namelen);
1050         backref->name[namelen] = '\0';
1051         list_add_tail(&backref->list, &rec->backrefs);
1052         return backref;
1053 }
1054
1055 static int add_inode_backref(struct cache_tree *inode_cache,
1056                              u64 ino, u64 dir, u64 index,
1057                              const char *name, int namelen,
1058                              int filetype, int itemtype, int errors)
1059 {
1060         struct inode_record *rec;
1061         struct inode_backref *backref;
1062
1063         rec = get_inode_rec(inode_cache, ino, 1);
1064         BUG_ON(IS_ERR(rec));
1065         backref = get_inode_backref(rec, name, namelen, dir);
1066         BUG_ON(!backref);
1067         if (errors)
1068                 backref->errors |= errors;
1069         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1070                 if (backref->found_dir_index)
1071                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1072                 if (backref->found_inode_ref && backref->index != index)
1073                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1074                 if (backref->found_dir_item && backref->filetype != filetype)
1075                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1076
1077                 backref->index = index;
1078                 backref->filetype = filetype;
1079                 backref->found_dir_index = 1;
1080         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1081                 rec->found_link++;
1082                 if (backref->found_dir_item)
1083                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1084                 if (backref->found_dir_index && backref->filetype != filetype)
1085                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1086
1087                 backref->filetype = filetype;
1088                 backref->found_dir_item = 1;
1089         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1090                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1091                 if (backref->found_inode_ref)
1092                         backref->errors |= REF_ERR_DUP_INODE_REF;
1093                 if (backref->found_dir_index && backref->index != index)
1094                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1095                 else
1096                         backref->index = index;
1097
1098                 backref->ref_type = itemtype;
1099                 backref->found_inode_ref = 1;
1100         } else {
1101                 BUG_ON(1);
1102         }
1103
1104         maybe_free_inode_rec(inode_cache, rec);
1105         return 0;
1106 }
1107
1108 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1109                             struct cache_tree *dst_cache)
1110 {
1111         struct inode_backref *backref;
1112         u32 dir_count = 0;
1113         int ret = 0;
1114
1115         dst->merging = 1;
1116         list_for_each_entry(backref, &src->backrefs, list) {
1117                 if (backref->found_dir_index) {
1118                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1119                                         backref->index, backref->name,
1120                                         backref->namelen, backref->filetype,
1121                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1122                 }
1123                 if (backref->found_dir_item) {
1124                         dir_count++;
1125                         add_inode_backref(dst_cache, dst->ino,
1126                                         backref->dir, 0, backref->name,
1127                                         backref->namelen, backref->filetype,
1128                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1129                 }
1130                 if (backref->found_inode_ref) {
1131                         add_inode_backref(dst_cache, dst->ino,
1132                                         backref->dir, backref->index,
1133                                         backref->name, backref->namelen, 0,
1134                                         backref->ref_type, backref->errors);
1135                 }
1136         }
1137
1138         if (src->found_dir_item)
1139                 dst->found_dir_item = 1;
1140         if (src->found_file_extent)
1141                 dst->found_file_extent = 1;
1142         if (src->found_csum_item)
1143                 dst->found_csum_item = 1;
1144         if (src->some_csum_missing)
1145                 dst->some_csum_missing = 1;
1146         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1147                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1148                 if (ret < 0)
1149                         return ret;
1150         }
1151
1152         BUG_ON(src->found_link < dir_count);
1153         dst->found_link += src->found_link - dir_count;
1154         dst->found_size += src->found_size;
1155         if (src->extent_start != (u64)-1) {
1156                 if (dst->extent_start == (u64)-1) {
1157                         dst->extent_start = src->extent_start;
1158                         dst->extent_end = src->extent_end;
1159                 } else {
1160                         if (dst->extent_end > src->extent_start)
1161                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1162                         else if (dst->extent_end < src->extent_start) {
1163                                 ret = add_file_extent_hole(&dst->holes,
1164                                         dst->extent_end,
1165                                         src->extent_start - dst->extent_end);
1166                         }
1167                         if (dst->extent_end < src->extent_end)
1168                                 dst->extent_end = src->extent_end;
1169                 }
1170         }
1171
1172         dst->errors |= src->errors;
1173         if (src->found_inode_item) {
1174                 if (!dst->found_inode_item) {
1175                         dst->nlink = src->nlink;
1176                         dst->isize = src->isize;
1177                         dst->nbytes = src->nbytes;
1178                         dst->imode = src->imode;
1179                         dst->nodatasum = src->nodatasum;
1180                         dst->found_inode_item = 1;
1181                 } else {
1182                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1183                 }
1184         }
1185         dst->merging = 0;
1186
1187         return 0;
1188 }
1189
1190 static int splice_shared_node(struct shared_node *src_node,
1191                               struct shared_node *dst_node)
1192 {
1193         struct cache_extent *cache;
1194         struct ptr_node *node, *ins;
1195         struct cache_tree *src, *dst;
1196         struct inode_record *rec, *conflict;
1197         u64 current_ino = 0;
1198         int splice = 0;
1199         int ret;
1200
1201         if (--src_node->refs == 0)
1202                 splice = 1;
1203         if (src_node->current)
1204                 current_ino = src_node->current->ino;
1205
1206         src = &src_node->root_cache;
1207         dst = &dst_node->root_cache;
1208 again:
1209         cache = search_cache_extent(src, 0);
1210         while (cache) {
1211                 node = container_of(cache, struct ptr_node, cache);
1212                 rec = node->data;
1213                 cache = next_cache_extent(cache);
1214
1215                 if (splice) {
1216                         remove_cache_extent(src, &node->cache);
1217                         ins = node;
1218                 } else {
1219                         ins = malloc(sizeof(*ins));
1220                         BUG_ON(!ins);
1221                         ins->cache.start = node->cache.start;
1222                         ins->cache.size = node->cache.size;
1223                         ins->data = rec;
1224                         rec->refs++;
1225                 }
1226                 ret = insert_cache_extent(dst, &ins->cache);
1227                 if (ret == -EEXIST) {
1228                         conflict = get_inode_rec(dst, rec->ino, 1);
1229                         BUG_ON(IS_ERR(conflict));
1230                         merge_inode_recs(rec, conflict, dst);
1231                         if (rec->checked) {
1232                                 conflict->checked = 1;
1233                                 if (dst_node->current == conflict)
1234                                         dst_node->current = NULL;
1235                         }
1236                         maybe_free_inode_rec(dst, conflict);
1237                         free_inode_rec(rec);
1238                         free(ins);
1239                 } else {
1240                         BUG_ON(ret);
1241                 }
1242         }
1243
1244         if (src == &src_node->root_cache) {
1245                 src = &src_node->inode_cache;
1246                 dst = &dst_node->inode_cache;
1247                 goto again;
1248         }
1249
1250         if (current_ino > 0 && (!dst_node->current ||
1251             current_ino > dst_node->current->ino)) {
1252                 if (dst_node->current) {
1253                         dst_node->current->checked = 1;
1254                         maybe_free_inode_rec(dst, dst_node->current);
1255                 }
1256                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1257                 BUG_ON(IS_ERR(dst_node->current));
1258         }
1259         return 0;
1260 }
1261
1262 static void free_inode_ptr(struct cache_extent *cache)
1263 {
1264         struct ptr_node *node;
1265         struct inode_record *rec;
1266
1267         node = container_of(cache, struct ptr_node, cache);
1268         rec = node->data;
1269         free_inode_rec(rec);
1270         free(node);
1271 }
1272
1273 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1274
1275 static struct shared_node *find_shared_node(struct cache_tree *shared,
1276                                             u64 bytenr)
1277 {
1278         struct cache_extent *cache;
1279         struct shared_node *node;
1280
1281         cache = lookup_cache_extent(shared, bytenr, 1);
1282         if (cache) {
1283                 node = container_of(cache, struct shared_node, cache);
1284                 return node;
1285         }
1286         return NULL;
1287 }
1288
1289 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1290 {
1291         int ret;
1292         struct shared_node *node;
1293
1294         node = calloc(1, sizeof(*node));
1295         if (!node)
1296                 return -ENOMEM;
1297         node->cache.start = bytenr;
1298         node->cache.size = 1;
1299         cache_tree_init(&node->root_cache);
1300         cache_tree_init(&node->inode_cache);
1301         node->refs = refs;
1302
1303         ret = insert_cache_extent(shared, &node->cache);
1304
1305         return ret;
1306 }
1307
1308 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1309                              struct walk_control *wc, int level)
1310 {
1311         struct shared_node *node;
1312         struct shared_node *dest;
1313         int ret;
1314
1315         if (level == wc->active_node)
1316                 return 0;
1317
1318         BUG_ON(wc->active_node <= level);
1319         node = find_shared_node(&wc->shared, bytenr);
1320         if (!node) {
1321                 ret = add_shared_node(&wc->shared, bytenr, refs);
1322                 BUG_ON(ret);
1323                 node = find_shared_node(&wc->shared, bytenr);
1324                 wc->nodes[level] = node;
1325                 wc->active_node = level;
1326                 return 0;
1327         }
1328
1329         if (wc->root_level == wc->active_node &&
1330             btrfs_root_refs(&root->root_item) == 0) {
1331                 if (--node->refs == 0) {
1332                         free_inode_recs_tree(&node->root_cache);
1333                         free_inode_recs_tree(&node->inode_cache);
1334                         remove_cache_extent(&wc->shared, &node->cache);
1335                         free(node);
1336                 }
1337                 return 1;
1338         }
1339
1340         dest = wc->nodes[wc->active_node];
1341         splice_shared_node(node, dest);
1342         if (node->refs == 0) {
1343                 remove_cache_extent(&wc->shared, &node->cache);
1344                 free(node);
1345         }
1346         return 1;
1347 }
1348
1349 static int leave_shared_node(struct btrfs_root *root,
1350                              struct walk_control *wc, int level)
1351 {
1352         struct shared_node *node;
1353         struct shared_node *dest;
1354         int i;
1355
1356         if (level == wc->root_level)
1357                 return 0;
1358
1359         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1360                 if (wc->nodes[i])
1361                         break;
1362         }
1363         BUG_ON(i >= BTRFS_MAX_LEVEL);
1364
1365         node = wc->nodes[wc->active_node];
1366         wc->nodes[wc->active_node] = NULL;
1367         wc->active_node = i;
1368
1369         dest = wc->nodes[wc->active_node];
1370         if (wc->active_node < wc->root_level ||
1371             btrfs_root_refs(&root->root_item) > 0) {
1372                 BUG_ON(node->refs <= 1);
1373                 splice_shared_node(node, dest);
1374         } else {
1375                 BUG_ON(node->refs < 2);
1376                 node->refs--;
1377         }
1378         return 0;
1379 }
1380
1381 /*
1382  * Returns:
1383  * < 0 - on error
1384  * 1   - if the root with id child_root_id is a child of root parent_root_id
1385  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1386  *       has other root(s) as parent(s)
1387  * 2   - if the root child_root_id doesn't have any parent roots
1388  */
1389 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1390                          u64 child_root_id)
1391 {
1392         struct btrfs_path path;
1393         struct btrfs_key key;
1394         struct extent_buffer *leaf;
1395         int has_parent = 0;
1396         int ret;
1397
1398         btrfs_init_path(&path);
1399
1400         key.objectid = parent_root_id;
1401         key.type = BTRFS_ROOT_REF_KEY;
1402         key.offset = child_root_id;
1403         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1404                                 0, 0);
1405         if (ret < 0)
1406                 return ret;
1407         btrfs_release_path(&path);
1408         if (!ret)
1409                 return 1;
1410
1411         key.objectid = child_root_id;
1412         key.type = BTRFS_ROOT_BACKREF_KEY;
1413         key.offset = 0;
1414         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1415                                 0, 0);
1416         if (ret < 0)
1417                 goto out;
1418
1419         while (1) {
1420                 leaf = path.nodes[0];
1421                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1422                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1423                         if (ret)
1424                                 break;
1425                         leaf = path.nodes[0];
1426                 }
1427
1428                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1429                 if (key.objectid != child_root_id ||
1430                     key.type != BTRFS_ROOT_BACKREF_KEY)
1431                         break;
1432
1433                 has_parent = 1;
1434
1435                 if (key.offset == parent_root_id) {
1436                         btrfs_release_path(&path);
1437                         return 1;
1438                 }
1439
1440                 path.slots[0]++;
1441         }
1442 out:
1443         btrfs_release_path(&path);
1444         if (ret < 0)
1445                 return ret;
1446         return has_parent ? 0 : 2;
1447 }
1448
1449 static int process_dir_item(struct btrfs_root *root,
1450                             struct extent_buffer *eb,
1451                             int slot, struct btrfs_key *key,
1452                             struct shared_node *active_node)
1453 {
1454         u32 total;
1455         u32 cur = 0;
1456         u32 len;
1457         u32 name_len;
1458         u32 data_len;
1459         int error;
1460         int nritems = 0;
1461         int filetype;
1462         struct btrfs_dir_item *di;
1463         struct inode_record *rec;
1464         struct cache_tree *root_cache;
1465         struct cache_tree *inode_cache;
1466         struct btrfs_key location;
1467         char namebuf[BTRFS_NAME_LEN];
1468
1469         root_cache = &active_node->root_cache;
1470         inode_cache = &active_node->inode_cache;
1471         rec = active_node->current;
1472         rec->found_dir_item = 1;
1473
1474         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1475         total = btrfs_item_size_nr(eb, slot);
1476         while (cur < total) {
1477                 nritems++;
1478                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1479                 name_len = btrfs_dir_name_len(eb, di);
1480                 data_len = btrfs_dir_data_len(eb, di);
1481                 filetype = btrfs_dir_type(eb, di);
1482
1483                 rec->found_size += name_len;
1484                 if (name_len <= BTRFS_NAME_LEN) {
1485                         len = name_len;
1486                         error = 0;
1487                 } else {
1488                         len = BTRFS_NAME_LEN;
1489                         error = REF_ERR_NAME_TOO_LONG;
1490                 }
1491                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1492
1493                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1494                         add_inode_backref(inode_cache, location.objectid,
1495                                           key->objectid, key->offset, namebuf,
1496                                           len, filetype, key->type, error);
1497                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1498                         add_inode_backref(root_cache, location.objectid,
1499                                           key->objectid, key->offset,
1500                                           namebuf, len, filetype,
1501                                           key->type, error);
1502                 } else {
1503                         fprintf(stderr, "invalid location in dir item %u\n",
1504                                 location.type);
1505                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1506                                           key->objectid, key->offset, namebuf,
1507                                           len, filetype, key->type, error);
1508                 }
1509
1510                 len = sizeof(*di) + name_len + data_len;
1511                 di = (struct btrfs_dir_item *)((char *)di + len);
1512                 cur += len;
1513         }
1514         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1515                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1516
1517         return 0;
1518 }
1519
1520 static int process_inode_ref(struct extent_buffer *eb,
1521                              int slot, struct btrfs_key *key,
1522                              struct shared_node *active_node)
1523 {
1524         u32 total;
1525         u32 cur = 0;
1526         u32 len;
1527         u32 name_len;
1528         u64 index;
1529         int error;
1530         struct cache_tree *inode_cache;
1531         struct btrfs_inode_ref *ref;
1532         char namebuf[BTRFS_NAME_LEN];
1533
1534         inode_cache = &active_node->inode_cache;
1535
1536         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1537         total = btrfs_item_size_nr(eb, slot);
1538         while (cur < total) {
1539                 name_len = btrfs_inode_ref_name_len(eb, ref);
1540                 index = btrfs_inode_ref_index(eb, ref);
1541                 if (name_len <= BTRFS_NAME_LEN) {
1542                         len = name_len;
1543                         error = 0;
1544                 } else {
1545                         len = BTRFS_NAME_LEN;
1546                         error = REF_ERR_NAME_TOO_LONG;
1547                 }
1548                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1549                 add_inode_backref(inode_cache, key->objectid, key->offset,
1550                                   index, namebuf, len, 0, key->type, error);
1551
1552                 len = sizeof(*ref) + name_len;
1553                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1554                 cur += len;
1555         }
1556         return 0;
1557 }
1558
1559 static int process_inode_extref(struct extent_buffer *eb,
1560                                 int slot, struct btrfs_key *key,
1561                                 struct shared_node *active_node)
1562 {
1563         u32 total;
1564         u32 cur = 0;
1565         u32 len;
1566         u32 name_len;
1567         u64 index;
1568         u64 parent;
1569         int error;
1570         struct cache_tree *inode_cache;
1571         struct btrfs_inode_extref *extref;
1572         char namebuf[BTRFS_NAME_LEN];
1573
1574         inode_cache = &active_node->inode_cache;
1575
1576         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1577         total = btrfs_item_size_nr(eb, slot);
1578         while (cur < total) {
1579                 name_len = btrfs_inode_extref_name_len(eb, extref);
1580                 index = btrfs_inode_extref_index(eb, extref);
1581                 parent = btrfs_inode_extref_parent(eb, extref);
1582                 if (name_len <= BTRFS_NAME_LEN) {
1583                         len = name_len;
1584                         error = 0;
1585                 } else {
1586                         len = BTRFS_NAME_LEN;
1587                         error = REF_ERR_NAME_TOO_LONG;
1588                 }
1589                 read_extent_buffer(eb, namebuf,
1590                                    (unsigned long)(extref + 1), len);
1591                 add_inode_backref(inode_cache, key->objectid, parent,
1592                                   index, namebuf, len, 0, key->type, error);
1593
1594                 len = sizeof(*extref) + name_len;
1595                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1596                 cur += len;
1597         }
1598         return 0;
1599
1600 }
1601
1602 static int count_csum_range(struct btrfs_root *root, u64 start,
1603                             u64 len, u64 *found)
1604 {
1605         struct btrfs_key key;
1606         struct btrfs_path path;
1607         struct extent_buffer *leaf;
1608         int ret;
1609         size_t size;
1610         *found = 0;
1611         u64 csum_end;
1612         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1613
1614         btrfs_init_path(&path);
1615
1616         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1617         key.offset = start;
1618         key.type = BTRFS_EXTENT_CSUM_KEY;
1619
1620         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1621                                 &key, &path, 0, 0);
1622         if (ret < 0)
1623                 goto out;
1624         if (ret > 0 && path.slots[0] > 0) {
1625                 leaf = path.nodes[0];
1626                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1627                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1628                     key.type == BTRFS_EXTENT_CSUM_KEY)
1629                         path.slots[0]--;
1630         }
1631
1632         while (len > 0) {
1633                 leaf = path.nodes[0];
1634                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1635                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1636                         if (ret > 0)
1637                                 break;
1638                         else if (ret < 0)
1639                                 goto out;
1640                         leaf = path.nodes[0];
1641                 }
1642
1643                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1644                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1645                     key.type != BTRFS_EXTENT_CSUM_KEY)
1646                         break;
1647
1648                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1649                 if (key.offset >= start + len)
1650                         break;
1651
1652                 if (key.offset > start)
1653                         start = key.offset;
1654
1655                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1656                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1657                 if (csum_end > start) {
1658                         size = min(csum_end - start, len);
1659                         len -= size;
1660                         start += size;
1661                         *found += size;
1662                 }
1663
1664                 path.slots[0]++;
1665         }
1666 out:
1667         btrfs_release_path(&path);
1668         if (ret < 0)
1669                 return ret;
1670         return 0;
1671 }
1672
1673 static int process_file_extent(struct btrfs_root *root,
1674                                 struct extent_buffer *eb,
1675                                 int slot, struct btrfs_key *key,
1676                                 struct shared_node *active_node)
1677 {
1678         struct inode_record *rec;
1679         struct btrfs_file_extent_item *fi;
1680         u64 num_bytes = 0;
1681         u64 disk_bytenr = 0;
1682         u64 extent_offset = 0;
1683         u64 mask = root->sectorsize - 1;
1684         int extent_type;
1685         int ret;
1686
1687         rec = active_node->current;
1688         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1689         rec->found_file_extent = 1;
1690
1691         if (rec->extent_start == (u64)-1) {
1692                 rec->extent_start = key->offset;
1693                 rec->extent_end = key->offset;
1694         }
1695
1696         if (rec->extent_end > key->offset)
1697                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1698         else if (rec->extent_end < key->offset) {
1699                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1700                                            key->offset - rec->extent_end);
1701                 if (ret < 0)
1702                         return ret;
1703         }
1704
1705         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1706         extent_type = btrfs_file_extent_type(eb, fi);
1707
1708         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1709                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1710                 if (num_bytes == 0)
1711                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1712                 rec->found_size += num_bytes;
1713                 num_bytes = (num_bytes + mask) & ~mask;
1714         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1715                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1716                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1717                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1718                 extent_offset = btrfs_file_extent_offset(eb, fi);
1719                 if (num_bytes == 0 || (num_bytes & mask))
1720                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1721                 if (num_bytes + extent_offset >
1722                     btrfs_file_extent_ram_bytes(eb, fi))
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1725                     (btrfs_file_extent_compression(eb, fi) ||
1726                      btrfs_file_extent_encryption(eb, fi) ||
1727                      btrfs_file_extent_other_encoding(eb, fi)))
1728                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1729                 if (disk_bytenr > 0)
1730                         rec->found_size += num_bytes;
1731         } else {
1732                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733         }
1734         rec->extent_end = key->offset + num_bytes;
1735
1736         /*
1737          * The data reloc tree will copy full extents into its inode and then
1738          * copy the corresponding csums.  Because the extent it copied could be
1739          * a preallocated extent that hasn't been written to yet there may be no
1740          * csums to copy, ergo we won't have csums for our file extent.  This is
1741          * ok so just don't bother checking csums if the inode belongs to the
1742          * data reloc tree.
1743          */
1744         if (disk_bytenr > 0 &&
1745             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1746                 u64 found;
1747                 if (btrfs_file_extent_compression(eb, fi))
1748                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1749                 else
1750                         disk_bytenr += extent_offset;
1751
1752                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1753                 if (ret < 0)
1754                         return ret;
1755                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1756                         if (found > 0)
1757                                 rec->found_csum_item = 1;
1758                         if (found < num_bytes)
1759                                 rec->some_csum_missing = 1;
1760                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1761                         if (found > 0)
1762                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1763                 }
1764         }
1765         return 0;
1766 }
1767
1768 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1769                             struct walk_control *wc)
1770 {
1771         struct btrfs_key key;
1772         u32 nritems;
1773         int i;
1774         int ret = 0;
1775         struct cache_tree *inode_cache;
1776         struct shared_node *active_node;
1777
1778         if (wc->root_level == wc->active_node &&
1779             btrfs_root_refs(&root->root_item) == 0)
1780                 return 0;
1781
1782         active_node = wc->nodes[wc->active_node];
1783         inode_cache = &active_node->inode_cache;
1784         nritems = btrfs_header_nritems(eb);
1785         for (i = 0; i < nritems; i++) {
1786                 btrfs_item_key_to_cpu(eb, &key, i);
1787
1788                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1789                         continue;
1790                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1791                         continue;
1792
1793                 if (active_node->current == NULL ||
1794                     active_node->current->ino < key.objectid) {
1795                         if (active_node->current) {
1796                                 active_node->current->checked = 1;
1797                                 maybe_free_inode_rec(inode_cache,
1798                                                      active_node->current);
1799                         }
1800                         active_node->current = get_inode_rec(inode_cache,
1801                                                              key.objectid, 1);
1802                         BUG_ON(IS_ERR(active_node->current));
1803                 }
1804                 switch (key.type) {
1805                 case BTRFS_DIR_ITEM_KEY:
1806                 case BTRFS_DIR_INDEX_KEY:
1807                         ret = process_dir_item(root, eb, i, &key, active_node);
1808                         break;
1809                 case BTRFS_INODE_REF_KEY:
1810                         ret = process_inode_ref(eb, i, &key, active_node);
1811                         break;
1812                 case BTRFS_INODE_EXTREF_KEY:
1813                         ret = process_inode_extref(eb, i, &key, active_node);
1814                         break;
1815                 case BTRFS_INODE_ITEM_KEY:
1816                         ret = process_inode_item(eb, i, &key, active_node);
1817                         break;
1818                 case BTRFS_EXTENT_DATA_KEY:
1819                         ret = process_file_extent(root, eb, i, &key,
1820                                                   active_node);
1821                         break;
1822                 default:
1823                         break;
1824                 };
1825         }
1826         return ret;
1827 }
1828
1829 static void reada_walk_down(struct btrfs_root *root,
1830                             struct extent_buffer *node, int slot)
1831 {
1832         u64 bytenr;
1833         u64 ptr_gen;
1834         u32 nritems;
1835         u32 blocksize;
1836         int i;
1837         int level;
1838
1839         level = btrfs_header_level(node);
1840         if (level != 1)
1841                 return;
1842
1843         nritems = btrfs_header_nritems(node);
1844         blocksize = root->nodesize;
1845         for (i = slot; i < nritems; i++) {
1846                 bytenr = btrfs_node_blockptr(node, i);
1847                 ptr_gen = btrfs_node_ptr_generation(node, i);
1848                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1849         }
1850 }
1851
1852 /*
1853  * Check the child node/leaf by the following condition:
1854  * 1. the first item key of the node/leaf should be the same with the one
1855  *    in parent.
1856  * 2. block in parent node should match the child node/leaf.
1857  * 3. generation of parent node and child's header should be consistent.
1858  *
1859  * Or the child node/leaf pointed by the key in parent is not valid.
1860  *
1861  * We hope to check leaf owner too, but since subvol may share leaves,
1862  * which makes leaf owner check not so strong, key check should be
1863  * sufficient enough for that case.
1864  */
1865 static int check_child_node(struct btrfs_root *root,
1866                             struct extent_buffer *parent, int slot,
1867                             struct extent_buffer *child)
1868 {
1869         struct btrfs_key parent_key;
1870         struct btrfs_key child_key;
1871         int ret = 0;
1872
1873         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1874         if (btrfs_header_level(child) == 0)
1875                 btrfs_item_key_to_cpu(child, &child_key, 0);
1876         else
1877                 btrfs_node_key_to_cpu(child, &child_key, 0);
1878
1879         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1880                 ret = -EINVAL;
1881                 fprintf(stderr,
1882                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1883                         parent_key.objectid, parent_key.type, parent_key.offset,
1884                         child_key.objectid, child_key.type, child_key.offset);
1885         }
1886         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1887                 ret = -EINVAL;
1888                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1889                         btrfs_node_blockptr(parent, slot),
1890                         btrfs_header_bytenr(child));
1891         }
1892         if (btrfs_node_ptr_generation(parent, slot) !=
1893             btrfs_header_generation(child)) {
1894                 ret = -EINVAL;
1895                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1896                         btrfs_header_generation(child),
1897                         btrfs_node_ptr_generation(parent, slot));
1898         }
1899         return ret;
1900 }
1901
1902 struct node_refs {
1903         u64 bytenr[BTRFS_MAX_LEVEL];
1904         u64 refs[BTRFS_MAX_LEVEL];
1905 };
1906
1907 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1908                           struct walk_control *wc, int *level,
1909                           struct node_refs *nrefs)
1910 {
1911         enum btrfs_tree_block_status status;
1912         u64 bytenr;
1913         u64 ptr_gen;
1914         struct extent_buffer *next;
1915         struct extent_buffer *cur;
1916         u32 blocksize;
1917         int ret, err = 0;
1918         u64 refs;
1919
1920         WARN_ON(*level < 0);
1921         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1922
1923         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1924                 refs = nrefs->refs[*level];
1925                 ret = 0;
1926         } else {
1927                 ret = btrfs_lookup_extent_info(NULL, root,
1928                                        path->nodes[*level]->start,
1929                                        *level, 1, &refs, NULL);
1930                 if (ret < 0) {
1931                         err = ret;
1932                         goto out;
1933                 }
1934                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1935                 nrefs->refs[*level] = refs;
1936         }
1937
1938         if (refs > 1) {
1939                 ret = enter_shared_node(root, path->nodes[*level]->start,
1940                                         refs, wc, *level);
1941                 if (ret > 0) {
1942                         err = ret;
1943                         goto out;
1944                 }
1945         }
1946
1947         while (*level >= 0) {
1948                 WARN_ON(*level < 0);
1949                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1950                 cur = path->nodes[*level];
1951
1952                 if (btrfs_header_level(cur) != *level)
1953                         WARN_ON(1);
1954
1955                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1956                         break;
1957                 if (*level == 0) {
1958                         ret = process_one_leaf(root, cur, wc);
1959                         if (ret < 0)
1960                                 err = ret;
1961                         break;
1962                 }
1963                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1964                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1965                 blocksize = root->nodesize;
1966
1967                 if (bytenr == nrefs->bytenr[*level - 1]) {
1968                         refs = nrefs->refs[*level - 1];
1969                 } else {
1970                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1971                                         *level - 1, 1, &refs, NULL);
1972                         if (ret < 0) {
1973                                 refs = 0;
1974                         } else {
1975                                 nrefs->bytenr[*level - 1] = bytenr;
1976                                 nrefs->refs[*level - 1] = refs;
1977                         }
1978                 }
1979
1980                 if (refs > 1) {
1981                         ret = enter_shared_node(root, bytenr, refs,
1982                                                 wc, *level - 1);
1983                         if (ret > 0) {
1984                                 path->slots[*level]++;
1985                                 continue;
1986                         }
1987                 }
1988
1989                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1990                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1991                         free_extent_buffer(next);
1992                         reada_walk_down(root, cur, path->slots[*level]);
1993                         next = read_tree_block(root, bytenr, blocksize,
1994                                                ptr_gen);
1995                         if (!extent_buffer_uptodate(next)) {
1996                                 struct btrfs_key node_key;
1997
1998                                 btrfs_node_key_to_cpu(path->nodes[*level],
1999                                                       &node_key,
2000                                                       path->slots[*level]);
2001                                 btrfs_add_corrupt_extent_record(root->fs_info,
2002                                                 &node_key,
2003                                                 path->nodes[*level]->start,
2004                                                 root->nodesize, *level);
2005                                 err = -EIO;
2006                                 goto out;
2007                         }
2008                 }
2009
2010                 ret = check_child_node(root, cur, path->slots[*level], next);
2011                 if (ret) {
2012                         err = ret;
2013                         goto out;
2014                 }
2015
2016                 if (btrfs_is_leaf(next))
2017                         status = btrfs_check_leaf(root, NULL, next);
2018                 else
2019                         status = btrfs_check_node(root, NULL, next);
2020                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2021                         free_extent_buffer(next);
2022                         err = -EIO;
2023                         goto out;
2024                 }
2025
2026                 *level = *level - 1;
2027                 free_extent_buffer(path->nodes[*level]);
2028                 path->nodes[*level] = next;
2029                 path->slots[*level] = 0;
2030         }
2031 out:
2032         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2033         return err;
2034 }
2035
2036 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2037                         struct walk_control *wc, int *level)
2038 {
2039         int i;
2040         struct extent_buffer *leaf;
2041
2042         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2043                 leaf = path->nodes[i];
2044                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2045                         path->slots[i]++;
2046                         *level = i;
2047                         return 0;
2048                 } else {
2049                         free_extent_buffer(path->nodes[*level]);
2050                         path->nodes[*level] = NULL;
2051                         BUG_ON(*level > wc->active_node);
2052                         if (*level == wc->active_node)
2053                                 leave_shared_node(root, wc, *level);
2054                         *level = i + 1;
2055                 }
2056         }
2057         return 1;
2058 }
2059
2060 static int check_root_dir(struct inode_record *rec)
2061 {
2062         struct inode_backref *backref;
2063         int ret = -1;
2064
2065         if (!rec->found_inode_item || rec->errors)
2066                 goto out;
2067         if (rec->nlink != 1 || rec->found_link != 0)
2068                 goto out;
2069         if (list_empty(&rec->backrefs))
2070                 goto out;
2071         backref = to_inode_backref(rec->backrefs.next);
2072         if (!backref->found_inode_ref)
2073                 goto out;
2074         if (backref->index != 0 || backref->namelen != 2 ||
2075             memcmp(backref->name, "..", 2))
2076                 goto out;
2077         if (backref->found_dir_index || backref->found_dir_item)
2078                 goto out;
2079         ret = 0;
2080 out:
2081         return ret;
2082 }
2083
2084 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2085                               struct btrfs_root *root, struct btrfs_path *path,
2086                               struct inode_record *rec)
2087 {
2088         struct btrfs_inode_item *ei;
2089         struct btrfs_key key;
2090         int ret;
2091
2092         key.objectid = rec->ino;
2093         key.type = BTRFS_INODE_ITEM_KEY;
2094         key.offset = (u64)-1;
2095
2096         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2097         if (ret < 0)
2098                 goto out;
2099         if (ret) {
2100                 if (!path->slots[0]) {
2101                         ret = -ENOENT;
2102                         goto out;
2103                 }
2104                 path->slots[0]--;
2105                 ret = 0;
2106         }
2107         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2108         if (key.objectid != rec->ino) {
2109                 ret = -ENOENT;
2110                 goto out;
2111         }
2112
2113         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2114                             struct btrfs_inode_item);
2115         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2116         btrfs_mark_buffer_dirty(path->nodes[0]);
2117         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2118         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2119                root->root_key.objectid);
2120 out:
2121         btrfs_release_path(path);
2122         return ret;
2123 }
2124
2125 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2126                                     struct btrfs_root *root,
2127                                     struct btrfs_path *path,
2128                                     struct inode_record *rec)
2129 {
2130         int ret;
2131
2132         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2133         btrfs_release_path(path);
2134         if (!ret)
2135                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2136         return ret;
2137 }
2138
2139 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2140                                struct btrfs_root *root,
2141                                struct btrfs_path *path,
2142                                struct inode_record *rec)
2143 {
2144         struct btrfs_inode_item *ei;
2145         struct btrfs_key key;
2146         int ret = 0;
2147
2148         key.objectid = rec->ino;
2149         key.type = BTRFS_INODE_ITEM_KEY;
2150         key.offset = 0;
2151
2152         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2153         if (ret) {
2154                 if (ret > 0)
2155                         ret = -ENOENT;
2156                 goto out;
2157         }
2158
2159         /* Since ret == 0, no need to check anything */
2160         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2161                             struct btrfs_inode_item);
2162         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2163         btrfs_mark_buffer_dirty(path->nodes[0]);
2164         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2165         printf("reset nbytes for ino %llu root %llu\n",
2166                rec->ino, root->root_key.objectid);
2167 out:
2168         btrfs_release_path(path);
2169         return ret;
2170 }
2171
2172 static int add_missing_dir_index(struct btrfs_root *root,
2173                                  struct cache_tree *inode_cache,
2174                                  struct inode_record *rec,
2175                                  struct inode_backref *backref)
2176 {
2177         struct btrfs_path *path;
2178         struct btrfs_trans_handle *trans;
2179         struct btrfs_dir_item *dir_item;
2180         struct extent_buffer *leaf;
2181         struct btrfs_key key;
2182         struct btrfs_disk_key disk_key;
2183         struct inode_record *dir_rec;
2184         unsigned long name_ptr;
2185         u32 data_size = sizeof(*dir_item) + backref->namelen;
2186         int ret;
2187
2188         path = btrfs_alloc_path();
2189         if (!path)
2190                 return -ENOMEM;
2191
2192         trans = btrfs_start_transaction(root, 1);
2193         if (IS_ERR(trans)) {
2194                 btrfs_free_path(path);
2195                 return PTR_ERR(trans);
2196         }
2197
2198         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2199                 (unsigned long long)rec->ino);
2200         key.objectid = backref->dir;
2201         key.type = BTRFS_DIR_INDEX_KEY;
2202         key.offset = backref->index;
2203
2204         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2205         BUG_ON(ret);
2206
2207         leaf = path->nodes[0];
2208         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2209
2210         disk_key.objectid = cpu_to_le64(rec->ino);
2211         disk_key.type = BTRFS_INODE_ITEM_KEY;
2212         disk_key.offset = 0;
2213
2214         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2215         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2216         btrfs_set_dir_data_len(leaf, dir_item, 0);
2217         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2218         name_ptr = (unsigned long)(dir_item + 1);
2219         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2220         btrfs_mark_buffer_dirty(leaf);
2221         btrfs_free_path(path);
2222         btrfs_commit_transaction(trans, root);
2223
2224         backref->found_dir_index = 1;
2225         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2226         BUG_ON(IS_ERR(dir_rec));
2227         if (!dir_rec)
2228                 return 0;
2229         dir_rec->found_size += backref->namelen;
2230         if (dir_rec->found_size == dir_rec->isize &&
2231             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2232                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2233         if (dir_rec->found_size != dir_rec->isize)
2234                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2235
2236         return 0;
2237 }
2238
2239 static int delete_dir_index(struct btrfs_root *root,
2240                             struct cache_tree *inode_cache,
2241                             struct inode_record *rec,
2242                             struct inode_backref *backref)
2243 {
2244         struct btrfs_trans_handle *trans;
2245         struct btrfs_dir_item *di;
2246         struct btrfs_path *path;
2247         int ret = 0;
2248
2249         path = btrfs_alloc_path();
2250         if (!path)
2251                 return -ENOMEM;
2252
2253         trans = btrfs_start_transaction(root, 1);
2254         if (IS_ERR(trans)) {
2255                 btrfs_free_path(path);
2256                 return PTR_ERR(trans);
2257         }
2258
2259
2260         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261                 (unsigned long long)backref->dir,
2262                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263                 (unsigned long long)root->objectid);
2264
2265         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2266                                     backref->name, backref->namelen,
2267                                     backref->index, -1);
2268         if (IS_ERR(di)) {
2269                 ret = PTR_ERR(di);
2270                 btrfs_free_path(path);
2271                 btrfs_commit_transaction(trans, root);
2272                 if (ret == -ENOENT)
2273                         return 0;
2274                 return ret;
2275         }
2276
2277         if (!di)
2278                 ret = btrfs_del_item(trans, root, path);
2279         else
2280                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2281         BUG_ON(ret);
2282         btrfs_free_path(path);
2283         btrfs_commit_transaction(trans, root);
2284         return ret;
2285 }
2286
2287 static int create_inode_item(struct btrfs_root *root,
2288                              struct inode_record *rec,
2289                              struct inode_backref *backref, int root_dir)
2290 {
2291         struct btrfs_trans_handle *trans;
2292         struct btrfs_inode_item inode_item;
2293         time_t now = time(NULL);
2294         int ret;
2295
2296         trans = btrfs_start_transaction(root, 1);
2297         if (IS_ERR(trans)) {
2298                 ret = PTR_ERR(trans);
2299                 return ret;
2300         }
2301
2302         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2303                 "be incomplete, please check permissions and content after "
2304                 "the fsck completes.\n", (unsigned long long)root->objectid,
2305                 (unsigned long long)rec->ino);
2306
2307         memset(&inode_item, 0, sizeof(inode_item));
2308         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2309         if (root_dir)
2310                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2311         else
2312                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2313         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2314         if (rec->found_dir_item) {
2315                 if (rec->found_file_extent)
2316                         fprintf(stderr, "root %llu inode %llu has both a dir "
2317                                 "item and extents, unsure if it is a dir or a "
2318                                 "regular file so setting it as a directory\n",
2319                                 (unsigned long long)root->objectid,
2320                                 (unsigned long long)rec->ino);
2321                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2322                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2323         } else if (!rec->found_dir_item) {
2324                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2325                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2326         }
2327         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2328         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2329         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2334         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2335
2336         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2337         BUG_ON(ret);
2338         btrfs_commit_transaction(trans, root);
2339         return 0;
2340 }
2341
2342 static int repair_inode_backrefs(struct btrfs_root *root,
2343                                  struct inode_record *rec,
2344                                  struct cache_tree *inode_cache,
2345                                  int delete)
2346 {
2347         struct inode_backref *tmp, *backref;
2348         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2349         int ret = 0;
2350         int repaired = 0;
2351
2352         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2353                 if (!delete && rec->ino == root_dirid) {
2354                         if (!rec->found_inode_item) {
2355                                 ret = create_inode_item(root, rec, backref, 1);
2356                                 if (ret)
2357                                         break;
2358                                 repaired++;
2359                         }
2360                 }
2361
2362                 /* Index 0 for root dir's are special, don't mess with it */
2363                 if (rec->ino == root_dirid && backref->index == 0)
2364                         continue;
2365
2366                 if (delete &&
2367                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2368                      (backref->found_dir_index && backref->found_inode_ref &&
2369                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2370                         ret = delete_dir_index(root, inode_cache, rec, backref);
2371                         if (ret)
2372                                 break;
2373                         repaired++;
2374                         list_del(&backref->list);
2375                         free(backref);
2376                 }
2377
2378                 if (!delete && !backref->found_dir_index &&
2379                     backref->found_dir_item && backref->found_inode_ref) {
2380                         ret = add_missing_dir_index(root, inode_cache, rec,
2381                                                     backref);
2382                         if (ret)
2383                                 break;
2384                         repaired++;
2385                         if (backref->found_dir_item &&
2386                             backref->found_dir_index &&
2387                             backref->found_dir_index) {
2388                                 if (!backref->errors &&
2389                                     backref->found_inode_ref) {
2390                                         list_del(&backref->list);
2391                                         free(backref);
2392                                 }
2393                         }
2394                 }
2395
2396                 if (!delete && (!backref->found_dir_index &&
2397                                 !backref->found_dir_item &&
2398                                 backref->found_inode_ref)) {
2399                         struct btrfs_trans_handle *trans;
2400                         struct btrfs_key location;
2401
2402                         ret = check_dir_conflict(root, backref->name,
2403                                                  backref->namelen,
2404                                                  backref->dir,
2405                                                  backref->index);
2406                         if (ret) {
2407                                 /*
2408                                  * let nlink fixing routine to handle it,
2409                                  * which can do it better.
2410                                  */
2411                                 ret = 0;
2412                                 break;
2413                         }
2414                         location.objectid = rec->ino;
2415                         location.type = BTRFS_INODE_ITEM_KEY;
2416                         location.offset = 0;
2417
2418                         trans = btrfs_start_transaction(root, 1);
2419                         if (IS_ERR(trans)) {
2420                                 ret = PTR_ERR(trans);
2421                                 break;
2422                         }
2423                         fprintf(stderr, "adding missing dir index/item pair "
2424                                 "for inode %llu\n",
2425                                 (unsigned long long)rec->ino);
2426                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2427                                                     backref->namelen,
2428                                                     backref->dir, &location,
2429                                                     imode_to_type(rec->imode),
2430                                                     backref->index);
2431                         BUG_ON(ret);
2432                         btrfs_commit_transaction(trans, root);
2433                         repaired++;
2434                 }
2435
2436                 if (!delete && (backref->found_inode_ref &&
2437                                 backref->found_dir_index &&
2438                                 backref->found_dir_item &&
2439                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2440                                 !rec->found_inode_item)) {
2441                         ret = create_inode_item(root, rec, backref, 0);
2442                         if (ret)
2443                                 break;
2444                         repaired++;
2445                 }
2446
2447         }
2448         return ret ? ret : repaired;
2449 }
2450
2451 /*
2452  * To determine the file type for nlink/inode_item repair
2453  *
2454  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2455  * Return -ENOENT if file type is not found.
2456  */
2457 static int find_file_type(struct inode_record *rec, u8 *type)
2458 {
2459         struct inode_backref *backref;
2460
2461         /* For inode item recovered case */
2462         if (rec->found_inode_item) {
2463                 *type = imode_to_type(rec->imode);
2464                 return 0;
2465         }
2466
2467         list_for_each_entry(backref, &rec->backrefs, list) {
2468                 if (backref->found_dir_index || backref->found_dir_item) {
2469                         *type = backref->filetype;
2470                         return 0;
2471                 }
2472         }
2473         return -ENOENT;
2474 }
2475
2476 /*
2477  * To determine the file name for nlink repair
2478  *
2479  * Return 0 if file name is found, set name and namelen.
2480  * Return -ENOENT if file name is not found.
2481  */
2482 static int find_file_name(struct inode_record *rec,
2483                           char *name, int *namelen)
2484 {
2485         struct inode_backref *backref;
2486
2487         list_for_each_entry(backref, &rec->backrefs, list) {
2488                 if (backref->found_dir_index || backref->found_dir_item ||
2489                     backref->found_inode_ref) {
2490                         memcpy(name, backref->name, backref->namelen);
2491                         *namelen = backref->namelen;
2492                         return 0;
2493                 }
2494         }
2495         return -ENOENT;
2496 }
2497
2498 /* Reset the nlink of the inode to the correct one */
2499 static int reset_nlink(struct btrfs_trans_handle *trans,
2500                        struct btrfs_root *root,
2501                        struct btrfs_path *path,
2502                        struct inode_record *rec)
2503 {
2504         struct inode_backref *backref;
2505         struct inode_backref *tmp;
2506         struct btrfs_key key;
2507         struct btrfs_inode_item *inode_item;
2508         int ret = 0;
2509
2510         /* We don't believe this either, reset it and iterate backref */
2511         rec->found_link = 0;
2512
2513         /* Remove all backref including the valid ones */
2514         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2515                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2516                                    backref->index, backref->name,
2517                                    backref->namelen, 0);
2518                 if (ret < 0)
2519                         goto out;
2520
2521                 /* remove invalid backref, so it won't be added back */
2522                 if (!(backref->found_dir_index &&
2523                       backref->found_dir_item &&
2524                       backref->found_inode_ref)) {
2525                         list_del(&backref->list);
2526                         free(backref);
2527                 } else {
2528                         rec->found_link++;
2529                 }
2530         }
2531
2532         /* Set nlink to 0 */
2533         key.objectid = rec->ino;
2534         key.type = BTRFS_INODE_ITEM_KEY;
2535         key.offset = 0;
2536         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2537         if (ret < 0)
2538                 goto out;
2539         if (ret > 0) {
2540                 ret = -ENOENT;
2541                 goto out;
2542         }
2543         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2544                                     struct btrfs_inode_item);
2545         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2546         btrfs_mark_buffer_dirty(path->nodes[0]);
2547         btrfs_release_path(path);
2548
2549         /*
2550          * Add back valid inode_ref/dir_item/dir_index,
2551          * add_link() will handle the nlink inc, so new nlink must be correct
2552          */
2553         list_for_each_entry(backref, &rec->backrefs, list) {
2554                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2555                                      backref->name, backref->namelen,
2556                                      backref->filetype, &backref->index, 1);
2557                 if (ret < 0)
2558                         goto out;
2559         }
2560 out:
2561         btrfs_release_path(path);
2562         return ret;
2563 }
2564
2565 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2566                                struct btrfs_root *root,
2567                                struct btrfs_path *path,
2568                                struct inode_record *rec)
2569 {
2570         char *dir_name = "lost+found";
2571         char namebuf[BTRFS_NAME_LEN] = {0};
2572         u64 lost_found_ino;
2573         u32 mode = 0700;
2574         u8 type = 0;
2575         int namelen = 0;
2576         int name_recovered = 0;
2577         int type_recovered = 0;
2578         int ret = 0;
2579
2580         /*
2581          * Get file name and type first before these invalid inode ref
2582          * are deleted by remove_all_invalid_backref()
2583          */
2584         name_recovered = !find_file_name(rec, namebuf, &namelen);
2585         type_recovered = !find_file_type(rec, &type);
2586
2587         if (!name_recovered) {
2588                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2589                        rec->ino, rec->ino);
2590                 namelen = count_digits(rec->ino);
2591                 sprintf(namebuf, "%llu", rec->ino);
2592                 name_recovered = 1;
2593         }
2594         if (!type_recovered) {
2595                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2596                        rec->ino);
2597                 type = BTRFS_FT_REG_FILE;
2598                 type_recovered = 1;
2599         }
2600
2601         ret = reset_nlink(trans, root, path, rec);
2602         if (ret < 0) {
2603                 fprintf(stderr,
2604                         "Failed to reset nlink for inode %llu: %s\n",
2605                         rec->ino, strerror(-ret));
2606                 goto out;
2607         }
2608
2609         if (rec->found_link == 0) {
2610                 lost_found_ino = root->highest_inode;
2611                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2612                         ret = -EOVERFLOW;
2613                         goto out;
2614                 }
2615                 lost_found_ino++;
2616                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2617                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2618                                   mode);
2619                 if (ret < 0) {
2620                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2621                                 dir_name, strerror(-ret));
2622                         goto out;
2623                 }
2624                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2625                                      namebuf, namelen, type, NULL, 1);
2626                 /*
2627                  * Add ".INO" suffix several times to handle case where
2628                  * "FILENAME.INO" is already taken by another file.
2629                  */
2630                 while (ret == -EEXIST) {
2631                         /*
2632                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2633                          */
2634                         if (namelen + count_digits(rec->ino) + 1 >
2635                             BTRFS_NAME_LEN) {
2636                                 ret = -EFBIG;
2637                                 goto out;
2638                         }
2639                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2640                                  ".%llu", rec->ino);
2641                         namelen += count_digits(rec->ino) + 1;
2642                         ret = btrfs_add_link(trans, root, rec->ino,
2643                                              lost_found_ino, namebuf,
2644                                              namelen, type, NULL, 1);
2645                 }
2646                 if (ret < 0) {
2647                         fprintf(stderr,
2648                                 "Failed to link the inode %llu to %s dir: %s\n",
2649                                 rec->ino, dir_name, strerror(-ret));
2650                         goto out;
2651                 }
2652                 /*
2653                  * Just increase the found_link, don't actually add the
2654                  * backref. This will make things easier and this inode
2655                  * record will be freed after the repair is done.
2656                  * So fsck will not report problem about this inode.
2657                  */
2658                 rec->found_link++;
2659                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2660                        namelen, namebuf, dir_name);
2661         }
2662         printf("Fixed the nlink of inode %llu\n", rec->ino);
2663 out:
2664         /*
2665          * Clear the flag anyway, or we will loop forever for the same inode
2666          * as it will not be removed from the bad inode list and the dead loop
2667          * happens.
2668          */
2669         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2670         btrfs_release_path(path);
2671         return ret;
2672 }
2673
2674 /*
2675  * Check if there is any normal(reg or prealloc) file extent for given
2676  * ino.
2677  * This is used to determine the file type when neither its dir_index/item or
2678  * inode_item exists.
2679  *
2680  * This will *NOT* report error, if any error happens, just consider it does
2681  * not have any normal file extent.
2682  */
2683 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2684 {
2685         struct btrfs_path *path;
2686         struct btrfs_key key;
2687         struct btrfs_key found_key;
2688         struct btrfs_file_extent_item *fi;
2689         u8 type;
2690         int ret = 0;
2691
2692         path = btrfs_alloc_path();
2693         if (!path)
2694                 goto out;
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2705                 ret = btrfs_next_leaf(root, path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2713                                       path->slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path->nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_free_path(path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path *path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         path = btrfs_alloc_path();
2930         if (!path)
2931                 return -ENOMEM;
2932
2933         /*
2934          * For nlink repair, it may create a dir and add link, so
2935          * 2 for parent(256)'s dir_index and dir_item
2936          * 2 for lost+found dir's inode_item and inode_ref
2937          * 1 for the new inode_ref of the file
2938          * 2 for lost+found dir's dir_index and dir_item for the file
2939          */
2940         trans = btrfs_start_transaction(root, 7);
2941         if (IS_ERR(trans)) {
2942                 btrfs_free_path(path);
2943                 return PTR_ERR(trans);
2944         }
2945
2946         if (rec->errors & I_ERR_NO_INODE_ITEM)
2947                 ret = repair_inode_no_item(trans, root, path, rec);
2948         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2949                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2950         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2951                 ret = repair_inode_discount_extent(trans, root, path, rec);
2952         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2953                 ret = repair_inode_isize(trans, root, path, rec);
2954         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2955                 ret = repair_inode_orphan_item(trans, root, path, rec);
2956         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2957                 ret = repair_inode_nlinks(trans, root, path, rec);
2958         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2959                 ret = repair_inode_nbytes(trans, root, path, rec);
2960         btrfs_commit_transaction(trans, root);
2961         btrfs_free_path(path);
2962         return ret;
2963 }
2964
2965 static int check_inode_recs(struct btrfs_root *root,
2966                             struct cache_tree *inode_cache)
2967 {
2968         struct cache_extent *cache;
2969         struct ptr_node *node;
2970         struct inode_record *rec;
2971         struct inode_backref *backref;
2972         int stage = 0;
2973         int ret = 0;
2974         int err = 0;
2975         u64 error = 0;
2976         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2977
2978         if (btrfs_root_refs(&root->root_item) == 0) {
2979                 if (!cache_tree_empty(inode_cache))
2980                         fprintf(stderr, "warning line %d\n", __LINE__);
2981                 return 0;
2982         }
2983
2984         /*
2985          * We need to record the highest inode number for later 'lost+found'
2986          * dir creation.
2987          * We must select an ino not used/referred by any existing inode, or
2988          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2989          * this may cause 'lost+found' dir has wrong nlinks.
2990          */
2991         cache = last_cache_extent(inode_cache);
2992         if (cache) {
2993                 node = container_of(cache, struct ptr_node, cache);
2994                 rec = node->data;
2995                 if (rec->ino > root->highest_inode)
2996                         root->highest_inode = rec->ino;
2997         }
2998
2999         /*
3000          * We need to repair backrefs first because we could change some of the
3001          * errors in the inode recs.
3002          *
3003          * We also need to go through and delete invalid backrefs first and then
3004          * add the correct ones second.  We do this because we may get EEXIST
3005          * when adding back the correct index because we hadn't yet deleted the
3006          * invalid index.
3007          *
3008          * For example, if we were missing a dir index then the directories
3009          * isize would be wrong, so if we fixed the isize to what we thought it
3010          * would be and then fixed the backref we'd still have a invalid fs, so
3011          * we need to add back the dir index and then check to see if the isize
3012          * is still wrong.
3013          */
3014         while (stage < 3) {
3015                 stage++;
3016                 if (stage == 3 && !err)
3017                         break;
3018
3019                 cache = search_cache_extent(inode_cache, 0);
3020                 while (repair && cache) {
3021                         node = container_of(cache, struct ptr_node, cache);
3022                         rec = node->data;
3023                         cache = next_cache_extent(cache);
3024
3025                         /* Need to free everything up and rescan */
3026                         if (stage == 3) {
3027                                 remove_cache_extent(inode_cache, &node->cache);
3028                                 free(node);
3029                                 free_inode_rec(rec);
3030                                 continue;
3031                         }
3032
3033                         if (list_empty(&rec->backrefs))
3034                                 continue;
3035
3036                         ret = repair_inode_backrefs(root, rec, inode_cache,
3037                                                     stage == 1);
3038                         if (ret < 0) {
3039                                 err = ret;
3040                                 stage = 2;
3041                                 break;
3042                         } if (ret > 0) {
3043                                 err = -EAGAIN;
3044                         }
3045                 }
3046         }
3047         if (err)
3048                 return err;
3049
3050         rec = get_inode_rec(inode_cache, root_dirid, 0);
3051         BUG_ON(IS_ERR(rec));
3052         if (rec) {
3053                 ret = check_root_dir(rec);
3054                 if (ret) {
3055                         fprintf(stderr, "root %llu root dir %llu error\n",
3056                                 (unsigned long long)root->root_key.objectid,
3057                                 (unsigned long long)root_dirid);
3058                         print_inode_error(root, rec);
3059                         error++;
3060                 }
3061         } else {
3062                 if (repair) {
3063                         struct btrfs_trans_handle *trans;
3064
3065                         trans = btrfs_start_transaction(root, 1);
3066                         if (IS_ERR(trans)) {
3067                                 err = PTR_ERR(trans);
3068                                 return err;
3069                         }
3070
3071                         fprintf(stderr,
3072                                 "root %llu missing its root dir, recreating\n",
3073                                 (unsigned long long)root->objectid);
3074
3075                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3076                         BUG_ON(ret);
3077
3078                         btrfs_commit_transaction(trans, root);
3079                         return -EAGAIN;
3080                 }
3081
3082                 fprintf(stderr, "root %llu root dir %llu not found\n",
3083                         (unsigned long long)root->root_key.objectid,
3084                         (unsigned long long)root_dirid);
3085         }
3086
3087         while (1) {
3088                 cache = search_cache_extent(inode_cache, 0);
3089                 if (!cache)
3090                         break;
3091                 node = container_of(cache, struct ptr_node, cache);
3092                 rec = node->data;
3093                 remove_cache_extent(inode_cache, &node->cache);
3094                 free(node);
3095                 if (rec->ino == root_dirid ||
3096                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3097                         free_inode_rec(rec);
3098                         continue;
3099                 }
3100
3101                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3102                         ret = check_orphan_item(root, rec->ino);
3103                         if (ret == 0)
3104                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3105                         if (can_free_inode_rec(rec)) {
3106                                 free_inode_rec(rec);
3107                                 continue;
3108                         }
3109                 }
3110
3111                 if (!rec->found_inode_item)
3112                         rec->errors |= I_ERR_NO_INODE_ITEM;
3113                 if (rec->found_link != rec->nlink)
3114                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3115                 if (repair) {
3116                         ret = try_repair_inode(root, rec);
3117                         if (ret == 0 && can_free_inode_rec(rec)) {
3118                                 free_inode_rec(rec);
3119                                 continue;
3120                         }
3121                         ret = 0;
3122                 }
3123
3124                 if (!(repair && ret == 0))
3125                         error++;
3126                 print_inode_error(root, rec);
3127                 list_for_each_entry(backref, &rec->backrefs, list) {
3128                         if (!backref->found_dir_item)
3129                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3130                         if (!backref->found_dir_index)
3131                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3132                         if (!backref->found_inode_ref)
3133                                 backref->errors |= REF_ERR_NO_INODE_REF;
3134                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3135                                 " namelen %u name %s filetype %d errors %x",
3136                                 (unsigned long long)backref->dir,
3137                                 (unsigned long long)backref->index,
3138                                 backref->namelen, backref->name,
3139                                 backref->filetype, backref->errors);
3140                         print_ref_error(backref->errors);
3141                 }
3142                 free_inode_rec(rec);
3143         }
3144         return (error > 0) ? -1 : 0;
3145 }
3146
3147 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3148                                         u64 objectid)
3149 {
3150         struct cache_extent *cache;
3151         struct root_record *rec = NULL;
3152         int ret;
3153
3154         cache = lookup_cache_extent(root_cache, objectid, 1);
3155         if (cache) {
3156                 rec = container_of(cache, struct root_record, cache);
3157         } else {
3158                 rec = calloc(1, sizeof(*rec));
3159                 if (!rec)
3160                         return ERR_PTR(-ENOMEM);
3161                 rec->objectid = objectid;
3162                 INIT_LIST_HEAD(&rec->backrefs);
3163                 rec->cache.start = objectid;
3164                 rec->cache.size = 1;
3165
3166                 ret = insert_cache_extent(root_cache, &rec->cache);
3167                 if (ret)
3168                         return ERR_PTR(-EEXIST);
3169         }
3170         return rec;
3171 }
3172
3173 static struct root_backref *get_root_backref(struct root_record *rec,
3174                                              u64 ref_root, u64 dir, u64 index,
3175                                              const char *name, int namelen)
3176 {
3177         struct root_backref *backref;
3178
3179         list_for_each_entry(backref, &rec->backrefs, list) {
3180                 if (backref->ref_root != ref_root || backref->dir != dir ||
3181                     backref->namelen != namelen)
3182                         continue;
3183                 if (memcmp(name, backref->name, namelen))
3184                         continue;
3185                 return backref;
3186         }
3187
3188         backref = calloc(1, sizeof(*backref) + namelen + 1);
3189         if (!backref)
3190                 return NULL;
3191         backref->ref_root = ref_root;
3192         backref->dir = dir;
3193         backref->index = index;
3194         backref->namelen = namelen;
3195         memcpy(backref->name, name, namelen);
3196         backref->name[namelen] = '\0';
3197         list_add_tail(&backref->list, &rec->backrefs);
3198         return backref;
3199 }
3200
3201 static void free_root_record(struct cache_extent *cache)
3202 {
3203         struct root_record *rec;
3204         struct root_backref *backref;
3205
3206         rec = container_of(cache, struct root_record, cache);
3207         while (!list_empty(&rec->backrefs)) {
3208                 backref = to_root_backref(rec->backrefs.next);
3209                 list_del(&backref->list);
3210                 free(backref);
3211         }
3212
3213         free(rec);
3214 }
3215
3216 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3217
3218 static int add_root_backref(struct cache_tree *root_cache,
3219                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3220                             const char *name, int namelen,
3221                             int item_type, int errors)
3222 {
3223         struct root_record *rec;
3224         struct root_backref *backref;
3225
3226         rec = get_root_rec(root_cache, root_id);
3227         BUG_ON(IS_ERR(rec));
3228         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3229         BUG_ON(!backref);
3230
3231         backref->errors |= errors;
3232
3233         if (item_type != BTRFS_DIR_ITEM_KEY) {
3234                 if (backref->found_dir_index || backref->found_back_ref ||
3235                     backref->found_forward_ref) {
3236                         if (backref->index != index)
3237                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3238                 } else {
3239                         backref->index = index;
3240                 }
3241         }
3242
3243         if (item_type == BTRFS_DIR_ITEM_KEY) {
3244                 if (backref->found_forward_ref)
3245                         rec->found_ref++;
3246                 backref->found_dir_item = 1;
3247         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3248                 backref->found_dir_index = 1;
3249         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3250                 if (backref->found_forward_ref)
3251                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3252                 else if (backref->found_dir_item)
3253                         rec->found_ref++;
3254                 backref->found_forward_ref = 1;
3255         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3256                 if (backref->found_back_ref)
3257                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3258                 backref->found_back_ref = 1;
3259         } else {
3260                 BUG_ON(1);
3261         }
3262
3263         if (backref->found_forward_ref && backref->found_dir_item)
3264                 backref->reachable = 1;
3265         return 0;
3266 }
3267
3268 static int merge_root_recs(struct btrfs_root *root,
3269                            struct cache_tree *src_cache,
3270                            struct cache_tree *dst_cache)
3271 {
3272         struct cache_extent *cache;
3273         struct ptr_node *node;
3274         struct inode_record *rec;
3275         struct inode_backref *backref;
3276         int ret = 0;
3277
3278         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3279                 free_inode_recs_tree(src_cache);
3280                 return 0;
3281         }
3282
3283         while (1) {
3284                 cache = search_cache_extent(src_cache, 0);
3285                 if (!cache)
3286                         break;
3287                 node = container_of(cache, struct ptr_node, cache);
3288                 rec = node->data;
3289                 remove_cache_extent(src_cache, &node->cache);
3290                 free(node);
3291
3292                 ret = is_child_root(root, root->objectid, rec->ino);
3293                 if (ret < 0)
3294                         break;
3295                 else if (ret == 0)
3296                         goto skip;
3297
3298                 list_for_each_entry(backref, &rec->backrefs, list) {
3299                         BUG_ON(backref->found_inode_ref);
3300                         if (backref->found_dir_item)
3301                                 add_root_backref(dst_cache, rec->ino,
3302                                         root->root_key.objectid, backref->dir,
3303                                         backref->index, backref->name,
3304                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3305                                         backref->errors);
3306                         if (backref->found_dir_index)
3307                                 add_root_backref(dst_cache, rec->ino,
3308                                         root->root_key.objectid, backref->dir,
3309                                         backref->index, backref->name,
3310                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3311                                         backref->errors);
3312                 }
3313 skip:
3314                 free_inode_rec(rec);
3315         }
3316         if (ret < 0)
3317                 return ret;
3318         return 0;
3319 }
3320
3321 static int check_root_refs(struct btrfs_root *root,
3322                            struct cache_tree *root_cache)
3323 {
3324         struct root_record *rec;
3325         struct root_record *ref_root;
3326         struct root_backref *backref;
3327         struct cache_extent *cache;
3328         int loop = 1;
3329         int ret;
3330         int error;
3331         int errors = 0;
3332
3333         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3334         BUG_ON(IS_ERR(rec));
3335         rec->found_ref = 1;
3336
3337         /* fixme: this can not detect circular references */
3338         while (loop) {
3339                 loop = 0;
3340                 cache = search_cache_extent(root_cache, 0);
3341                 while (1) {
3342                         if (!cache)
3343                                 break;
3344                         rec = container_of(cache, struct root_record, cache);
3345                         cache = next_cache_extent(cache);
3346
3347                         if (rec->found_ref == 0)
3348                                 continue;
3349
3350                         list_for_each_entry(backref, &rec->backrefs, list) {
3351                                 if (!backref->reachable)
3352                                         continue;
3353
3354                                 ref_root = get_root_rec(root_cache,
3355                                                         backref->ref_root);
3356                                 BUG_ON(IS_ERR(ref_root));
3357                                 if (ref_root->found_ref > 0)
3358                                         continue;
3359
3360                                 backref->reachable = 0;
3361                                 rec->found_ref--;
3362                                 if (rec->found_ref == 0)
3363                                         loop = 1;
3364                         }
3365                 }
3366         }
3367
3368         cache = search_cache_extent(root_cache, 0);
3369         while (1) {
3370                 if (!cache)
3371                         break;
3372                 rec = container_of(cache, struct root_record, cache);
3373                 cache = next_cache_extent(cache);
3374
3375                 if (rec->found_ref == 0 &&
3376                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3377                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3378                         ret = check_orphan_item(root->fs_info->tree_root,
3379                                                 rec->objectid);
3380                         if (ret == 0)
3381                                 continue;
3382
3383                         /*
3384                          * If we don't have a root item then we likely just have
3385                          * a dir item in a snapshot for this root but no actual
3386                          * ref key or anything so it's meaningless.
3387                          */
3388                         if (!rec->found_root_item)
3389                                 continue;
3390                         errors++;
3391                         fprintf(stderr, "fs tree %llu not referenced\n",
3392                                 (unsigned long long)rec->objectid);
3393                 }
3394
3395                 error = 0;
3396                 if (rec->found_ref > 0 && !rec->found_root_item)
3397                         error = 1;
3398                 list_for_each_entry(backref, &rec->backrefs, list) {
3399                         if (!backref->found_dir_item)
3400                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3401                         if (!backref->found_dir_index)
3402                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3403                         if (!backref->found_back_ref)
3404                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3405                         if (!backref->found_forward_ref)
3406                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3407                         if (backref->reachable && backref->errors)
3408                                 error = 1;
3409                 }
3410                 if (!error)
3411                         continue;
3412
3413                 errors++;
3414                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3415                         (unsigned long long)rec->objectid, rec->found_ref,
3416                          rec->found_root_item ? "" : "not found");
3417
3418                 list_for_each_entry(backref, &rec->backrefs, list) {
3419                         if (!backref->reachable)
3420                                 continue;
3421                         if (!backref->errors && rec->found_root_item)
3422                                 continue;
3423                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3424                                 " index %llu namelen %u name %s errors %x\n",
3425                                 (unsigned long long)backref->ref_root,
3426                                 (unsigned long long)backref->dir,
3427                                 (unsigned long long)backref->index,
3428                                 backref->namelen, backref->name,
3429                                 backref->errors);
3430                         print_ref_error(backref->errors);
3431                 }
3432         }
3433         return errors > 0 ? 1 : 0;
3434 }
3435
3436 static int process_root_ref(struct extent_buffer *eb, int slot,
3437                             struct btrfs_key *key,
3438                             struct cache_tree *root_cache)
3439 {
3440         u64 dirid;
3441         u64 index;
3442         u32 len;
3443         u32 name_len;
3444         struct btrfs_root_ref *ref;
3445         char namebuf[BTRFS_NAME_LEN];
3446         int error;
3447
3448         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3449
3450         dirid = btrfs_root_ref_dirid(eb, ref);
3451         index = btrfs_root_ref_sequence(eb, ref);
3452         name_len = btrfs_root_ref_name_len(eb, ref);
3453
3454         if (name_len <= BTRFS_NAME_LEN) {
3455                 len = name_len;
3456                 error = 0;
3457         } else {
3458                 len = BTRFS_NAME_LEN;
3459                 error = REF_ERR_NAME_TOO_LONG;
3460         }
3461         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3462
3463         if (key->type == BTRFS_ROOT_REF_KEY) {
3464                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3465                                  index, namebuf, len, key->type, error);
3466         } else {
3467                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3468                                  index, namebuf, len, key->type, error);
3469         }
3470         return 0;
3471 }
3472
3473 static void free_corrupt_block(struct cache_extent *cache)
3474 {
3475         struct btrfs_corrupt_block *corrupt;
3476
3477         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3478         free(corrupt);
3479 }
3480
3481 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3482
3483 /*
3484  * Repair the btree of the given root.
3485  *
3486  * The fix is to remove the node key in corrupt_blocks cache_tree.
3487  * and rebalance the tree.
3488  * After the fix, the btree should be writeable.
3489  */
3490 static int repair_btree(struct btrfs_root *root,
3491                         struct cache_tree *corrupt_blocks)
3492 {
3493         struct btrfs_trans_handle *trans;
3494         struct btrfs_path *path;
3495         struct btrfs_corrupt_block *corrupt;
3496         struct cache_extent *cache;
3497         struct btrfs_key key;
3498         u64 offset;
3499         int level;
3500         int ret = 0;
3501
3502         if (cache_tree_empty(corrupt_blocks))
3503                 return 0;
3504
3505         path = btrfs_alloc_path();
3506         if (!path)
3507                 return -ENOMEM;
3508
3509         trans = btrfs_start_transaction(root, 1);
3510         if (IS_ERR(trans)) {
3511                 ret = PTR_ERR(trans);
3512                 fprintf(stderr, "Error starting transaction: %s\n",
3513                         strerror(-ret));
3514                 goto out_free_path;
3515         }
3516         cache = first_cache_extent(corrupt_blocks);
3517         while (cache) {
3518                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3519                                        cache);
3520                 level = corrupt->level;
3521                 path->lowest_level = level;
3522                 key.objectid = corrupt->key.objectid;
3523                 key.type = corrupt->key.type;
3524                 key.offset = corrupt->key.offset;
3525
3526                 /*
3527                  * Here we don't want to do any tree balance, since it may
3528                  * cause a balance with corrupted brother leaf/node,
3529                  * so ins_len set to 0 here.
3530                  * Balance will be done after all corrupt node/leaf is deleted.
3531                  */
3532                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3533                 if (ret < 0)
3534                         goto out;
3535                 offset = btrfs_node_blockptr(path->nodes[level],
3536                                              path->slots[level]);
3537
3538                 /* Remove the ptr */
3539                 ret = btrfs_del_ptr(trans, root, path, level,
3540                                     path->slots[level]);
3541                 if (ret < 0)
3542                         goto out;
3543                 /*
3544                  * Remove the corresponding extent
3545                  * return value is not concerned.
3546                  */
3547                 btrfs_release_path(path);
3548                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3549                                         0, root->root_key.objectid,
3550                                         level - 1, 0);
3551                 cache = next_cache_extent(cache);
3552         }
3553
3554         /* Balance the btree using btrfs_search_slot() */
3555         cache = first_cache_extent(corrupt_blocks);
3556         while (cache) {
3557                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3558                                        cache);
3559                 memcpy(&key, &corrupt->key, sizeof(key));
3560                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3561                 if (ret < 0)
3562                         goto out;
3563                 /* return will always >0 since it won't find the item */
3564                 ret = 0;
3565                 btrfs_release_path(path);
3566                 cache = next_cache_extent(cache);
3567         }
3568 out:
3569         btrfs_commit_transaction(trans, root);
3570 out_free_path:
3571         btrfs_free_path(path);
3572         return ret;
3573 }
3574
3575 static int check_fs_root(struct btrfs_root *root,
3576                          struct cache_tree *root_cache,
3577                          struct walk_control *wc)
3578 {
3579         int ret = 0;
3580         int err = 0;
3581         int wret;
3582         int level;
3583         struct btrfs_path path;
3584         struct shared_node root_node;
3585         struct root_record *rec;
3586         struct btrfs_root_item *root_item = &root->root_item;
3587         struct cache_tree corrupt_blocks;
3588         struct orphan_data_extent *orphan;
3589         struct orphan_data_extent *tmp;
3590         enum btrfs_tree_block_status status;
3591         struct node_refs nrefs;
3592
3593         /*
3594          * Reuse the corrupt_block cache tree to record corrupted tree block
3595          *
3596          * Unlike the usage in extent tree check, here we do it in a per
3597          * fs/subvol tree base.
3598          */
3599         cache_tree_init(&corrupt_blocks);
3600         root->fs_info->corrupt_blocks = &corrupt_blocks;
3601
3602         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3603                 rec = get_root_rec(root_cache, root->root_key.objectid);
3604                 BUG_ON(IS_ERR(rec));
3605                 if (btrfs_root_refs(root_item) > 0)
3606                         rec->found_root_item = 1;
3607         }
3608
3609         btrfs_init_path(&path);
3610         memset(&root_node, 0, sizeof(root_node));
3611         cache_tree_init(&root_node.root_cache);
3612         cache_tree_init(&root_node.inode_cache);
3613         memset(&nrefs, 0, sizeof(nrefs));
3614
3615         /* Move the orphan extent record to corresponding inode_record */
3616         list_for_each_entry_safe(orphan, tmp,
3617                                  &root->orphan_data_extents, list) {
3618                 struct inode_record *inode;
3619
3620                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3621                                       1);
3622                 BUG_ON(IS_ERR(inode));
3623                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3624                 list_move(&orphan->list, &inode->orphan_extents);
3625         }
3626
3627         level = btrfs_header_level(root->node);
3628         memset(wc->nodes, 0, sizeof(wc->nodes));
3629         wc->nodes[level] = &root_node;
3630         wc->active_node = level;
3631         wc->root_level = level;
3632
3633         /* We may not have checked the root block, lets do that now */
3634         if (btrfs_is_leaf(root->node))
3635                 status = btrfs_check_leaf(root, NULL, root->node);
3636         else
3637                 status = btrfs_check_node(root, NULL, root->node);
3638         if (status != BTRFS_TREE_BLOCK_CLEAN)
3639                 return -EIO;
3640
3641         if (btrfs_root_refs(root_item) > 0 ||
3642             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3643                 path.nodes[level] = root->node;
3644                 extent_buffer_get(root->node);
3645                 path.slots[level] = 0;
3646         } else {
3647                 struct btrfs_key key;
3648                 struct btrfs_disk_key found_key;
3649
3650                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3651                 level = root_item->drop_level;
3652                 path.lowest_level = level;
3653                 if (level > btrfs_header_level(root->node) ||
3654                     level >= BTRFS_MAX_LEVEL) {
3655                         error("ignoring invalid drop level: %u", level);
3656                         goto skip_walking;
3657                 }
3658                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3659                 if (wret < 0)
3660                         goto skip_walking;
3661                 btrfs_node_key(path.nodes[level], &found_key,
3662                                 path.slots[level]);
3663                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3664                                         sizeof(found_key)));
3665         }
3666
3667         while (1) {
3668                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3669                 if (wret < 0)
3670                         ret = wret;
3671                 if (wret != 0)
3672                         break;
3673
3674                 wret = walk_up_tree(root, &path, wc, &level);
3675                 if (wret < 0)
3676                         ret = wret;
3677                 if (wret != 0)
3678                         break;
3679         }
3680 skip_walking:
3681         btrfs_release_path(&path);
3682
3683         if (!cache_tree_empty(&corrupt_blocks)) {
3684                 struct cache_extent *cache;
3685                 struct btrfs_corrupt_block *corrupt;
3686
3687                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3688                        root->root_key.objectid);
3689                 cache = first_cache_extent(&corrupt_blocks);
3690                 while (cache) {
3691                         corrupt = container_of(cache,
3692                                                struct btrfs_corrupt_block,
3693                                                cache);
3694                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3695                                cache->start, corrupt->level,
3696                                corrupt->key.objectid, corrupt->key.type,
3697                                corrupt->key.offset);
3698                         cache = next_cache_extent(cache);
3699                 }
3700                 if (repair) {
3701                         printf("Try to repair the btree for root %llu\n",
3702                                root->root_key.objectid);
3703                         ret = repair_btree(root, &corrupt_blocks);
3704                         if (ret < 0)
3705                                 fprintf(stderr, "Failed to repair btree: %s\n",
3706                                         strerror(-ret));
3707                         if (!ret)
3708                                 printf("Btree for root %llu is fixed\n",
3709                                        root->root_key.objectid);
3710                 }
3711         }
3712
3713         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3714         if (err < 0)
3715                 ret = err;
3716
3717         if (root_node.current) {
3718                 root_node.current->checked = 1;
3719                 maybe_free_inode_rec(&root_node.inode_cache,
3720                                 root_node.current);
3721         }
3722
3723         err = check_inode_recs(root, &root_node.inode_cache);
3724         if (!ret)
3725                 ret = err;
3726
3727         free_corrupt_blocks_tree(&corrupt_blocks);
3728         root->fs_info->corrupt_blocks = NULL;
3729         free_orphan_data_extents(&root->orphan_data_extents);
3730         return ret;
3731 }
3732
3733 static int fs_root_objectid(u64 objectid)
3734 {
3735         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3736             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3737                 return 1;
3738         return is_fstree(objectid);
3739 }
3740
3741 static int check_fs_roots(struct btrfs_root *root,
3742                           struct cache_tree *root_cache)
3743 {
3744         struct btrfs_path path;
3745         struct btrfs_key key;
3746         struct walk_control wc;
3747         struct extent_buffer *leaf, *tree_node;
3748         struct btrfs_root *tmp_root;
3749         struct btrfs_root *tree_root = root->fs_info->tree_root;
3750         int ret;
3751         int err = 0;
3752
3753         if (ctx.progress_enabled) {
3754                 ctx.tp = TASK_FS_ROOTS;
3755                 task_start(ctx.info);
3756         }
3757
3758         /*
3759          * Just in case we made any changes to the extent tree that weren't
3760          * reflected into the free space cache yet.
3761          */
3762         if (repair)
3763                 reset_cached_block_groups(root->fs_info);
3764         memset(&wc, 0, sizeof(wc));
3765         cache_tree_init(&wc.shared);
3766         btrfs_init_path(&path);
3767
3768 again:
3769         key.offset = 0;
3770         key.objectid = 0;
3771         key.type = BTRFS_ROOT_ITEM_KEY;
3772         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3773         if (ret < 0) {
3774                 err = 1;
3775                 goto out;
3776         }
3777         tree_node = tree_root->node;
3778         while (1) {
3779                 if (tree_node != tree_root->node) {
3780                         free_root_recs_tree(root_cache);
3781                         btrfs_release_path(&path);
3782                         goto again;
3783                 }
3784                 leaf = path.nodes[0];
3785                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3786                         ret = btrfs_next_leaf(tree_root, &path);
3787                         if (ret) {
3788                                 if (ret < 0)
3789                                         err = 1;
3790                                 break;
3791                         }
3792                         leaf = path.nodes[0];
3793                 }
3794                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3795                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3796                     fs_root_objectid(key.objectid)) {
3797                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3798                                 tmp_root = btrfs_read_fs_root_no_cache(
3799                                                 root->fs_info, &key);
3800                         } else {
3801                                 key.offset = (u64)-1;
3802                                 tmp_root = btrfs_read_fs_root(
3803                                                 root->fs_info, &key);
3804                         }
3805                         if (IS_ERR(tmp_root)) {
3806                                 err = 1;
3807                                 goto next;
3808                         }
3809                         ret = check_fs_root(tmp_root, root_cache, &wc);
3810                         if (ret == -EAGAIN) {
3811                                 free_root_recs_tree(root_cache);
3812                                 btrfs_release_path(&path);
3813                                 goto again;
3814                         }
3815                         if (ret)
3816                                 err = 1;
3817                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3818                                 btrfs_free_fs_root(tmp_root);
3819                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3820                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3821                         process_root_ref(leaf, path.slots[0], &key,
3822                                          root_cache);
3823                 }
3824 next:
3825                 path.slots[0]++;
3826         }
3827 out:
3828         btrfs_release_path(&path);
3829         if (err)
3830                 free_extent_cache_tree(&wc.shared);
3831         if (!cache_tree_empty(&wc.shared))
3832                 fprintf(stderr, "warning line %d\n", __LINE__);
3833
3834         task_stop(ctx.info);
3835
3836         return err;
3837 }
3838
3839 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3840 {
3841         struct list_head *cur = rec->backrefs.next;
3842         struct extent_backref *back;
3843         struct tree_backref *tback;
3844         struct data_backref *dback;
3845         u64 found = 0;
3846         int err = 0;
3847
3848         while(cur != &rec->backrefs) {
3849                 back = to_extent_backref(cur);
3850                 cur = cur->next;
3851                 if (!back->found_extent_tree) {
3852                         err = 1;
3853                         if (!print_errs)
3854                                 goto out;
3855                         if (back->is_data) {
3856                                 dback = to_data_backref(back);
3857                                 fprintf(stderr, "Backref %llu %s %llu"
3858                                         " owner %llu offset %llu num_refs %lu"
3859                                         " not found in extent tree\n",
3860                                         (unsigned long long)rec->start,
3861                                         back->full_backref ?
3862                                         "parent" : "root",
3863                                         back->full_backref ?
3864                                         (unsigned long long)dback->parent:
3865                                         (unsigned long long)dback->root,
3866                                         (unsigned long long)dback->owner,
3867                                         (unsigned long long)dback->offset,
3868                                         (unsigned long)dback->num_refs);
3869                         } else {
3870                                 tback = to_tree_backref(back);
3871                                 fprintf(stderr, "Backref %llu parent %llu"
3872                                         " root %llu not found in extent tree\n",
3873                                         (unsigned long long)rec->start,
3874                                         (unsigned long long)tback->parent,
3875                                         (unsigned long long)tback->root);
3876                         }
3877                 }
3878                 if (!back->is_data && !back->found_ref) {
3879                         err = 1;
3880                         if (!print_errs)
3881                                 goto out;
3882                         tback = to_tree_backref(back);
3883                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3884                                 (unsigned long long)rec->start,
3885                                 back->full_backref ? "parent" : "root",
3886                                 back->full_backref ?
3887                                 (unsigned long long)tback->parent :
3888                                 (unsigned long long)tback->root, back);
3889                 }
3890                 if (back->is_data) {
3891                         dback = to_data_backref(back);
3892                         if (dback->found_ref != dback->num_refs) {
3893                                 err = 1;
3894                                 if (!print_errs)
3895                                         goto out;
3896                                 fprintf(stderr, "Incorrect local backref count"
3897                                         " on %llu %s %llu owner %llu"
3898                                         " offset %llu found %u wanted %u back %p\n",
3899                                         (unsigned long long)rec->start,
3900                                         back->full_backref ?
3901                                         "parent" : "root",
3902                                         back->full_backref ?
3903                                         (unsigned long long)dback->parent:
3904                                         (unsigned long long)dback->root,
3905                                         (unsigned long long)dback->owner,
3906                                         (unsigned long long)dback->offset,
3907                                         dback->found_ref, dback->num_refs, back);
3908                         }
3909                         if (dback->disk_bytenr != rec->start) {
3910                                 err = 1;
3911                                 if (!print_errs)
3912                                         goto out;
3913                                 fprintf(stderr, "Backref disk bytenr does not"
3914                                         " match extent record, bytenr=%llu, "
3915                                         "ref bytenr=%llu\n",
3916                                         (unsigned long long)rec->start,
3917                                         (unsigned long long)dback->disk_bytenr);
3918                         }
3919
3920                         if (dback->bytes != rec->nr) {
3921                                 err = 1;
3922                                 if (!print_errs)
3923                                         goto out;
3924                                 fprintf(stderr, "Backref bytes do not match "
3925                                         "extent backref, bytenr=%llu, ref "
3926                                         "bytes=%llu, backref bytes=%llu\n",
3927                                         (unsigned long long)rec->start,
3928                                         (unsigned long long)rec->nr,
3929                                         (unsigned long long)dback->bytes);
3930                         }
3931                 }
3932                 if (!back->is_data) {
3933                         found += 1;
3934                 } else {
3935                         dback = to_data_backref(back);
3936                         found += dback->found_ref;
3937                 }
3938         }
3939         if (found != rec->refs) {
3940                 err = 1;
3941                 if (!print_errs)
3942                         goto out;
3943                 fprintf(stderr, "Incorrect global backref count "
3944                         "on %llu found %llu wanted %llu\n",
3945                         (unsigned long long)rec->start,
3946                         (unsigned long long)found,
3947                         (unsigned long long)rec->refs);
3948         }
3949 out:
3950         return err;
3951 }
3952
3953 static int free_all_extent_backrefs(struct extent_record *rec)
3954 {
3955         struct extent_backref *back;
3956         struct list_head *cur;
3957         while (!list_empty(&rec->backrefs)) {
3958                 cur = rec->backrefs.next;
3959                 back = to_extent_backref(cur);
3960                 list_del(cur);
3961                 free(back);
3962         }
3963         return 0;
3964 }
3965
3966 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3967                                      struct cache_tree *extent_cache)
3968 {
3969         struct cache_extent *cache;
3970         struct extent_record *rec;
3971
3972         while (1) {
3973                 cache = first_cache_extent(extent_cache);
3974                 if (!cache)
3975                         break;
3976                 rec = container_of(cache, struct extent_record, cache);
3977                 remove_cache_extent(extent_cache, cache);
3978                 free_all_extent_backrefs(rec);
3979                 free(rec);
3980         }
3981 }
3982
3983 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3984                                  struct extent_record *rec)
3985 {
3986         if (rec->content_checked && rec->owner_ref_checked &&
3987             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3988             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3989             !rec->bad_full_backref && !rec->crossing_stripes &&
3990             !rec->wrong_chunk_type) {
3991                 remove_cache_extent(extent_cache, &rec->cache);
3992                 free_all_extent_backrefs(rec);
3993                 list_del_init(&rec->list);
3994                 free(rec);
3995         }
3996         return 0;
3997 }
3998
3999 static int check_owner_ref(struct btrfs_root *root,
4000                             struct extent_record *rec,
4001                             struct extent_buffer *buf)
4002 {
4003         struct extent_backref *node;
4004         struct tree_backref *back;
4005         struct btrfs_root *ref_root;
4006         struct btrfs_key key;
4007         struct btrfs_path path;
4008         struct extent_buffer *parent;
4009         int level;
4010         int found = 0;
4011         int ret;
4012
4013         list_for_each_entry(node, &rec->backrefs, list) {
4014                 if (node->is_data)
4015                         continue;
4016                 if (!node->found_ref)
4017                         continue;
4018                 if (node->full_backref)
4019                         continue;
4020                 back = to_tree_backref(node);
4021                 if (btrfs_header_owner(buf) == back->root)
4022                         return 0;
4023         }
4024         BUG_ON(rec->is_root);
4025
4026         /* try to find the block by search corresponding fs tree */
4027         key.objectid = btrfs_header_owner(buf);
4028         key.type = BTRFS_ROOT_ITEM_KEY;
4029         key.offset = (u64)-1;
4030
4031         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4032         if (IS_ERR(ref_root))
4033                 return 1;
4034
4035         level = btrfs_header_level(buf);
4036         if (level == 0)
4037                 btrfs_item_key_to_cpu(buf, &key, 0);
4038         else
4039                 btrfs_node_key_to_cpu(buf, &key, 0);
4040
4041         btrfs_init_path(&path);
4042         path.lowest_level = level + 1;
4043         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4044         if (ret < 0)
4045                 return 0;
4046
4047         parent = path.nodes[level + 1];
4048         if (parent && buf->start == btrfs_node_blockptr(parent,
4049                                                         path.slots[level + 1]))
4050                 found = 1;
4051
4052         btrfs_release_path(&path);
4053         return found ? 0 : 1;
4054 }
4055
4056 static int is_extent_tree_record(struct extent_record *rec)
4057 {
4058         struct list_head *cur = rec->backrefs.next;
4059         struct extent_backref *node;
4060         struct tree_backref *back;
4061         int is_extent = 0;
4062
4063         while(cur != &rec->backrefs) {
4064                 node = to_extent_backref(cur);
4065                 cur = cur->next;
4066                 if (node->is_data)
4067                         return 0;
4068                 back = to_tree_backref(node);
4069                 if (node->full_backref)
4070                         return 0;
4071                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4072                         is_extent = 1;
4073         }
4074         return is_extent;
4075 }
4076
4077
4078 static int record_bad_block_io(struct btrfs_fs_info *info,
4079                                struct cache_tree *extent_cache,
4080                                u64 start, u64 len)
4081 {
4082         struct extent_record *rec;
4083         struct cache_extent *cache;
4084         struct btrfs_key key;
4085
4086         cache = lookup_cache_extent(extent_cache, start, len);
4087         if (!cache)
4088                 return 0;
4089
4090         rec = container_of(cache, struct extent_record, cache);
4091         if (!is_extent_tree_record(rec))
4092                 return 0;
4093
4094         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4095         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4096 }
4097
4098 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4099                        struct extent_buffer *buf, int slot)
4100 {
4101         if (btrfs_header_level(buf)) {
4102                 struct btrfs_key_ptr ptr1, ptr2;
4103
4104                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4105                                    sizeof(struct btrfs_key_ptr));
4106                 read_extent_buffer(buf, &ptr2,
4107                                    btrfs_node_key_ptr_offset(slot + 1),
4108                                    sizeof(struct btrfs_key_ptr));
4109                 write_extent_buffer(buf, &ptr1,
4110                                     btrfs_node_key_ptr_offset(slot + 1),
4111                                     sizeof(struct btrfs_key_ptr));
4112                 write_extent_buffer(buf, &ptr2,
4113                                     btrfs_node_key_ptr_offset(slot),
4114                                     sizeof(struct btrfs_key_ptr));
4115                 if (slot == 0) {
4116                         struct btrfs_disk_key key;
4117                         btrfs_node_key(buf, &key, 0);
4118                         btrfs_fixup_low_keys(root, path, &key,
4119                                              btrfs_header_level(buf) + 1);
4120                 }
4121         } else {
4122                 struct btrfs_item *item1, *item2;
4123                 struct btrfs_key k1, k2;
4124                 char *item1_data, *item2_data;
4125                 u32 item1_offset, item2_offset, item1_size, item2_size;
4126
4127                 item1 = btrfs_item_nr(slot);
4128                 item2 = btrfs_item_nr(slot + 1);
4129                 btrfs_item_key_to_cpu(buf, &k1, slot);
4130                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4131                 item1_offset = btrfs_item_offset(buf, item1);
4132                 item2_offset = btrfs_item_offset(buf, item2);
4133                 item1_size = btrfs_item_size(buf, item1);
4134                 item2_size = btrfs_item_size(buf, item2);
4135
4136                 item1_data = malloc(item1_size);
4137                 if (!item1_data)
4138                         return -ENOMEM;
4139                 item2_data = malloc(item2_size);
4140                 if (!item2_data) {
4141                         free(item1_data);
4142                         return -ENOMEM;
4143                 }
4144
4145                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4146                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4147
4148                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4149                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4150                 free(item1_data);
4151                 free(item2_data);
4152
4153                 btrfs_set_item_offset(buf, item1, item2_offset);
4154                 btrfs_set_item_offset(buf, item2, item1_offset);
4155                 btrfs_set_item_size(buf, item1, item2_size);
4156                 btrfs_set_item_size(buf, item2, item1_size);
4157
4158                 path->slots[0] = slot;
4159                 btrfs_set_item_key_unsafe(root, path, &k2);
4160                 path->slots[0] = slot + 1;
4161                 btrfs_set_item_key_unsafe(root, path, &k1);
4162         }
4163         return 0;
4164 }
4165
4166 static int fix_key_order(struct btrfs_trans_handle *trans,
4167                          struct btrfs_root *root,
4168                          struct btrfs_path *path)
4169 {
4170         struct extent_buffer *buf;
4171         struct btrfs_key k1, k2;
4172         int i;
4173         int level = path->lowest_level;
4174         int ret = -EIO;
4175
4176         buf = path->nodes[level];
4177         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4178                 if (level) {
4179                         btrfs_node_key_to_cpu(buf, &k1, i);
4180                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4181                 } else {
4182                         btrfs_item_key_to_cpu(buf, &k1, i);
4183                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4184                 }
4185                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4186                         continue;
4187                 ret = swap_values(root, path, buf, i);
4188                 if (ret)
4189                         break;
4190                 btrfs_mark_buffer_dirty(buf);
4191                 i = 0;
4192         }
4193         return ret;
4194 }
4195
4196 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4197                              struct btrfs_root *root,
4198                              struct btrfs_path *path,
4199                              struct extent_buffer *buf, int slot)
4200 {
4201         struct btrfs_key key;
4202         int nritems = btrfs_header_nritems(buf);
4203
4204         btrfs_item_key_to_cpu(buf, &key, slot);
4205
4206         /* These are all the keys we can deal with missing. */
4207         if (key.type != BTRFS_DIR_INDEX_KEY &&
4208             key.type != BTRFS_EXTENT_ITEM_KEY &&
4209             key.type != BTRFS_METADATA_ITEM_KEY &&
4210             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4211             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4212                 return -1;
4213
4214         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4215                (unsigned long long)key.objectid, key.type,
4216                (unsigned long long)key.offset, slot, buf->start);
4217         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4218                               btrfs_item_nr_offset(slot + 1),
4219                               sizeof(struct btrfs_item) *
4220                               (nritems - slot - 1));
4221         btrfs_set_header_nritems(buf, nritems - 1);
4222         if (slot == 0) {
4223                 struct btrfs_disk_key disk_key;
4224
4225                 btrfs_item_key(buf, &disk_key, 0);
4226                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4227         }
4228         btrfs_mark_buffer_dirty(buf);
4229         return 0;
4230 }
4231
4232 static int fix_item_offset(struct btrfs_trans_handle *trans,
4233                            struct btrfs_root *root,
4234                            struct btrfs_path *path)
4235 {
4236         struct extent_buffer *buf;
4237         int i;
4238         int ret = 0;
4239
4240         /* We should only get this for leaves */
4241         BUG_ON(path->lowest_level);
4242         buf = path->nodes[0];
4243 again:
4244         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4245                 unsigned int shift = 0, offset;
4246
4247                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4248                     BTRFS_LEAF_DATA_SIZE(root)) {
4249                         if (btrfs_item_end_nr(buf, i) >
4250                             BTRFS_LEAF_DATA_SIZE(root)) {
4251                                 ret = delete_bogus_item(trans, root, path,
4252                                                         buf, i);
4253                                 if (!ret)
4254                                         goto again;
4255                                 fprintf(stderr, "item is off the end of the "
4256                                         "leaf, can't fix\n");
4257                                 ret = -EIO;
4258                                 break;
4259                         }
4260                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4261                                 btrfs_item_end_nr(buf, i);
4262                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4263                            btrfs_item_offset_nr(buf, i - 1)) {
4264                         if (btrfs_item_end_nr(buf, i) >
4265                             btrfs_item_offset_nr(buf, i - 1)) {
4266                                 ret = delete_bogus_item(trans, root, path,
4267                                                         buf, i);
4268                                 if (!ret)
4269                                         goto again;
4270                                 fprintf(stderr, "items overlap, can't fix\n");
4271                                 ret = -EIO;
4272                                 break;
4273                         }
4274                         shift = btrfs_item_offset_nr(buf, i - 1) -
4275                                 btrfs_item_end_nr(buf, i);
4276                 }
4277                 if (!shift)
4278                         continue;
4279
4280                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4281                        i, shift, (unsigned long long)buf->start);
4282                 offset = btrfs_item_offset_nr(buf, i);
4283                 memmove_extent_buffer(buf,
4284                                       btrfs_leaf_data(buf) + offset + shift,
4285                                       btrfs_leaf_data(buf) + offset,
4286                                       btrfs_item_size_nr(buf, i));
4287                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4288                                       offset + shift);
4289                 btrfs_mark_buffer_dirty(buf);
4290         }
4291
4292         /*
4293          * We may have moved things, in which case we want to exit so we don't
4294          * write those changes out.  Once we have proper abort functionality in
4295          * progs this can be changed to something nicer.
4296          */
4297         BUG_ON(ret);
4298         return ret;
4299 }
4300
4301 /*
4302  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4303  * then just return -EIO.
4304  */
4305 static int try_to_fix_bad_block(struct btrfs_root *root,
4306                                 struct extent_buffer *buf,
4307                                 enum btrfs_tree_block_status status)
4308 {
4309         struct btrfs_trans_handle *trans;
4310         struct ulist *roots;
4311         struct ulist_node *node;
4312         struct btrfs_root *search_root;
4313         struct btrfs_path *path;
4314         struct ulist_iterator iter;
4315         struct btrfs_key root_key, key;
4316         int ret;
4317
4318         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4319             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4320                 return -EIO;
4321
4322         path = btrfs_alloc_path();
4323         if (!path)
4324                 return -EIO;
4325
4326         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4327                                    0, &roots);
4328         if (ret) {
4329                 btrfs_free_path(path);
4330                 return -EIO;
4331         }
4332
4333         ULIST_ITER_INIT(&iter);
4334         while ((node = ulist_next(roots, &iter))) {
4335                 root_key.objectid = node->val;
4336                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4337                 root_key.offset = (u64)-1;
4338
4339                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4340                 if (IS_ERR(root)) {
4341                         ret = -EIO;
4342                         break;
4343                 }
4344
4345
4346                 trans = btrfs_start_transaction(search_root, 0);
4347                 if (IS_ERR(trans)) {
4348                         ret = PTR_ERR(trans);
4349                         break;
4350                 }
4351
4352                 path->lowest_level = btrfs_header_level(buf);
4353                 path->skip_check_block = 1;
4354                 if (path->lowest_level)
4355                         btrfs_node_key_to_cpu(buf, &key, 0);
4356                 else
4357                         btrfs_item_key_to_cpu(buf, &key, 0);
4358                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4359                 if (ret) {
4360                         ret = -EIO;
4361                         btrfs_commit_transaction(trans, search_root);
4362                         break;
4363                 }
4364                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4365                         ret = fix_key_order(trans, search_root, path);
4366                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4367                         ret = fix_item_offset(trans, search_root, path);
4368                 if (ret) {
4369                         btrfs_commit_transaction(trans, search_root);
4370                         break;
4371                 }
4372                 btrfs_release_path(path);
4373                 btrfs_commit_transaction(trans, search_root);
4374         }
4375         ulist_free(roots);
4376         btrfs_free_path(path);
4377         return ret;
4378 }
4379
4380 static int check_block(struct btrfs_root *root,
4381                        struct cache_tree *extent_cache,
4382                        struct extent_buffer *buf, u64 flags)
4383 {
4384         struct extent_record *rec;
4385         struct cache_extent *cache;
4386         struct btrfs_key key;
4387         enum btrfs_tree_block_status status;
4388         int ret = 0;
4389         int level;
4390
4391         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4392         if (!cache)
4393                 return 1;
4394         rec = container_of(cache, struct extent_record, cache);
4395         rec->generation = btrfs_header_generation(buf);
4396
4397         level = btrfs_header_level(buf);
4398         if (btrfs_header_nritems(buf) > 0) {
4399
4400                 if (level == 0)
4401                         btrfs_item_key_to_cpu(buf, &key, 0);
4402                 else
4403                         btrfs_node_key_to_cpu(buf, &key, 0);
4404
4405                 rec->info_objectid = key.objectid;
4406         }
4407         rec->info_level = level;
4408
4409         if (btrfs_is_leaf(buf))
4410                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4411         else
4412                 status = btrfs_check_node(root, &rec->parent_key, buf);
4413
4414         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4415                 if (repair)
4416                         status = try_to_fix_bad_block(root, buf, status);
4417                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4418                         ret = -EIO;
4419                         fprintf(stderr, "bad block %llu\n",
4420                                 (unsigned long long)buf->start);
4421                 } else {
4422                         /*
4423                          * Signal to callers we need to start the scan over
4424                          * again since we'll have cowed blocks.
4425                          */
4426                         ret = -EAGAIN;
4427                 }
4428         } else {
4429                 rec->content_checked = 1;
4430                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4431                         rec->owner_ref_checked = 1;
4432                 else {
4433                         ret = check_owner_ref(root, rec, buf);
4434                         if (!ret)
4435                                 rec->owner_ref_checked = 1;
4436                 }
4437         }
4438         if (!ret)
4439                 maybe_free_extent_rec(extent_cache, rec);
4440         return ret;
4441 }
4442
4443 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4444                                                 u64 parent, u64 root)
4445 {
4446         struct list_head *cur = rec->backrefs.next;
4447         struct extent_backref *node;
4448         struct tree_backref *back;
4449
4450         while(cur != &rec->backrefs) {
4451                 node = to_extent_backref(cur);
4452                 cur = cur->next;
4453                 if (node->is_data)
4454                         continue;
4455                 back = to_tree_backref(node);
4456                 if (parent > 0) {
4457                         if (!node->full_backref)
4458                                 continue;
4459                         if (parent == back->parent)
4460                                 return back;
4461                 } else {
4462                         if (node->full_backref)
4463                                 continue;
4464                         if (back->root == root)
4465                                 return back;
4466                 }
4467         }
4468         return NULL;
4469 }
4470
4471 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4472                                                 u64 parent, u64 root)
4473 {
4474         struct tree_backref *ref = malloc(sizeof(*ref));
4475
4476         if (!ref)
4477                 return NULL;
4478         memset(&ref->node, 0, sizeof(ref->node));
4479         if (parent > 0) {
4480                 ref->parent = parent;
4481                 ref->node.full_backref = 1;
4482         } else {
4483                 ref->root = root;
4484                 ref->node.full_backref = 0;
4485         }
4486         list_add_tail(&ref->node.list, &rec->backrefs);
4487
4488         return ref;
4489 }
4490
4491 static struct data_backref *find_data_backref(struct extent_record *rec,
4492                                                 u64 parent, u64 root,
4493                                                 u64 owner, u64 offset,
4494                                                 int found_ref,
4495                                                 u64 disk_bytenr, u64 bytes)
4496 {
4497         struct list_head *cur = rec->backrefs.next;
4498         struct extent_backref *node;
4499         struct data_backref *back;
4500
4501         while(cur != &rec->backrefs) {
4502                 node = to_extent_backref(cur);
4503                 cur = cur->next;
4504                 if (!node->is_data)
4505                         continue;
4506                 back = to_data_backref(node);
4507                 if (parent > 0) {
4508                         if (!node->full_backref)
4509                                 continue;
4510                         if (parent == back->parent)
4511                                 return back;
4512                 } else {
4513                         if (node->full_backref)
4514                                 continue;
4515                         if (back->root == root && back->owner == owner &&
4516                             back->offset == offset) {
4517                                 if (found_ref && node->found_ref &&
4518                                     (back->bytes != bytes ||
4519                                     back->disk_bytenr != disk_bytenr))
4520                                         continue;
4521                                 return back;
4522                         }
4523                 }
4524         }
4525         return NULL;
4526 }
4527
4528 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4529                                                 u64 parent, u64 root,
4530                                                 u64 owner, u64 offset,
4531                                                 u64 max_size)
4532 {
4533         struct data_backref *ref = malloc(sizeof(*ref));
4534
4535         if (!ref)
4536                 return NULL;
4537         memset(&ref->node, 0, sizeof(ref->node));
4538         ref->node.is_data = 1;
4539
4540         if (parent > 0) {
4541                 ref->parent = parent;
4542                 ref->owner = 0;
4543                 ref->offset = 0;
4544                 ref->node.full_backref = 1;
4545         } else {
4546                 ref->root = root;
4547                 ref->owner = owner;
4548                 ref->offset = offset;
4549                 ref->node.full_backref = 0;
4550         }
4551         ref->bytes = max_size;
4552         ref->found_ref = 0;
4553         ref->num_refs = 0;
4554         list_add_tail(&ref->node.list, &rec->backrefs);
4555         if (max_size > rec->max_size)
4556                 rec->max_size = max_size;
4557         return ref;
4558 }
4559
4560 /* Check if the type of extent matches with its chunk */
4561 static void check_extent_type(struct extent_record *rec)
4562 {
4563         struct btrfs_block_group_cache *bg_cache;
4564
4565         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4566         if (!bg_cache)
4567                 return;
4568
4569         /* data extent, check chunk directly*/
4570         if (!rec->metadata) {
4571                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4572                         rec->wrong_chunk_type = 1;
4573                 return;
4574         }
4575
4576         /* metadata extent, check the obvious case first */
4577         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4578                                  BTRFS_BLOCK_GROUP_METADATA))) {
4579                 rec->wrong_chunk_type = 1;
4580                 return;
4581         }
4582
4583         /*
4584          * Check SYSTEM extent, as it's also marked as metadata, we can only
4585          * make sure it's a SYSTEM extent by its backref
4586          */
4587         if (!list_empty(&rec->backrefs)) {
4588                 struct extent_backref *node;
4589                 struct tree_backref *tback;
4590                 u64 bg_type;
4591
4592                 node = to_extent_backref(rec->backrefs.next);
4593                 if (node->is_data) {
4594                         /* tree block shouldn't have data backref */
4595                         rec->wrong_chunk_type = 1;
4596                         return;
4597                 }
4598                 tback = container_of(node, struct tree_backref, node);
4599
4600                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4601                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4602                 else
4603                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4604                 if (!(bg_cache->flags & bg_type))
4605                         rec->wrong_chunk_type = 1;
4606         }
4607 }
4608
4609 /*
4610  * Allocate a new extent record, fill default values from @tmpl and insert int
4611  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4612  * the cache, otherwise it fails.
4613  */
4614 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4615                 struct extent_record *tmpl)
4616 {
4617         struct extent_record *rec;
4618         int ret = 0;
4619
4620         rec = malloc(sizeof(*rec));
4621         if (!rec)
4622                 return -ENOMEM;
4623         rec->start = tmpl->start;
4624         rec->max_size = tmpl->max_size;
4625         rec->nr = max(tmpl->nr, tmpl->max_size);
4626         rec->found_rec = tmpl->found_rec;
4627         rec->content_checked = tmpl->content_checked;
4628         rec->owner_ref_checked = tmpl->owner_ref_checked;
4629         rec->num_duplicates = 0;
4630         rec->metadata = tmpl->metadata;
4631         rec->flag_block_full_backref = FLAG_UNSET;
4632         rec->bad_full_backref = 0;
4633         rec->crossing_stripes = 0;
4634         rec->wrong_chunk_type = 0;
4635         rec->is_root = tmpl->is_root;
4636         rec->refs = tmpl->refs;
4637         rec->extent_item_refs = tmpl->extent_item_refs;
4638         rec->parent_generation = tmpl->parent_generation;
4639         INIT_LIST_HEAD(&rec->backrefs);
4640         INIT_LIST_HEAD(&rec->dups);
4641         INIT_LIST_HEAD(&rec->list);
4642         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4643         rec->cache.start = tmpl->start;
4644         rec->cache.size = tmpl->nr;
4645         ret = insert_cache_extent(extent_cache, &rec->cache);
4646         BUG_ON(ret);
4647         bytes_used += rec->nr;
4648
4649         if (tmpl->metadata)
4650                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4651                                 global_info->tree_root->nodesize);
4652         check_extent_type(rec);
4653         return ret;
4654 }
4655
4656 /*
4657  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4658  * some are hints:
4659  * - refs              - if found, increase refs
4660  * - is_root           - if found, set
4661  * - content_checked   - if found, set
4662  * - owner_ref_checked - if found, set
4663  *
4664  * If not found, create a new one, initialize and insert.
4665  */
4666 static int add_extent_rec(struct cache_tree *extent_cache,
4667                 struct extent_record *tmpl)
4668 {
4669         struct extent_record *rec;
4670         struct cache_extent *cache;
4671         int ret = 0;
4672         int dup = 0;
4673
4674         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4675         if (cache) {
4676                 rec = container_of(cache, struct extent_record, cache);
4677                 if (tmpl->refs)
4678                         rec->refs++;
4679                 if (rec->nr == 1)
4680                         rec->nr = max(tmpl->nr, tmpl->max_size);
4681
4682                 /*
4683                  * We need to make sure to reset nr to whatever the extent
4684                  * record says was the real size, this way we can compare it to
4685                  * the backrefs.
4686                  */
4687                 if (tmpl->found_rec) {
4688                         if (tmpl->start != rec->start || rec->found_rec) {
4689                                 struct extent_record *tmp;
4690
4691                                 dup = 1;
4692                                 if (list_empty(&rec->list))
4693                                         list_add_tail(&rec->list,
4694                                                       &duplicate_extents);
4695
4696                                 /*
4697                                  * We have to do this song and dance in case we
4698                                  * find an extent record that falls inside of
4699                                  * our current extent record but does not have
4700                                  * the same objectid.
4701                                  */
4702                                 tmp = malloc(sizeof(*tmp));
4703                                 if (!tmp)
4704                                         return -ENOMEM;
4705                                 tmp->start = tmpl->start;
4706                                 tmp->max_size = tmpl->max_size;
4707                                 tmp->nr = tmpl->nr;
4708                                 tmp->found_rec = 1;
4709                                 tmp->metadata = tmpl->metadata;
4710                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4711                                 INIT_LIST_HEAD(&tmp->list);
4712                                 list_add_tail(&tmp->list, &rec->dups);
4713                                 rec->num_duplicates++;
4714                         } else {
4715                                 rec->nr = tmpl->nr;
4716                                 rec->found_rec = 1;
4717                         }
4718                 }
4719
4720                 if (tmpl->extent_item_refs && !dup) {
4721                         if (rec->extent_item_refs) {
4722                                 fprintf(stderr, "block %llu rec "
4723                                         "extent_item_refs %llu, passed %llu\n",
4724                                         (unsigned long long)tmpl->start,
4725                                         (unsigned long long)
4726                                                         rec->extent_item_refs,
4727                                         (unsigned long long)tmpl->extent_item_refs);
4728                         }
4729                         rec->extent_item_refs = tmpl->extent_item_refs;
4730                 }
4731                 if (tmpl->is_root)
4732                         rec->is_root = 1;
4733                 if (tmpl->content_checked)
4734                         rec->content_checked = 1;
4735                 if (tmpl->owner_ref_checked)
4736                         rec->owner_ref_checked = 1;
4737                 memcpy(&rec->parent_key, &tmpl->parent_key,
4738                                 sizeof(tmpl->parent_key));
4739                 if (tmpl->parent_generation)
4740                         rec->parent_generation = tmpl->parent_generation;
4741                 if (rec->max_size < tmpl->max_size)
4742                         rec->max_size = tmpl->max_size;
4743
4744                 /*
4745                  * A metadata extent can't cross stripe_len boundary, otherwise
4746                  * kernel scrub won't be able to handle it.
4747                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4748                  * it.
4749                  */
4750                 if (tmpl->metadata)
4751                         rec->crossing_stripes = check_crossing_stripes(
4752                                 rec->start, global_info->tree_root->nodesize);
4753                 check_extent_type(rec);
4754                 maybe_free_extent_rec(extent_cache, rec);
4755                 return ret;
4756         }
4757
4758         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4759
4760         return ret;
4761 }
4762
4763 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4764                             u64 parent, u64 root, int found_ref)
4765 {
4766         struct extent_record *rec;
4767         struct tree_backref *back;
4768         struct cache_extent *cache;
4769         int ret;
4770
4771         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4772         if (!cache) {
4773                 struct extent_record tmpl;
4774
4775                 memset(&tmpl, 0, sizeof(tmpl));
4776                 tmpl.start = bytenr;
4777                 tmpl.nr = 1;
4778                 tmpl.metadata = 1;
4779
4780                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4781                 if (ret)
4782                         return ret;
4783
4784                 /* really a bug in cache_extent implement now */
4785                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4786                 if (!cache)
4787                         return -ENOENT;
4788         }
4789
4790         rec = container_of(cache, struct extent_record, cache);
4791         if (rec->start != bytenr) {
4792                 /*
4793                  * Several cause, from unaligned bytenr to over lapping extents
4794                  */
4795                 return -EEXIST;
4796         }
4797
4798         back = find_tree_backref(rec, parent, root);
4799         if (!back) {
4800                 back = alloc_tree_backref(rec, parent, root);
4801                 if (!back)
4802                         return -ENOMEM;
4803         }
4804
4805         if (found_ref) {
4806                 if (back->node.found_ref) {
4807                         fprintf(stderr, "Extent back ref already exists "
4808                                 "for %llu parent %llu root %llu \n",
4809                                 (unsigned long long)bytenr,
4810                                 (unsigned long long)parent,
4811                                 (unsigned long long)root);
4812                 }
4813                 back->node.found_ref = 1;
4814         } else {
4815                 if (back->node.found_extent_tree) {
4816                         fprintf(stderr, "Extent back ref already exists "
4817                                 "for %llu parent %llu root %llu \n",
4818                                 (unsigned long long)bytenr,
4819                                 (unsigned long long)parent,
4820                                 (unsigned long long)root);
4821                 }
4822                 back->node.found_extent_tree = 1;
4823         }
4824         check_extent_type(rec);
4825         maybe_free_extent_rec(extent_cache, rec);
4826         return 0;
4827 }
4828
4829 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4830                             u64 parent, u64 root, u64 owner, u64 offset,
4831                             u32 num_refs, int found_ref, u64 max_size)
4832 {
4833         struct extent_record *rec;
4834         struct data_backref *back;
4835         struct cache_extent *cache;
4836         int ret;
4837
4838         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4839         if (!cache) {
4840                 struct extent_record tmpl;
4841
4842                 memset(&tmpl, 0, sizeof(tmpl));
4843                 tmpl.start = bytenr;
4844                 tmpl.nr = 1;
4845                 tmpl.max_size = max_size;
4846
4847                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4848                 if (ret)
4849                         return ret;
4850
4851                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4852                 if (!cache)
4853                         abort();
4854         }
4855
4856         rec = container_of(cache, struct extent_record, cache);
4857         if (rec->max_size < max_size)
4858                 rec->max_size = max_size;
4859
4860         /*
4861          * If found_ref is set then max_size is the real size and must match the
4862          * existing refs.  So if we have already found a ref then we need to
4863          * make sure that this ref matches the existing one, otherwise we need
4864          * to add a new backref so we can notice that the backrefs don't match
4865          * and we need to figure out who is telling the truth.  This is to
4866          * account for that awful fsync bug I introduced where we'd end up with
4867          * a btrfs_file_extent_item that would have its length include multiple
4868          * prealloc extents or point inside of a prealloc extent.
4869          */
4870         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4871                                  bytenr, max_size);
4872         if (!back) {
4873                 back = alloc_data_backref(rec, parent, root, owner, offset,
4874                                           max_size);
4875                 BUG_ON(!back);
4876         }
4877
4878         if (found_ref) {
4879                 BUG_ON(num_refs != 1);
4880                 if (back->node.found_ref)
4881                         BUG_ON(back->bytes != max_size);
4882                 back->node.found_ref = 1;
4883                 back->found_ref += 1;
4884                 back->bytes = max_size;
4885                 back->disk_bytenr = bytenr;
4886                 rec->refs += 1;
4887                 rec->content_checked = 1;
4888                 rec->owner_ref_checked = 1;
4889         } else {
4890                 if (back->node.found_extent_tree) {
4891                         fprintf(stderr, "Extent back ref already exists "
4892                                 "for %llu parent %llu root %llu "
4893                                 "owner %llu offset %llu num_refs %lu\n",
4894                                 (unsigned long long)bytenr,
4895                                 (unsigned long long)parent,
4896                                 (unsigned long long)root,
4897                                 (unsigned long long)owner,
4898                                 (unsigned long long)offset,
4899                                 (unsigned long)num_refs);
4900                 }
4901                 back->num_refs = num_refs;
4902                 back->node.found_extent_tree = 1;
4903         }
4904         maybe_free_extent_rec(extent_cache, rec);
4905         return 0;
4906 }
4907
4908 static int add_pending(struct cache_tree *pending,
4909                        struct cache_tree *seen, u64 bytenr, u32 size)
4910 {
4911         int ret;
4912         ret = add_cache_extent(seen, bytenr, size);
4913         if (ret)
4914                 return ret;
4915         add_cache_extent(pending, bytenr, size);
4916         return 0;
4917 }
4918
4919 static int pick_next_pending(struct cache_tree *pending,
4920                         struct cache_tree *reada,
4921                         struct cache_tree *nodes,
4922                         u64 last, struct block_info *bits, int bits_nr,
4923                         int *reada_bits)
4924 {
4925         unsigned long node_start = last;
4926         struct cache_extent *cache;
4927         int ret;
4928
4929         cache = search_cache_extent(reada, 0);
4930         if (cache) {
4931                 bits[0].start = cache->start;
4932                 bits[0].size = cache->size;
4933                 *reada_bits = 1;
4934                 return 1;
4935         }
4936         *reada_bits = 0;
4937         if (node_start > 32768)
4938                 node_start -= 32768;
4939
4940         cache = search_cache_extent(nodes, node_start);
4941         if (!cache)
4942                 cache = search_cache_extent(nodes, 0);
4943
4944         if (!cache) {
4945                  cache = search_cache_extent(pending, 0);
4946                  if (!cache)
4947                          return 0;
4948                  ret = 0;
4949                  do {
4950                          bits[ret].start = cache->start;
4951                          bits[ret].size = cache->size;
4952                          cache = next_cache_extent(cache);
4953                          ret++;
4954                  } while (cache && ret < bits_nr);
4955                  return ret;
4956         }
4957
4958         ret = 0;
4959         do {
4960                 bits[ret].start = cache->start;
4961                 bits[ret].size = cache->size;
4962                 cache = next_cache_extent(cache);
4963                 ret++;
4964         } while (cache && ret < bits_nr);
4965
4966         if (bits_nr - ret > 8) {
4967                 u64 lookup = bits[0].start + bits[0].size;
4968                 struct cache_extent *next;
4969                 next = search_cache_extent(pending, lookup);
4970                 while(next) {
4971                         if (next->start - lookup > 32768)
4972                                 break;
4973                         bits[ret].start = next->start;
4974                         bits[ret].size = next->size;
4975                         lookup = next->start + next->size;
4976                         ret++;
4977                         if (ret == bits_nr)
4978                                 break;
4979                         next = next_cache_extent(next);
4980                         if (!next)
4981                                 break;
4982                 }
4983         }
4984         return ret;
4985 }
4986
4987 static void free_chunk_record(struct cache_extent *cache)
4988 {
4989         struct chunk_record *rec;
4990
4991         rec = container_of(cache, struct chunk_record, cache);
4992         list_del_init(&rec->list);
4993         list_del_init(&rec->dextents);
4994         free(rec);
4995 }
4996
4997 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4998 {
4999         cache_tree_free_extents(chunk_cache, free_chunk_record);
5000 }
5001
5002 static void free_device_record(struct rb_node *node)
5003 {
5004         struct device_record *rec;
5005
5006         rec = container_of(node, struct device_record, node);
5007         free(rec);
5008 }
5009
5010 FREE_RB_BASED_TREE(device_cache, free_device_record);
5011
5012 int insert_block_group_record(struct block_group_tree *tree,
5013                               struct block_group_record *bg_rec)
5014 {
5015         int ret;
5016
5017         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5018         if (ret)
5019                 return ret;
5020
5021         list_add_tail(&bg_rec->list, &tree->block_groups);
5022         return 0;
5023 }
5024
5025 static void free_block_group_record(struct cache_extent *cache)
5026 {
5027         struct block_group_record *rec;
5028
5029         rec = container_of(cache, struct block_group_record, cache);
5030         list_del_init(&rec->list);
5031         free(rec);
5032 }
5033
5034 void free_block_group_tree(struct block_group_tree *tree)
5035 {
5036         cache_tree_free_extents(&tree->tree, free_block_group_record);
5037 }
5038
5039 int insert_device_extent_record(struct device_extent_tree *tree,
5040                                 struct device_extent_record *de_rec)
5041 {
5042         int ret;
5043
5044         /*
5045          * Device extent is a bit different from the other extents, because
5046          * the extents which belong to the different devices may have the
5047          * same start and size, so we need use the special extent cache
5048          * search/insert functions.
5049          */
5050         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5051         if (ret)
5052                 return ret;
5053
5054         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5055         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5056         return 0;
5057 }
5058
5059 static void free_device_extent_record(struct cache_extent *cache)
5060 {
5061         struct device_extent_record *rec;
5062
5063         rec = container_of(cache, struct device_extent_record, cache);
5064         if (!list_empty(&rec->chunk_list))
5065                 list_del_init(&rec->chunk_list);
5066         if (!list_empty(&rec->device_list))
5067                 list_del_init(&rec->device_list);
5068         free(rec);
5069 }
5070
5071 void free_device_extent_tree(struct device_extent_tree *tree)
5072 {
5073         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5074 }
5075
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the v0 ref count.
 * The key's objectid is used as the extent bytenr and its offset as parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
5096
5097 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5098                                             struct btrfs_key *key,
5099                                             int slot)
5100 {
5101         struct btrfs_chunk *ptr;
5102         struct chunk_record *rec;
5103         int num_stripes, i;
5104
5105         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5106         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5107
5108         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5109         if (!rec) {
5110                 fprintf(stderr, "memory allocation failed\n");
5111                 exit(-1);
5112         }
5113
5114         INIT_LIST_HEAD(&rec->list);
5115         INIT_LIST_HEAD(&rec->dextents);
5116         rec->bg_rec = NULL;
5117
5118         rec->cache.start = key->offset;
5119         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5120
5121         rec->generation = btrfs_header_generation(leaf);
5122
5123         rec->objectid = key->objectid;
5124         rec->type = key->type;
5125         rec->offset = key->offset;
5126
5127         rec->length = rec->cache.size;
5128         rec->owner = btrfs_chunk_owner(leaf, ptr);
5129         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5130         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5131         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5132         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5133         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5134         rec->num_stripes = num_stripes;
5135         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5136
5137         for (i = 0; i < rec->num_stripes; ++i) {
5138                 rec->stripes[i].devid =
5139                         btrfs_stripe_devid_nr(leaf, ptr, i);
5140                 rec->stripes[i].offset =
5141                         btrfs_stripe_offset_nr(leaf, ptr, i);
5142                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5143                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5144                                 BTRFS_UUID_SIZE);
5145         }
5146
5147         return rec;
5148 }
5149
5150 static int process_chunk_item(struct cache_tree *chunk_cache,
5151                               struct btrfs_key *key, struct extent_buffer *eb,
5152                               int slot)
5153 {
5154         struct chunk_record *rec;
5155         struct btrfs_chunk *chunk;
5156         int ret = 0;
5157
5158         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5159         /*
5160          * Do extra check for this chunk item,
5161          *
5162          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5163          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5164          * and owner<->key_type check.
5165          */
5166         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5167                                       key->offset);
5168         if (ret < 0) {
5169                 error("chunk(%llu, %llu) is not valid, ignore it",
5170                       key->offset, btrfs_chunk_length(eb, chunk));
5171                 return 0;
5172         }
5173         rec = btrfs_new_chunk_record(eb, key, slot);
5174         ret = insert_cache_extent(chunk_cache, &rec->cache);
5175         if (ret) {
5176                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5177                         rec->offset, rec->length);
5178                 free(rec);
5179         }
5180
5181         return ret;
5182 }
5183
5184 static int process_device_item(struct rb_root *dev_cache,
5185                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5186 {
5187         struct btrfs_dev_item *ptr;
5188         struct device_record *rec;
5189         int ret = 0;
5190
5191         ptr = btrfs_item_ptr(eb,
5192                 slot, struct btrfs_dev_item);
5193
5194         rec = malloc(sizeof(*rec));
5195         if (!rec) {
5196                 fprintf(stderr, "memory allocation failed\n");
5197                 return -ENOMEM;
5198         }
5199
5200         rec->devid = key->offset;
5201         rec->generation = btrfs_header_generation(eb);
5202
5203         rec->objectid = key->objectid;
5204         rec->type = key->type;
5205         rec->offset = key->offset;
5206
5207         rec->devid = btrfs_device_id(eb, ptr);
5208         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5209         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5210
5211         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5212         if (ret) {
5213                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5214                 free(rec);
5215         }
5216
5217         return ret;
5218 }
5219
5220 struct block_group_record *
5221 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5222                              int slot)
5223 {
5224         struct btrfs_block_group_item *ptr;
5225         struct block_group_record *rec;
5226
5227         rec = calloc(1, sizeof(*rec));
5228         if (!rec) {
5229                 fprintf(stderr, "memory allocation failed\n");
5230                 exit(-1);
5231         }
5232
5233         rec->cache.start = key->objectid;
5234         rec->cache.size = key->offset;
5235
5236         rec->generation = btrfs_header_generation(leaf);
5237
5238         rec->objectid = key->objectid;
5239         rec->type = key->type;
5240         rec->offset = key->offset;
5241
5242         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5243         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5244
5245         INIT_LIST_HEAD(&rec->list);
5246
5247         return rec;
5248 }
5249
5250 static int process_block_group_item(struct block_group_tree *block_group_cache,
5251                                     struct btrfs_key *key,
5252                                     struct extent_buffer *eb, int slot)
5253 {
5254         struct block_group_record *rec;
5255         int ret = 0;
5256
5257         rec = btrfs_new_block_group_record(eb, key, slot);
5258         ret = insert_block_group_record(block_group_cache, rec);
5259         if (ret) {
5260                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5261                         rec->objectid, rec->offset);
5262                 free(rec);
5263         }
5264
5265         return ret;
5266 }
5267
5268 struct device_extent_record *
5269 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5270                                struct btrfs_key *key, int slot)
5271 {
5272         struct device_extent_record *rec;
5273         struct btrfs_dev_extent *ptr;
5274
5275         rec = calloc(1, sizeof(*rec));
5276         if (!rec) {
5277                 fprintf(stderr, "memory allocation failed\n");
5278                 exit(-1);
5279         }
5280
5281         rec->cache.objectid = key->objectid;
5282         rec->cache.start = key->offset;
5283
5284         rec->generation = btrfs_header_generation(leaf);
5285
5286         rec->objectid = key->objectid;
5287         rec->type = key->type;
5288         rec->offset = key->offset;
5289
5290         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5291         rec->chunk_objecteid =
5292                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5293         rec->chunk_offset =
5294                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5295         rec->length = btrfs_dev_extent_length(leaf, ptr);
5296         rec->cache.size = rec->length;
5297
5298         INIT_LIST_HEAD(&rec->chunk_list);
5299         INIT_LIST_HEAD(&rec->device_list);
5300
5301         return rec;
5302 }
5303
5304 static int
5305 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5306                            struct btrfs_key *key, struct extent_buffer *eb,
5307                            int slot)
5308 {
5309         struct device_extent_record *rec;
5310         int ret;
5311
5312         rec = btrfs_new_device_extent_record(eb, key, slot);
5313         ret = insert_device_extent_record(dev_extent_cache, rec);
5314         if (ret) {
5315                 fprintf(stderr,
5316                         "Device extent[%llu, %llu, %llu] existed.\n",
5317                         rec->objectid, rec->offset, rec->length);
5318                 free(rec);
5319         }
5320
5321         return ret;
5322 }
5323
5324 static int process_extent_item(struct btrfs_root *root,
5325                                struct cache_tree *extent_cache,
5326                                struct extent_buffer *eb, int slot)
5327 {
5328         struct btrfs_extent_item *ei;
5329         struct btrfs_extent_inline_ref *iref;
5330         struct btrfs_extent_data_ref *dref;
5331         struct btrfs_shared_data_ref *sref;
5332         struct btrfs_key key;
5333         struct extent_record tmpl;
5334         unsigned long end;
5335         unsigned long ptr;
5336         int ret;
5337         int type;
5338         u32 item_size = btrfs_item_size_nr(eb, slot);
5339         u64 refs = 0;
5340         u64 offset;
5341         u64 num_bytes;
5342         int metadata = 0;
5343
5344         btrfs_item_key_to_cpu(eb, &key, slot);
5345
5346         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5347                 metadata = 1;
5348                 num_bytes = root->nodesize;
5349         } else {
5350                 num_bytes = key.offset;
5351         }
5352
5353         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5354                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5355                       key.objectid, root->sectorsize);
5356                 return -EIO;
5357         }
5358         if (item_size < sizeof(*ei)) {
5359 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5360                 struct btrfs_extent_item_v0 *ei0;
5361                 BUG_ON(item_size != sizeof(*ei0));
5362                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5363                 refs = btrfs_extent_refs_v0(eb, ei0);
5364 #else
5365                 BUG();
5366 #endif
5367                 memset(&tmpl, 0, sizeof(tmpl));
5368                 tmpl.start = key.objectid;
5369                 tmpl.nr = num_bytes;
5370                 tmpl.extent_item_refs = refs;
5371                 tmpl.metadata = metadata;
5372                 tmpl.found_rec = 1;
5373                 tmpl.max_size = num_bytes;
5374
5375                 return add_extent_rec(extent_cache, &tmpl);
5376         }
5377
5378         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5379         refs = btrfs_extent_refs(eb, ei);
5380         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5381                 metadata = 1;
5382         else
5383                 metadata = 0;
5384         if (metadata && num_bytes != root->nodesize) {
5385                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5386                       num_bytes, root->nodesize);
5387                 return -EIO;
5388         }
5389         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5390                 error("ignore invalid data extent, length %llu is not aligned to %u",
5391                       num_bytes, root->sectorsize);
5392                 return -EIO;
5393         }
5394
5395         memset(&tmpl, 0, sizeof(tmpl));
5396         tmpl.start = key.objectid;
5397         tmpl.nr = num_bytes;
5398         tmpl.extent_item_refs = refs;
5399         tmpl.metadata = metadata;
5400         tmpl.found_rec = 1;
5401         tmpl.max_size = num_bytes;
5402         add_extent_rec(extent_cache, &tmpl);
5403
5404         ptr = (unsigned long)(ei + 1);
5405         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5406             key.type == BTRFS_EXTENT_ITEM_KEY)
5407                 ptr += sizeof(struct btrfs_tree_block_info);
5408
5409         end = (unsigned long)ei + item_size;
5410         while (ptr < end) {
5411                 iref = (struct btrfs_extent_inline_ref *)ptr;
5412                 type = btrfs_extent_inline_ref_type(eb, iref);
5413                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5414                 switch (type) {
5415                 case BTRFS_TREE_BLOCK_REF_KEY:
5416                         ret = add_tree_backref(extent_cache, key.objectid,
5417                                         0, offset, 0);
5418                         if (ret < 0)
5419                                 error("add_tree_backref failed: %s",
5420                                       strerror(-ret));
5421                         break;
5422                 case BTRFS_SHARED_BLOCK_REF_KEY:
5423                         ret = add_tree_backref(extent_cache, key.objectid,
5424                                         offset, 0, 0);
5425                         if (ret < 0)
5426                                 error("add_tree_backref failed: %s",
5427                                       strerror(-ret));
5428                         break;
5429                 case BTRFS_EXTENT_DATA_REF_KEY:
5430                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5431                         add_data_backref(extent_cache, key.objectid, 0,
5432                                         btrfs_extent_data_ref_root(eb, dref),
5433                                         btrfs_extent_data_ref_objectid(eb,
5434                                                                        dref),
5435                                         btrfs_extent_data_ref_offset(eb, dref),
5436                                         btrfs_extent_data_ref_count(eb, dref),
5437                                         0, num_bytes);
5438                         break;
5439                 case BTRFS_SHARED_DATA_REF_KEY:
5440                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5441                         add_data_backref(extent_cache, key.objectid, offset,
5442                                         0, 0, 0,
5443                                         btrfs_shared_data_ref_count(eb, sref),
5444                                         0, num_bytes);
5445                         break;
5446                 default:
5447                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5448                                 key.objectid, key.type, num_bytes);
5449                         goto out;
5450                 }
5451                 ptr += btrfs_extent_inline_ref_size(type);
5452         }
5453         WARN_ON(ptr > end);
5454 out:
5455         return 0;
5456 }
5457
5458 static int check_cache_range(struct btrfs_root *root,
5459                              struct btrfs_block_group_cache *cache,
5460                              u64 offset, u64 bytes)
5461 {
5462         struct btrfs_free_space *entry;
5463         u64 *logical;
5464         u64 bytenr;
5465         int stripe_len;
5466         int i, nr, ret;
5467
5468         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5469                 bytenr = btrfs_sb_offset(i);
5470                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5471                                        cache->key.objectid, bytenr, 0,
5472                                        &logical, &nr, &stripe_len);
5473                 if (ret)
5474                         return ret;
5475
5476                 while (nr--) {
5477                         if (logical[nr] + stripe_len <= offset)
5478                                 continue;
5479                         if (offset + bytes <= logical[nr])
5480                                 continue;
5481                         if (logical[nr] == offset) {
5482                                 if (stripe_len >= bytes) {
5483                                         free(logical);
5484                                         return 0;
5485                                 }
5486                                 bytes -= stripe_len;
5487                                 offset += stripe_len;
5488                         } else if (logical[nr] < offset) {
5489                                 if (logical[nr] + stripe_len >=
5490                                     offset + bytes) {
5491                                         free(logical);
5492                                         return 0;
5493                                 }
5494                                 bytes = (offset + bytes) -
5495                                         (logical[nr] + stripe_len);
5496                                 offset = logical[nr] + stripe_len;
5497                         } else {
5498                                 /*
5499                                  * Could be tricky, the super may land in the
5500                                  * middle of the area we're checking.  First
5501                                  * check the easiest case, it's at the end.
5502                                  */
5503                                 if (logical[nr] + stripe_len >=
5504                                     bytes + offset) {
5505                                         bytes = logical[nr] - offset;
5506                                         continue;
5507                                 }
5508
5509                                 /* Check the left side */
5510                                 ret = check_cache_range(root, cache,
5511                                                         offset,
5512                                                         logical[nr] - offset);
5513                                 if (ret) {
5514                                         free(logical);
5515                                         return ret;
5516                                 }
5517
5518                                 /* Now we continue with the right side */
5519                                 bytes = (offset + bytes) -
5520                                         (logical[nr] + stripe_len);
5521                                 offset = logical[nr] + stripe_len;
5522                         }
5523                 }
5524
5525                 free(logical);
5526         }
5527
5528         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5529         if (!entry) {
5530                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5531                         offset, offset+bytes);
5532                 return -EINVAL;
5533         }
5534
5535         if (entry->offset != offset) {
5536                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5537                         entry->offset);
5538                 return -EINVAL;
5539         }
5540
5541         if (entry->bytes != bytes) {
5542                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5543                         bytes, entry->bytes, offset);
5544                 return -EINVAL;
5545         }
5546
5547         unlink_free_space(cache->free_space_ctl, entry);
5548         free(entry);
5549         return 0;
5550 }
5551
5552 static int verify_space_cache(struct btrfs_root *root,
5553                               struct btrfs_block_group_cache *cache)
5554 {
5555         struct btrfs_path *path;
5556         struct extent_buffer *leaf;
5557         struct btrfs_key key;
5558         u64 last;
5559         int ret = 0;
5560
5561         path = btrfs_alloc_path();
5562         if (!path)
5563                 return -ENOMEM;
5564
5565         root = root->fs_info->extent_root;
5566
5567         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5568
5569         key.objectid = last;
5570         key.offset = 0;
5571         key.type = BTRFS_EXTENT_ITEM_KEY;
5572
5573         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5574         if (ret < 0)
5575                 goto out;
5576         ret = 0;
5577         while (1) {
5578                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5579                         ret = btrfs_next_leaf(root, path);
5580                         if (ret < 0)
5581                                 goto out;
5582                         if (ret > 0) {
5583                                 ret = 0;
5584                                 break;
5585                         }
5586                 }
5587                 leaf = path->nodes[0];
5588                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5589                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5590                         break;
5591                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5592                     key.type != BTRFS_METADATA_ITEM_KEY) {
5593                         path->slots[0]++;
5594                         continue;
5595                 }
5596
5597                 if (last == key.objectid) {
5598                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5599                                 last = key.objectid + key.offset;
5600                         else
5601                                 last = key.objectid + root->nodesize;
5602                         path->slots[0]++;
5603                         continue;
5604                 }
5605
5606                 ret = check_cache_range(root, cache, last,
5607                                         key.objectid - last);
5608                 if (ret)
5609                         break;
5610                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5611                         last = key.objectid + key.offset;
5612                 else
5613                         last = key.objectid + root->nodesize;
5614                 path->slots[0]++;
5615         }
5616
5617         if (last < cache->key.objectid + cache->key.offset)
5618                 ret = check_cache_range(root, cache, last,
5619                                         cache->key.objectid +
5620                                         cache->key.offset - last);
5621
5622 out:
5623         btrfs_free_path(path);
5624
5625         if (!ret &&
5626             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5627                 fprintf(stderr, "There are still entries left in the space "
5628                         "cache\n");
5629                 ret = -EINVAL;
5630         }
5631
5632         return ret;
5633 }
5634
5635 static int check_space_cache(struct btrfs_root *root)
5636 {
5637         struct btrfs_block_group_cache *cache;
5638         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5639         int ret;
5640         int error = 0;
5641
5642         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5643             btrfs_super_generation(root->fs_info->super_copy) !=
5644             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5645                 printf("cache and super generation don't match, space cache "
5646                        "will be invalidated\n");
5647                 return 0;
5648         }
5649
5650         if (ctx.progress_enabled) {
5651                 ctx.tp = TASK_FREE_SPACE;
5652                 task_start(ctx.info);
5653         }
5654
5655         while (1) {
5656                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5657                 if (!cache)
5658                         break;
5659
5660                 start = cache->key.objectid + cache->key.offset;
5661                 if (!cache->free_space_ctl) {
5662                         if (btrfs_init_free_space_ctl(cache,
5663                                                       root->sectorsize)) {
5664                                 ret = -ENOMEM;
5665                                 break;
5666                         }
5667                 } else {
5668                         btrfs_remove_free_space_cache(cache);
5669                 }
5670
5671                 if (btrfs_fs_compat_ro(root->fs_info,
5672                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5673                         ret = exclude_super_stripes(root, cache);
5674                         if (ret) {
5675                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5676                                         strerror(-ret));
5677                                 error++;
5678                                 continue;
5679                         }
5680                         ret = load_free_space_tree(root->fs_info, cache);
5681                         free_excluded_extents(root, cache);
5682                         if (ret < 0) {
5683                                 fprintf(stderr, "could not load free space tree: %s\n",
5684                                         strerror(-ret));
5685                                 error++;
5686                                 continue;
5687                         }
5688                         error += ret;
5689                 } else {
5690                         ret = load_free_space_cache(root->fs_info, cache);
5691                         if (!ret)
5692                                 continue;
5693                 }
5694
5695                 ret = verify_space_cache(root, cache);
5696                 if (ret) {
5697                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5698                                 cache->key.objectid);
5699                         error++;
5700                 }
5701         }
5702
5703         task_stop(ctx.info);
5704
5705         return error ? -EINVAL : 0;
5706 }
5707
5708 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5709                         u64 num_bytes, unsigned long leaf_offset,
5710                         struct extent_buffer *eb) {
5711
5712         u64 offset = 0;
5713         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5714         char *data;
5715         unsigned long csum_offset;
5716         u32 csum;
5717         u32 csum_expected;
5718         u64 read_len;
5719         u64 data_checked = 0;
5720         u64 tmp;
5721         int ret = 0;
5722         int mirror;
5723         int num_copies;
5724
5725         if (num_bytes % root->sectorsize)
5726                 return -EINVAL;
5727
5728         data = malloc(num_bytes);
5729         if (!data)
5730                 return -ENOMEM;
5731
5732         while (offset < num_bytes) {
5733                 mirror = 0;
5734 again:
5735                 read_len = num_bytes - offset;
5736                 /* read as much space once a time */
5737                 ret = read_extent_data(root, data + offset,
5738                                 bytenr + offset, &read_len, mirror);
5739                 if (ret)
5740                         goto out;
5741                 data_checked = 0;
5742                 /* verify every 4k data's checksum */
5743                 while (data_checked < read_len) {
5744                         csum = ~(u32)0;
5745                         tmp = offset + data_checked;
5746
5747                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5748                                                csum, root->sectorsize);
5749                         btrfs_csum_final(csum, (char *)&csum);
5750
5751                         csum_offset = leaf_offset +
5752                                  tmp / root->sectorsize * csum_size;
5753                         read_extent_buffer(eb, (char *)&csum_expected,
5754                                            csum_offset, csum_size);
5755                         /* try another mirror */
5756                         if (csum != csum_expected) {
5757                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5758                                                 mirror, bytenr + tmp,
5759                                                 csum, csum_expected);
5760                                 num_copies = btrfs_num_copies(
5761                                                 &root->fs_info->mapping_tree,
5762                                                 bytenr, num_bytes);
5763                                 if (mirror < num_copies - 1) {
5764                                         mirror += 1;
5765                                         goto again;
5766                                 }
5767                         }
5768                         data_checked += root->sectorsize;
5769                 }
5770                 offset += read_len;
5771         }
5772 out:
5773         free(data);
5774         return ret;
5775 }
5776
5777 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5778                                u64 num_bytes)
5779 {
5780         struct btrfs_path *path;
5781         struct extent_buffer *leaf;
5782         struct btrfs_key key;
5783         int ret;
5784
5785         path = btrfs_alloc_path();
5786         if (!path) {
5787                 fprintf(stderr, "Error allocating path\n");
5788                 return -ENOMEM;
5789         }
5790
5791         key.objectid = bytenr;
5792         key.type = BTRFS_EXTENT_ITEM_KEY;
5793         key.offset = (u64)-1;
5794
5795 again:
5796         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5797                                 0, 0);
5798         if (ret < 0) {
5799                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5800                 btrfs_free_path(path);
5801                 return ret;
5802         } else if (ret) {
5803                 if (path->slots[0] > 0) {
5804                         path->slots[0]--;
5805                 } else {
5806                         ret = btrfs_prev_leaf(root, path);
5807                         if (ret < 0) {
5808                                 goto out;
5809                         } else if (ret > 0) {
5810                                 ret = 0;
5811                                 goto out;
5812                         }
5813                 }
5814         }
5815
5816         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5817
5818         /*
5819          * Block group items come before extent items if they have the same
5820          * bytenr, so walk back one more just in case.  Dear future traveller,
5821          * first congrats on mastering time travel.  Now if it's not too much
5822          * trouble could you go back to 2006 and tell Chris to make the
5823          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5824          * EXTENT_ITEM_KEY please?
5825          */
5826         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5827                 if (path->slots[0] > 0) {
5828                         path->slots[0]--;
5829                 } else {
5830                         ret = btrfs_prev_leaf(root, path);
5831                         if (ret < 0) {
5832                                 goto out;
5833                         } else if (ret > 0) {
5834                                 ret = 0;
5835                                 goto out;
5836                         }
5837                 }
5838                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5839         }
5840
5841         while (num_bytes) {
5842                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5843                         ret = btrfs_next_leaf(root, path);
5844                         if (ret < 0) {
5845                                 fprintf(stderr, "Error going to next leaf "
5846                                         "%d\n", ret);
5847                                 btrfs_free_path(path);
5848                                 return ret;
5849                         } else if (ret) {
5850                                 break;
5851                         }
5852                 }
5853                 leaf = path->nodes[0];
5854                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5855                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5856                         path->slots[0]++;
5857                         continue;
5858                 }
5859                 if (key.objectid + key.offset < bytenr) {
5860                         path->slots[0]++;
5861                         continue;
5862                 }
5863                 if (key.objectid > bytenr + num_bytes)
5864                         break;
5865
5866                 if (key.objectid == bytenr) {
5867                         if (key.offset >= num_bytes) {
5868                                 num_bytes = 0;
5869                                 break;
5870                         }
5871                         num_bytes -= key.offset;
5872                         bytenr += key.offset;
5873                 } else if (key.objectid < bytenr) {
5874                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5875                                 num_bytes = 0;
5876                                 break;
5877                         }
5878                         num_bytes = (bytenr + num_bytes) -
5879                                 (key.objectid + key.offset);
5880                         bytenr = key.objectid + key.offset;
5881                 } else {
5882                         if (key.objectid + key.offset < bytenr + num_bytes) {
5883                                 u64 new_start = key.objectid + key.offset;
5884                                 u64 new_bytes = bytenr + num_bytes - new_start;
5885
5886                                 /*
5887                                  * Weird case, the extent is in the middle of
5888                                  * our range, we'll have to search one side
5889                                  * and then the other.  Not sure if this happens
5890                                  * in real life, but no harm in coding it up
5891                                  * anyway just in case.
5892                                  */
5893                                 btrfs_release_path(path);
5894                                 ret = check_extent_exists(root, new_start,
5895                                                           new_bytes);
5896                                 if (ret) {
5897                                         fprintf(stderr, "Right section didn't "
5898                                                 "have a record\n");
5899                                         break;
5900                                 }
5901                                 num_bytes = key.objectid - bytenr;
5902                                 goto again;
5903                         }
5904                         num_bytes = key.objectid - bytenr;
5905                 }
5906                 path->slots[0]++;
5907         }
5908         ret = 0;
5909
5910 out:
5911         if (num_bytes && !ret) {
5912                 fprintf(stderr, "There are no extents for csum range "
5913                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5914                 ret = 1;
5915         }
5916
5917         btrfs_free_path(path);
5918         return ret;
5919 }
5920
5921 static int check_csums(struct btrfs_root *root)
5922 {
5923         struct btrfs_path *path;
5924         struct extent_buffer *leaf;
5925         struct btrfs_key key;
5926         u64 offset = 0, num_bytes = 0;
5927         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5928         int errors = 0;
5929         int ret;
5930         u64 data_len;
5931         unsigned long leaf_offset;
5932
5933         root = root->fs_info->csum_root;
5934         if (!extent_buffer_uptodate(root->node)) {
5935                 fprintf(stderr, "No valid csum tree found\n");
5936                 return -ENOENT;
5937         }
5938
5939         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5940         key.type = BTRFS_EXTENT_CSUM_KEY;
5941         key.offset = 0;
5942
5943         path = btrfs_alloc_path();
5944         if (!path)
5945                 return -ENOMEM;
5946
5947         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5948         if (ret < 0) {
5949                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5950                 btrfs_free_path(path);
5951                 return ret;
5952         }
5953
5954         if (ret > 0 && path->slots[0])
5955                 path->slots[0]--;
5956         ret = 0;
5957
5958         while (1) {
5959                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5960                         ret = btrfs_next_leaf(root, path);
5961                         if (ret < 0) {
5962                                 fprintf(stderr, "Error going to next leaf "
5963                                         "%d\n", ret);
5964                                 break;
5965                         }
5966                         if (ret)
5967                                 break;
5968                 }
5969                 leaf = path->nodes[0];
5970
5971                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5972                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5973                         path->slots[0]++;
5974                         continue;
5975                 }
5976
5977                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5978                               csum_size) * root->sectorsize;
5979                 if (!check_data_csum)
5980                         goto skip_csum_check;
5981                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5982                 ret = check_extent_csums(root, key.offset, data_len,
5983                                          leaf_offset, leaf);
5984                 if (ret)
5985                         break;
5986 skip_csum_check:
5987                 if (!num_bytes) {
5988                         offset = key.offset;
5989                 } else if (key.offset != offset + num_bytes) {
5990                         ret = check_extent_exists(root, offset, num_bytes);
5991                         if (ret) {
5992                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5993                                         "there is no extent record\n",
5994                                         offset, offset+num_bytes);
5995                                 errors++;
5996                         }
5997                         offset = key.offset;
5998                         num_bytes = 0;
5999                 }
6000                 num_bytes += data_len;
6001                 path->slots[0]++;
6002         }
6003
6004         btrfs_free_path(path);
6005         return errors;
6006 }
6007
6008 static int is_dropped_key(struct btrfs_key *key,
6009                           struct btrfs_key *drop_key) {
6010         if (key->objectid < drop_key->objectid)
6011                 return 1;
6012         else if (key->objectid == drop_key->objectid) {
6013                 if (key->type < drop_key->type)
6014                         return 1;
6015                 else if (key->type == drop_key->type) {
6016                         if (key->offset < drop_key->offset)
6017                                 return 1;
6018                 }
6019         }
6020         return 0;
6021 }
6022
6023 /*
6024  * Here are the rules for FULL_BACKREF.
6025  *
6026  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6027  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6028  *      FULL_BACKREF set.
6029  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6030  *    if it happened after the relocation occurred since we'll have dropped the
6031  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6032  *    have no real way to know for sure.
6033  *
6034  * We process the blocks one root at a time, and we start from the lowest root
6035  * objectid and go to the highest.  So we can just lookup the owner backref for
6036  * the record and if we don't find it then we know it doesn't exist and we have
6037  * a FULL BACKREF.
6038  *
6039  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6040  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6041  * be set or not and then we can check later once we've gathered all the refs.
6042  */
6043 static int calc_extent_flag(struct btrfs_root *root,
6044                            struct cache_tree *extent_cache,
6045                            struct extent_buffer *buf,
6046                            struct root_item_record *ri,
6047                            u64 *flags)
6048 {
6049         struct extent_record *rec;
6050         struct cache_extent *cache;
6051         struct tree_backref *tback;
6052         u64 owner = 0;
6053
6054         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6055         /* we have added this extent before */
6056         if (!cache)
6057                 return -ENOENT;
6058
6059         rec = container_of(cache, struct extent_record, cache);
6060
6061         /*
6062          * Except file/reloc tree, we can not have
6063          * FULL BACKREF MODE
6064          */
6065         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6066                 goto normal;
6067         /*
6068          * root node
6069          */
6070         if (buf->start == ri->bytenr)
6071                 goto normal;
6072
6073         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6074                 goto full_backref;
6075
6076         owner = btrfs_header_owner(buf);
6077         if (owner == ri->objectid)
6078                 goto normal;
6079
6080         tback = find_tree_backref(rec, 0, owner);
6081         if (!tback)
6082                 goto full_backref;
6083 normal:
6084         *flags = 0;
6085         if (rec->flag_block_full_backref != FLAG_UNSET &&
6086             rec->flag_block_full_backref != 0)
6087                 rec->bad_full_backref = 1;
6088         return 0;
6089 full_backref:
6090         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6091         if (rec->flag_block_full_backref != FLAG_UNSET &&
6092             rec->flag_block_full_backref != 1)
6093                 rec->bad_full_backref = 1;
6094         return 0;
6095 }
6096
6097 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6098 {
6099         fprintf(stderr, "Invalid key type(");
6100         print_key_type(stderr, 0, key_type);
6101         fprintf(stderr, ") found in root(");
6102         print_objectid(stderr, rootid, 0);
6103         fprintf(stderr, ")\n");
6104 }
6105
6106 /*
6107  * Check if the key is valid with its extent buffer.
6108  *
6109  * This is a early check in case invalid key exists in a extent buffer
6110  * This is not comprehensive yet, but should prevent wrong key/item passed
6111  * further
6112  */
6113 static int check_type_with_root(u64 rootid, u8 key_type)
6114 {
6115         switch (key_type) {
6116         /* Only valid in chunk tree */
6117         case BTRFS_DEV_ITEM_KEY:
6118         case BTRFS_CHUNK_ITEM_KEY:
6119                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6120                         goto err;
6121                 break;
6122         /* valid in csum and log tree */
6123         case BTRFS_CSUM_TREE_OBJECTID:
6124                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6125                       is_fstree(rootid)))
6126                         goto err;
6127                 break;
6128         case BTRFS_EXTENT_ITEM_KEY:
6129         case BTRFS_METADATA_ITEM_KEY:
6130         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6131                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6132                         goto err;
6133                 break;
6134         case BTRFS_ROOT_ITEM_KEY:
6135                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6136                         goto err;
6137                 break;
6138         case BTRFS_DEV_EXTENT_KEY:
6139                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6140                         goto err;
6141                 break;
6142         }
6143         return 0;
6144 err:
6145         report_mismatch_key_root(key_type, rootid);
6146         return -EINVAL;
6147 }
6148
6149 static int run_next_block(struct btrfs_root *root,
6150                           struct block_info *bits,
6151                           int bits_nr,
6152                           u64 *last,
6153                           struct cache_tree *pending,
6154                           struct cache_tree *seen,
6155                           struct cache_tree *reada,
6156                           struct cache_tree *nodes,
6157                           struct cache_tree *extent_cache,
6158                           struct cache_tree *chunk_cache,
6159                           struct rb_root *dev_cache,
6160                           struct block_group_tree *block_group_cache,
6161                           struct device_extent_tree *dev_extent_cache,
6162                           struct root_item_record *ri)
6163 {
6164         struct extent_buffer *buf;
6165         struct extent_record *rec = NULL;
6166         u64 bytenr;
6167         u32 size;
6168         u64 parent;
6169         u64 owner;
6170         u64 flags;
6171         u64 ptr;
6172         u64 gen = 0;
6173         int ret = 0;
6174         int i;
6175         int nritems;
6176         struct btrfs_key key;
6177         struct cache_extent *cache;
6178         int reada_bits;
6179
6180         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6181                                     bits_nr, &reada_bits);
6182         if (nritems == 0)
6183                 return 1;
6184
6185         if (!reada_bits) {
6186                 for(i = 0; i < nritems; i++) {
6187                         ret = add_cache_extent(reada, bits[i].start,
6188                                                bits[i].size);
6189                         if (ret == -EEXIST)
6190                                 continue;
6191
6192                         /* fixme, get the parent transid */
6193                         readahead_tree_block(root, bits[i].start,
6194                                              bits[i].size, 0);
6195                 }
6196         }
6197         *last = bits[0].start;
6198         bytenr = bits[0].start;
6199         size = bits[0].size;
6200
6201         cache = lookup_cache_extent(pending, bytenr, size);
6202         if (cache) {
6203                 remove_cache_extent(pending, cache);
6204                 free(cache);
6205         }
6206         cache = lookup_cache_extent(reada, bytenr, size);
6207         if (cache) {
6208                 remove_cache_extent(reada, cache);
6209                 free(cache);
6210         }
6211         cache = lookup_cache_extent(nodes, bytenr, size);
6212         if (cache) {
6213                 remove_cache_extent(nodes, cache);
6214                 free(cache);
6215         }
6216         cache = lookup_cache_extent(extent_cache, bytenr, size);
6217         if (cache) {
6218                 rec = container_of(cache, struct extent_record, cache);
6219                 gen = rec->parent_generation;
6220         }
6221
6222         /* fixme, get the real parent transid */
6223         buf = read_tree_block(root, bytenr, size, gen);
6224         if (!extent_buffer_uptodate(buf)) {
6225                 record_bad_block_io(root->fs_info,
6226                                     extent_cache, bytenr, size);
6227                 goto out;
6228         }
6229
6230         nritems = btrfs_header_nritems(buf);
6231
6232         flags = 0;
6233         if (!init_extent_tree) {
6234                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6235                                        btrfs_header_level(buf), 1, NULL,
6236                                        &flags);
6237                 if (ret < 0) {
6238                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6239                         if (ret < 0) {
6240                                 fprintf(stderr, "Couldn't calc extent flags\n");
6241                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6242                         }
6243                 }
6244         } else {
6245                 flags = 0;
6246                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6247                 if (ret < 0) {
6248                         fprintf(stderr, "Couldn't calc extent flags\n");
6249                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6250                 }
6251         }
6252
6253         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6254                 if (ri != NULL &&
6255                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6256                     ri->objectid == btrfs_header_owner(buf)) {
6257                         /*
6258                          * Ok we got to this block from it's original owner and
6259                          * we have FULL_BACKREF set.  Relocation can leave
6260                          * converted blocks over so this is altogether possible,
6261                          * however it's not possible if the generation > the
6262                          * last snapshot, so check for this case.
6263                          */
6264                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6265                             btrfs_header_generation(buf) > ri->last_snapshot) {
6266                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6267                                 rec->bad_full_backref = 1;
6268                         }
6269                 }
6270         } else {
6271                 if (ri != NULL &&
6272                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6273                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6274                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6275                         rec->bad_full_backref = 1;
6276                 }
6277         }
6278
6279         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6280                 rec->flag_block_full_backref = 1;
6281                 parent = bytenr;
6282                 owner = 0;
6283         } else {
6284                 rec->flag_block_full_backref = 0;
6285                 parent = 0;
6286                 owner = btrfs_header_owner(buf);
6287         }
6288
6289         ret = check_block(root, extent_cache, buf, flags);
6290         if (ret)
6291                 goto out;
6292
6293         if (btrfs_is_leaf(buf)) {
6294                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6295                 for (i = 0; i < nritems; i++) {
6296                         struct btrfs_file_extent_item *fi;
6297                         btrfs_item_key_to_cpu(buf, &key, i);
6298                         /*
6299                          * Check key type against the leaf owner.
6300                          * Could filter quite a lot of early error if
6301                          * owner is correct
6302                          */
6303                         if (check_type_with_root(btrfs_header_owner(buf),
6304                                                  key.type)) {
6305                                 fprintf(stderr, "ignoring invalid key\n");
6306                                 continue;
6307                         }
6308                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6309                                 process_extent_item(root, extent_cache, buf,
6310                                                     i);
6311                                 continue;
6312                         }
6313                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6314                                 process_extent_item(root, extent_cache, buf,
6315                                                     i);
6316                                 continue;
6317                         }
6318                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6319                                 total_csum_bytes +=
6320                                         btrfs_item_size_nr(buf, i);
6321                                 continue;
6322                         }
6323                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6324                                 process_chunk_item(chunk_cache, &key, buf, i);
6325                                 continue;
6326                         }
6327                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6328                                 process_device_item(dev_cache, &key, buf, i);
6329                                 continue;
6330                         }
6331                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6332                                 process_block_group_item(block_group_cache,
6333                                         &key, buf, i);
6334                                 continue;
6335                         }
6336                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6337                                 process_device_extent_item(dev_extent_cache,
6338                                         &key, buf, i);
6339                                 continue;
6340
6341                         }
6342                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6343 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6344                                 process_extent_ref_v0(extent_cache, buf, i);
6345 #else
6346                                 BUG();
6347 #endif
6348                                 continue;
6349                         }
6350
6351                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6352                                 ret = add_tree_backref(extent_cache,
6353                                                 key.objectid, 0, key.offset, 0);
6354                                 if (ret < 0)
6355                                         error("add_tree_backref failed: %s",
6356                                               strerror(-ret));
6357                                 continue;
6358                         }
6359                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6360                                 ret = add_tree_backref(extent_cache,
6361                                                 key.objectid, key.offset, 0, 0);
6362                                 if (ret < 0)
6363                                         error("add_tree_backref failed: %s",
6364                                               strerror(-ret));
6365                                 continue;
6366                         }
6367                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6368                                 struct btrfs_extent_data_ref *ref;
6369                                 ref = btrfs_item_ptr(buf, i,
6370                                                 struct btrfs_extent_data_ref);
6371                                 add_data_backref(extent_cache,
6372                                         key.objectid, 0,
6373                                         btrfs_extent_data_ref_root(buf, ref),
6374                                         btrfs_extent_data_ref_objectid(buf,
6375                                                                        ref),
6376                                         btrfs_extent_data_ref_offset(buf, ref),
6377                                         btrfs_extent_data_ref_count(buf, ref),
6378                                         0, root->sectorsize);
6379                                 continue;
6380                         }
6381                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6382                                 struct btrfs_shared_data_ref *ref;
6383                                 ref = btrfs_item_ptr(buf, i,
6384                                                 struct btrfs_shared_data_ref);
6385                                 add_data_backref(extent_cache,
6386                                         key.objectid, key.offset, 0, 0, 0,
6387                                         btrfs_shared_data_ref_count(buf, ref),
6388                                         0, root->sectorsize);
6389                                 continue;
6390                         }
6391                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6392                                 struct bad_item *bad;
6393
6394                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6395                                         continue;
6396                                 if (!owner)
6397                                         continue;
6398                                 bad = malloc(sizeof(struct bad_item));
6399                                 if (!bad)
6400                                         continue;
6401                                 INIT_LIST_HEAD(&bad->list);
6402                                 memcpy(&bad->key, &key,
6403                                        sizeof(struct btrfs_key));
6404                                 bad->root_id = owner;
6405                                 list_add_tail(&bad->list, &delete_items);
6406                                 continue;
6407                         }
6408                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6409                                 continue;
6410                         fi = btrfs_item_ptr(buf, i,
6411                                             struct btrfs_file_extent_item);
6412                         if (btrfs_file_extent_type(buf, fi) ==
6413                             BTRFS_FILE_EXTENT_INLINE)
6414                                 continue;
6415                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6416                                 continue;
6417
6418                         data_bytes_allocated +=
6419                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6420                         if (data_bytes_allocated < root->sectorsize) {
6421                                 abort();
6422                         }
6423                         data_bytes_referenced +=
6424                                 btrfs_file_extent_num_bytes(buf, fi);
6425                         add_data_backref(extent_cache,
6426                                 btrfs_file_extent_disk_bytenr(buf, fi),
6427                                 parent, owner, key.objectid, key.offset -
6428                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6429                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6430                 }
6431         } else {
6432                 int level;
6433                 struct btrfs_key first_key;
6434
6435                 first_key.objectid = 0;
6436
6437                 if (nritems > 0)
6438                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6439                 level = btrfs_header_level(buf);
6440                 for (i = 0; i < nritems; i++) {
6441                         struct extent_record tmpl;
6442
6443                         ptr = btrfs_node_blockptr(buf, i);
6444                         size = root->nodesize;
6445                         btrfs_node_key_to_cpu(buf, &key, i);
6446                         if (ri != NULL) {
6447                                 if ((level == ri->drop_level)
6448                                     && is_dropped_key(&key, &ri->drop_key)) {
6449                                         continue;
6450                                 }
6451                         }
6452
6453                         memset(&tmpl, 0, sizeof(tmpl));
6454                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6455                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6456                         tmpl.start = ptr;
6457                         tmpl.nr = size;
6458                         tmpl.refs = 1;
6459                         tmpl.metadata = 1;
6460                         tmpl.max_size = size;
6461                         ret = add_extent_rec(extent_cache, &tmpl);
6462                         if (ret < 0)
6463                                 goto out;
6464
6465                         ret = add_tree_backref(extent_cache, ptr, parent,
6466                                         owner, 1);
6467                         if (ret < 0) {
6468                                 error("add_tree_backref failed: %s",
6469                                       strerror(-ret));
6470                                 continue;
6471                         }
6472
6473                         if (level > 1) {
6474                                 add_pending(nodes, seen, ptr, size);
6475                         } else {
6476                                 add_pending(pending, seen, ptr, size);
6477                         }
6478                 }
6479                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6480                                       nritems) * sizeof(struct btrfs_key_ptr);
6481         }
6482         total_btree_bytes += buf->len;
6483         if (fs_root_objectid(btrfs_header_owner(buf)))
6484                 total_fs_tree_bytes += buf->len;
6485         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6486                 total_extent_tree_bytes += buf->len;
6487         if (!found_old_backref &&
6488             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6489             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6490             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6491                 found_old_backref = 1;
6492 out:
6493         free_extent_buffer(buf);
6494         return ret;
6495 }
6496
6497 static int add_root_to_pending(struct extent_buffer *buf,
6498                                struct cache_tree *extent_cache,
6499                                struct cache_tree *pending,
6500                                struct cache_tree *seen,
6501                                struct cache_tree *nodes,
6502                                u64 objectid)
6503 {
6504         struct extent_record tmpl;
6505         int ret;
6506
6507         if (btrfs_header_level(buf) > 0)
6508                 add_pending(nodes, seen, buf->start, buf->len);
6509         else
6510                 add_pending(pending, seen, buf->start, buf->len);
6511
6512         memset(&tmpl, 0, sizeof(tmpl));
6513         tmpl.start = buf->start;
6514         tmpl.nr = buf->len;
6515         tmpl.is_root = 1;
6516         tmpl.refs = 1;
6517         tmpl.metadata = 1;
6518         tmpl.max_size = buf->len;
6519         add_extent_rec(extent_cache, &tmpl);
6520
6521         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6522             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6523                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6524                                 0, 1);
6525         else
6526                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6527                                 1);
6528         return ret;
6529 }
6530
6531 /* as we fix the tree, we might be deleting blocks that
6532  * we're tracking for repair.  This hook makes sure we
6533  * remove any backrefs for blocks as we are fixing them.
6534  */
6535 static int free_extent_hook(struct btrfs_trans_handle *trans,
6536                             struct btrfs_root *root,
6537                             u64 bytenr, u64 num_bytes, u64 parent,
6538                             u64 root_objectid, u64 owner, u64 offset,
6539                             int refs_to_drop)
6540 {
6541         struct extent_record *rec;
6542         struct cache_extent *cache;
6543         int is_data;
6544         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6545
6546         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6547         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6548         if (!cache)
6549                 return 0;
6550
6551         rec = container_of(cache, struct extent_record, cache);
6552         if (is_data) {
6553                 struct data_backref *back;
6554                 back = find_data_backref(rec, parent, root_objectid, owner,
6555                                          offset, 1, bytenr, num_bytes);
6556                 if (!back)
6557                         goto out;
6558                 if (back->node.found_ref) {
6559                         back->found_ref -= refs_to_drop;
6560                         if (rec->refs)
6561                                 rec->refs -= refs_to_drop;
6562                 }
6563                 if (back->node.found_extent_tree) {
6564                         back->num_refs -= refs_to_drop;
6565                         if (rec->extent_item_refs)
6566                                 rec->extent_item_refs -= refs_to_drop;
6567                 }
6568                 if (back->found_ref == 0)
6569                         back->node.found_ref = 0;
6570                 if (back->num_refs == 0)
6571                         back->node.found_extent_tree = 0;
6572
6573                 if (!back->node.found_extent_tree && back->node.found_ref) {
6574                         list_del(&back->node.list);
6575                         free(back);
6576                 }
6577         } else {
6578                 struct tree_backref *back;
6579                 back = find_tree_backref(rec, parent, root_objectid);
6580                 if (!back)
6581                         goto out;
6582                 if (back->node.found_ref) {
6583                         if (rec->refs)
6584                                 rec->refs--;
6585                         back->node.found_ref = 0;
6586                 }
6587                 if (back->node.found_extent_tree) {
6588                         if (rec->extent_item_refs)
6589                                 rec->extent_item_refs--;
6590                         back->node.found_extent_tree = 0;
6591                 }
6592                 if (!back->node.found_extent_tree && back->node.found_ref) {
6593                         list_del(&back->node.list);
6594                         free(back);
6595                 }
6596         }
6597         maybe_free_extent_rec(extent_cache, rec);
6598 out:
6599         return 0;
6600 }
6601
6602 static int delete_extent_records(struct btrfs_trans_handle *trans,
6603                                  struct btrfs_root *root,
6604                                  struct btrfs_path *path,
6605                                  u64 bytenr, u64 new_len)
6606 {
6607         struct btrfs_key key;
6608         struct btrfs_key found_key;
6609         struct extent_buffer *leaf;
6610         int ret;
6611         int slot;
6612
6613
6614         key.objectid = bytenr;
6615         key.type = (u8)-1;
6616         key.offset = (u64)-1;
6617
6618         while(1) {
6619                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6620                                         &key, path, 0, 1);
6621                 if (ret < 0)
6622                         break;
6623
6624                 if (ret > 0) {
6625                         ret = 0;
6626                         if (path->slots[0] == 0)
6627                                 break;
6628                         path->slots[0]--;
6629                 }
6630                 ret = 0;
6631
6632                 leaf = path->nodes[0];
6633                 slot = path->slots[0];
6634
6635                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6636                 if (found_key.objectid != bytenr)
6637                         break;
6638
6639                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6640                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6641                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6642                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6643                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6644                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6645                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6646                         btrfs_release_path(path);
6647                         if (found_key.type == 0) {
6648                                 if (found_key.offset == 0)
6649                                         break;
6650                                 key.offset = found_key.offset - 1;
6651                                 key.type = found_key.type;
6652                         }
6653                         key.type = found_key.type - 1;
6654                         key.offset = (u64)-1;
6655                         continue;
6656                 }
6657
6658                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6659                         found_key.objectid, found_key.type, found_key.offset);
6660
6661                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6662                 if (ret)
6663                         break;
6664                 btrfs_release_path(path);
6665
6666                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6667                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6668                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6669                                 found_key.offset : root->nodesize;
6670
6671                         ret = btrfs_update_block_group(trans, root, bytenr,
6672                                                        bytes, 0, 0);
6673                         if (ret)
6674                                 break;
6675                 }
6676         }
6677
6678         btrfs_release_path(path);
6679         return ret;
6680 }
6681
6682 /*
6683  * for a single backref, this will allocate a new extent
6684  * and add the backref to it.
6685  */
6686 static int record_extent(struct btrfs_trans_handle *trans,
6687                          struct btrfs_fs_info *info,
6688                          struct btrfs_path *path,
6689                          struct extent_record *rec,
6690                          struct extent_backref *back,
6691                          int allocated, u64 flags)
6692 {
6693         int ret;
6694         struct btrfs_root *extent_root = info->extent_root;
6695         struct extent_buffer *leaf;
6696         struct btrfs_key ins_key;
6697         struct btrfs_extent_item *ei;
6698         struct tree_backref *tback;
6699         struct data_backref *dback;
6700         struct btrfs_tree_block_info *bi;
6701
6702         if (!back->is_data)
6703                 rec->max_size = max_t(u64, rec->max_size,
6704                                     info->extent_root->nodesize);
6705
6706         if (!allocated) {
6707                 u32 item_size = sizeof(*ei);
6708
6709                 if (!back->is_data)
6710                         item_size += sizeof(*bi);
6711
6712                 ins_key.objectid = rec->start;
6713                 ins_key.offset = rec->max_size;
6714                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6715
6716                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6717                                         &ins_key, item_size);
6718                 if (ret)
6719                         goto fail;
6720
6721                 leaf = path->nodes[0];
6722                 ei = btrfs_item_ptr(leaf, path->slots[0],
6723                                     struct btrfs_extent_item);
6724
6725                 btrfs_set_extent_refs(leaf, ei, 0);
6726                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6727
6728                 if (back->is_data) {
6729                         btrfs_set_extent_flags(leaf, ei,
6730                                                BTRFS_EXTENT_FLAG_DATA);
6731                 } else {
6732                         struct btrfs_disk_key copy_key;;
6733
6734                         tback = to_tree_backref(back);
6735                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6736                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6737                                              sizeof(*bi));
6738
6739                         btrfs_set_disk_key_objectid(&copy_key,
6740                                                     rec->info_objectid);
6741                         btrfs_set_disk_key_type(&copy_key, 0);
6742                         btrfs_set_disk_key_offset(&copy_key, 0);
6743
6744                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6745                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6746
6747                         btrfs_set_extent_flags(leaf, ei,
6748                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6749                 }
6750
6751                 btrfs_mark_buffer_dirty(leaf);
6752                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6753                                                rec->max_size, 1, 0);
6754                 if (ret)
6755                         goto fail;
6756                 btrfs_release_path(path);
6757         }
6758
6759         if (back->is_data) {
6760                 u64 parent;
6761                 int i;
6762
6763                 dback = to_data_backref(back);
6764                 if (back->full_backref)
6765                         parent = dback->parent;
6766                 else
6767                         parent = 0;
6768
6769                 for (i = 0; i < dback->found_ref; i++) {
6770                         /* if parent != 0, we're doing a full backref
6771                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6772                          * just makes the backref allocator create a data
6773                          * backref
6774                          */
6775                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6776                                                    rec->start, rec->max_size,
6777                                                    parent,
6778                                                    dback->root,
6779                                                    parent ?
6780                                                    BTRFS_FIRST_FREE_OBJECTID :
6781                                                    dback->owner,
6782                                                    dback->offset);
6783                         if (ret)
6784                                 break;
6785                 }
6786                 fprintf(stderr, "adding new data backref"
6787                                 " on %llu %s %llu owner %llu"
6788                                 " offset %llu found %d\n",
6789                                 (unsigned long long)rec->start,
6790                                 back->full_backref ?
6791                                 "parent" : "root",
6792                                 back->full_backref ?
6793                                 (unsigned long long)parent :
6794                                 (unsigned long long)dback->root,
6795                                 (unsigned long long)dback->owner,
6796                                 (unsigned long long)dback->offset,
6797                                 dback->found_ref);
6798         } else {
6799                 u64 parent;
6800
6801                 tback = to_tree_backref(back);
6802                 if (back->full_backref)
6803                         parent = tback->parent;
6804                 else
6805                         parent = 0;
6806
6807                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6808                                            rec->start, rec->max_size,
6809                                            parent, tback->root, 0, 0);
6810                 fprintf(stderr, "adding new tree backref on "
6811                         "start %llu len %llu parent %llu root %llu\n",
6812                         rec->start, rec->max_size, parent, tback->root);
6813         }
6814 fail:
6815         btrfs_release_path(path);
6816         return ret;
6817 }
6818
6819 static struct extent_entry *find_entry(struct list_head *entries,
6820                                        u64 bytenr, u64 bytes)
6821 {
6822         struct extent_entry *entry = NULL;
6823
6824         list_for_each_entry(entry, entries, list) {
6825                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6826                         return entry;
6827         }
6828
6829         return NULL;
6830 }
6831
6832 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6833 {
6834         struct extent_entry *entry, *best = NULL, *prev = NULL;
6835
6836         list_for_each_entry(entry, entries, list) {
6837                 if (!prev) {
6838                         prev = entry;
6839                         continue;
6840                 }
6841
6842                 /*
6843                  * If there are as many broken entries as entries then we know
6844                  * not to trust this particular entry.
6845                  */
6846                 if (entry->broken == entry->count)
6847                         continue;
6848
6849                 /*
6850                  * If our current entry == best then we can't be sure our best
6851                  * is really the best, so we need to keep searching.
6852                  */
6853                 if (best && best->count == entry->count) {
6854                         prev = entry;
6855                         best = NULL;
6856                         continue;
6857                 }
6858
6859                 /* Prev == entry, not good enough, have to keep searching */
6860                 if (!prev->broken && prev->count == entry->count)
6861                         continue;
6862
6863                 if (!best)
6864                         best = (prev->count > entry->count) ? prev : entry;
6865                 else if (best->count < entry->count)
6866                         best = entry;
6867                 prev = entry;
6868         }
6869
6870         return best;
6871 }
6872
6873 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6874                       struct data_backref *dback, struct extent_entry *entry)
6875 {
6876         struct btrfs_trans_handle *trans;
6877         struct btrfs_root *root;
6878         struct btrfs_file_extent_item *fi;
6879         struct extent_buffer *leaf;
6880         struct btrfs_key key;
6881         u64 bytenr, bytes;
6882         int ret, err;
6883
6884         key.objectid = dback->root;
6885         key.type = BTRFS_ROOT_ITEM_KEY;
6886         key.offset = (u64)-1;
6887         root = btrfs_read_fs_root(info, &key);
6888         if (IS_ERR(root)) {
6889                 fprintf(stderr, "Couldn't find root for our ref\n");
6890                 return -EINVAL;
6891         }
6892
6893         /*
6894          * The backref points to the original offset of the extent if it was
6895          * split, so we need to search down to the offset we have and then walk
6896          * forward until we find the backref we're looking for.
6897          */
6898         key.objectid = dback->owner;
6899         key.type = BTRFS_EXTENT_DATA_KEY;
6900         key.offset = dback->offset;
6901         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6902         if (ret < 0) {
6903                 fprintf(stderr, "Error looking up ref %d\n", ret);
6904                 return ret;
6905         }
6906
6907         while (1) {
6908                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6909                         ret = btrfs_next_leaf(root, path);
6910                         if (ret) {
6911                                 fprintf(stderr, "Couldn't find our ref, next\n");
6912                                 return -EINVAL;
6913                         }
6914                 }
6915                 leaf = path->nodes[0];
6916                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6917                 if (key.objectid != dback->owner ||
6918                     key.type != BTRFS_EXTENT_DATA_KEY) {
6919                         fprintf(stderr, "Couldn't find our ref, search\n");
6920                         return -EINVAL;
6921                 }
6922                 fi = btrfs_item_ptr(leaf, path->slots[0],
6923                                     struct btrfs_file_extent_item);
6924                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6925                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6926
6927                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6928                         break;
6929                 path->slots[0]++;
6930         }
6931
6932         btrfs_release_path(path);
6933
6934         trans = btrfs_start_transaction(root, 1);
6935         if (IS_ERR(trans))
6936                 return PTR_ERR(trans);
6937
6938         /*
6939          * Ok we have the key of the file extent we want to fix, now we can cow
6940          * down to the thing and fix it.
6941          */
6942         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6943         if (ret < 0) {
6944                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6945                         key.objectid, key.type, key.offset, ret);
6946                 goto out;
6947         }
6948         if (ret > 0) {
6949                 fprintf(stderr, "Well that's odd, we just found this key "
6950                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6951                         key.offset);
6952                 ret = -EINVAL;
6953                 goto out;
6954         }
6955         leaf = path->nodes[0];
6956         fi = btrfs_item_ptr(leaf, path->slots[0],
6957                             struct btrfs_file_extent_item);
6958
6959         if (btrfs_file_extent_compression(leaf, fi) &&
6960             dback->disk_bytenr != entry->bytenr) {
6961                 fprintf(stderr, "Ref doesn't match the record start and is "
6962                         "compressed, please take a btrfs-image of this file "
6963                         "system and send it to a btrfs developer so they can "
6964                         "complete this functionality for bytenr %Lu\n",
6965                         dback->disk_bytenr);
6966                 ret = -EINVAL;
6967                 goto out;
6968         }
6969
6970         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6971                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6972         } else if (dback->disk_bytenr > entry->bytenr) {
6973                 u64 off_diff, offset;
6974
6975                 off_diff = dback->disk_bytenr - entry->bytenr;
6976                 offset = btrfs_file_extent_offset(leaf, fi);
6977                 if (dback->disk_bytenr + offset +
6978                     btrfs_file_extent_num_bytes(leaf, fi) >
6979                     entry->bytenr + entry->bytes) {
6980                         fprintf(stderr, "Ref is past the entry end, please "
6981                                 "take a btrfs-image of this file system and "
6982                                 "send it to a btrfs developer, ref %Lu\n",
6983                                 dback->disk_bytenr);
6984                         ret = -EINVAL;
6985                         goto out;
6986                 }
6987                 offset += off_diff;
6988                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6989                 btrfs_set_file_extent_offset(leaf, fi, offset);
6990         } else if (dback->disk_bytenr < entry->bytenr) {
6991                 u64 offset;
6992
6993                 offset = btrfs_file_extent_offset(leaf, fi);
6994                 if (dback->disk_bytenr + offset < entry->bytenr) {
6995                         fprintf(stderr, "Ref is before the entry start, please"
6996                                 " take a btrfs-image of this file system and "
6997                                 "send it to a btrfs developer, ref %Lu\n",
6998                                 dback->disk_bytenr);
6999                         ret = -EINVAL;
7000                         goto out;
7001                 }
7002
7003                 offset += dback->disk_bytenr;
7004                 offset -= entry->bytenr;
7005                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7006                 btrfs_set_file_extent_offset(leaf, fi, offset);
7007         }
7008
7009         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7010
7011         /*
7012          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7013          * only do this if we aren't using compression, otherwise it's a
7014          * trickier case.
7015          */
7016         if (!btrfs_file_extent_compression(leaf, fi))
7017                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7018         else
7019                 printf("ram bytes may be wrong?\n");
7020         btrfs_mark_buffer_dirty(leaf);
7021 out:
7022         err = btrfs_commit_transaction(trans, root);
7023         btrfs_release_path(path);
7024         return ret ? ret : err;
7025 }
7026
7027 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7028                            struct extent_record *rec)
7029 {
7030         struct extent_backref *back;
7031         struct data_backref *dback;
7032         struct extent_entry *entry, *best = NULL;
7033         LIST_HEAD(entries);
7034         int nr_entries = 0;
7035         int broken_entries = 0;
7036         int ret = 0;
7037         short mismatch = 0;
7038
7039         /*
7040          * Metadata is easy and the backrefs should always agree on bytenr and
7041          * size, if not we've got bigger issues.
7042          */
7043         if (rec->metadata)
7044                 return 0;
7045
7046         list_for_each_entry(back, &rec->backrefs, list) {
7047                 if (back->full_backref || !back->is_data)
7048                         continue;
7049
7050                 dback = to_data_backref(back);
7051
7052                 /*
7053                  * We only pay attention to backrefs that we found a real
7054                  * backref for.
7055                  */
7056                 if (dback->found_ref == 0)
7057                         continue;
7058
7059                 /*
7060                  * For now we only catch when the bytes don't match, not the
7061                  * bytenr.  We can easily do this at the same time, but I want
7062                  * to have a fs image to test on before we just add repair
7063                  * functionality willy-nilly so we know we won't screw up the
7064                  * repair.
7065                  */
7066
7067                 entry = find_entry(&entries, dback->disk_bytenr,
7068                                    dback->bytes);
7069                 if (!entry) {
7070                         entry = malloc(sizeof(struct extent_entry));
7071                         if (!entry) {
7072                                 ret = -ENOMEM;
7073                                 goto out;
7074                         }
7075                         memset(entry, 0, sizeof(*entry));
7076                         entry->bytenr = dback->disk_bytenr;
7077                         entry->bytes = dback->bytes;
7078                         list_add_tail(&entry->list, &entries);
7079                         nr_entries++;
7080                 }
7081
7082                 /*
7083                  * If we only have on entry we may think the entries agree when
7084                  * in reality they don't so we have to do some extra checking.
7085                  */
7086                 if (dback->disk_bytenr != rec->start ||
7087                     dback->bytes != rec->nr || back->broken)
7088                         mismatch = 1;
7089
7090                 if (back->broken) {
7091                         entry->broken++;
7092                         broken_entries++;
7093                 }
7094
7095                 entry->count++;
7096         }
7097
7098         /* Yay all the backrefs agree, carry on good sir */
7099         if (nr_entries <= 1 && !mismatch)
7100                 goto out;
7101
7102         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7103                 "%Lu\n", rec->start);
7104
7105         /*
7106          * First we want to see if the backrefs can agree amongst themselves who
7107          * is right, so figure out which one of the entries has the highest
7108          * count.
7109          */
7110         best = find_most_right_entry(&entries);
7111
7112         /*
7113          * Ok so we may have an even split between what the backrefs think, so
7114          * this is where we use the extent ref to see what it thinks.
7115          */
7116         if (!best) {
7117                 entry = find_entry(&entries, rec->start, rec->nr);
7118                 if (!entry && (!broken_entries || !rec->found_rec)) {
7119                         fprintf(stderr, "Backrefs don't agree with each other "
7120                                 "and extent record doesn't agree with anybody,"
7121                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7122                                 rec->start, rec->nr);
7123                         ret = -EINVAL;
7124                         goto out;
7125                 } else if (!entry) {
7126                         /*
7127                          * Ok our backrefs were broken, we'll assume this is the
7128                          * correct value and add an entry for this range.
7129                          */
7130                         entry = malloc(sizeof(struct extent_entry));
7131                         if (!entry) {
7132                                 ret = -ENOMEM;
7133                                 goto out;
7134                         }
7135                         memset(entry, 0, sizeof(*entry));
7136                         entry->bytenr = rec->start;
7137                         entry->bytes = rec->nr;
7138                         list_add_tail(&entry->list, &entries);
7139                         nr_entries++;
7140                 }
7141                 entry->count++;
7142                 best = find_most_right_entry(&entries);
7143                 if (!best) {
7144                         fprintf(stderr, "Backrefs and extent record evenly "
7145                                 "split on who is right, this is going to "
7146                                 "require user input to fix bytenr %Lu bytes "
7147                                 "%Lu\n", rec->start, rec->nr);
7148                         ret = -EINVAL;
7149                         goto out;
7150                 }
7151         }
7152
7153         /*
7154          * I don't think this can happen currently as we'll abort() if we catch
7155          * this case higher up, but in case somebody removes that we still can't
7156          * deal with it properly here yet, so just bail out of that's the case.
7157          */
7158         if (best->bytenr != rec->start) {
7159                 fprintf(stderr, "Extent start and backref starts don't match, "
7160                         "please use btrfs-image on this file system and send "
7161                         "it to a btrfs developer so they can make fsck fix "
7162                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7163                         rec->start, rec->nr);
7164                 ret = -EINVAL;
7165                 goto out;
7166         }
7167
7168         /*
7169          * Ok great we all agreed on an extent record, let's go find the real
7170          * references and fix up the ones that don't match.
7171          */
7172         list_for_each_entry(back, &rec->backrefs, list) {
7173                 if (back->full_backref || !back->is_data)
7174                         continue;
7175
7176                 dback = to_data_backref(back);
7177
7178                 /*
7179                  * Still ignoring backrefs that don't have a real ref attached
7180                  * to them.
7181                  */
7182                 if (dback->found_ref == 0)
7183                         continue;
7184
7185                 if (dback->bytes == best->bytes &&
7186                     dback->disk_bytenr == best->bytenr)
7187                         continue;
7188
7189                 ret = repair_ref(info, path, dback, best);
7190                 if (ret)
7191                         goto out;
7192         }
7193
7194         /*
7195          * Ok we messed with the actual refs, which means we need to drop our
7196          * entire cache and go back and rescan.  I know this is a huge pain and
7197          * adds a lot of extra work, but it's the only way to be safe.  Once all
7198          * the backrefs agree we may not need to do anything to the extent
7199          * record itself.
7200          */
7201         ret = -EAGAIN;
7202 out:
7203         while (!list_empty(&entries)) {
7204                 entry = list_entry(entries.next, struct extent_entry, list);
7205                 list_del_init(&entry->list);
7206                 free(entry);
7207         }
7208         return ret;
7209 }
7210
7211 static int process_duplicates(struct btrfs_root *root,
7212                               struct cache_tree *extent_cache,
7213                               struct extent_record *rec)
7214 {
7215         struct extent_record *good, *tmp;
7216         struct cache_extent *cache;
7217         int ret;
7218
7219         /*
7220          * If we found a extent record for this extent then return, or if we
7221          * have more than one duplicate we are likely going to need to delete
7222          * something.
7223          */
7224         if (rec->found_rec || rec->num_duplicates > 1)
7225                 return 0;
7226
7227         /* Shouldn't happen but just in case */
7228         BUG_ON(!rec->num_duplicates);
7229
7230         /*
7231          * So this happens if we end up with a backref that doesn't match the
7232          * actual extent entry.  So either the backref is bad or the extent
7233          * entry is bad.  Either way we want to have the extent_record actually
7234          * reflect what we found in the extent_tree, so we need to take the
7235          * duplicate out and use that as the extent_record since the only way we
7236          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7237          */
7238         remove_cache_extent(extent_cache, &rec->cache);
7239
7240         good = to_extent_record(rec->dups.next);
7241         list_del_init(&good->list);
7242         INIT_LIST_HEAD(&good->backrefs);
7243         INIT_LIST_HEAD(&good->dups);
7244         good->cache.start = good->start;
7245         good->cache.size = good->nr;
7246         good->content_checked = 0;
7247         good->owner_ref_checked = 0;
7248         good->num_duplicates = 0;
7249         good->refs = rec->refs;
7250         list_splice_init(&rec->backrefs, &good->backrefs);
7251         while (1) {
7252                 cache = lookup_cache_extent(extent_cache, good->start,
7253                                             good->nr);
7254                 if (!cache)
7255                         break;
7256                 tmp = container_of(cache, struct extent_record, cache);
7257
7258                 /*
7259                  * If we find another overlapping extent and it's found_rec is
7260                  * set then it's a duplicate and we need to try and delete
7261                  * something.
7262                  */
7263                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7264                         if (list_empty(&good->list))
7265                                 list_add_tail(&good->list,
7266                                               &duplicate_extents);
7267                         good->num_duplicates += tmp->num_duplicates + 1;
7268                         list_splice_init(&tmp->dups, &good->dups);
7269                         list_del_init(&tmp->list);
7270                         list_add_tail(&tmp->list, &good->dups);
7271                         remove_cache_extent(extent_cache, &tmp->cache);
7272                         continue;
7273                 }
7274
7275                 /*
7276                  * Ok we have another non extent item backed extent rec, so lets
7277                  * just add it to this extent and carry on like we did above.
7278                  */
7279                 good->refs += tmp->refs;
7280                 list_splice_init(&tmp->backrefs, &good->backrefs);
7281                 remove_cache_extent(extent_cache, &tmp->cache);
7282                 free(tmp);
7283         }
7284         ret = insert_cache_extent(extent_cache, &good->cache);
7285         BUG_ON(ret);
7286         free(rec);
7287         return good->num_duplicates ? 0 : 1;
7288 }
7289
7290 static int delete_duplicate_records(struct btrfs_root *root,
7291                                     struct extent_record *rec)
7292 {
7293         struct btrfs_trans_handle *trans;
7294         LIST_HEAD(delete_list);
7295         struct btrfs_path *path;
7296         struct extent_record *tmp, *good, *n;
7297         int nr_del = 0;
7298         int ret = 0, err;
7299         struct btrfs_key key;
7300
7301         path = btrfs_alloc_path();
7302         if (!path) {
7303                 ret = -ENOMEM;
7304                 goto out;
7305         }
7306
7307         good = rec;
7308         /* Find the record that covers all of the duplicates. */
7309         list_for_each_entry(tmp, &rec->dups, list) {
7310                 if (good->start < tmp->start)
7311                         continue;
7312                 if (good->nr > tmp->nr)
7313                         continue;
7314
7315                 if (tmp->start + tmp->nr < good->start + good->nr) {
7316                         fprintf(stderr, "Ok we have overlapping extents that "
7317                                 "aren't completely covered by each other, this "
7318                                 "is going to require more careful thought.  "
7319                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7320                                 tmp->start, tmp->nr, good->start, good->nr);
7321                         abort();
7322                 }
7323                 good = tmp;
7324         }
7325
7326         if (good != rec)
7327                 list_add_tail(&rec->list, &delete_list);
7328
7329         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7330                 if (tmp == good)
7331                         continue;
7332                 list_move_tail(&tmp->list, &delete_list);
7333         }
7334
7335         root = root->fs_info->extent_root;
7336         trans = btrfs_start_transaction(root, 1);
7337         if (IS_ERR(trans)) {
7338                 ret = PTR_ERR(trans);
7339                 goto out;
7340         }
7341
7342         list_for_each_entry(tmp, &delete_list, list) {
7343                 if (tmp->found_rec == 0)
7344                         continue;
7345                 key.objectid = tmp->start;
7346                 key.type = BTRFS_EXTENT_ITEM_KEY;
7347                 key.offset = tmp->nr;
7348
7349                 /* Shouldn't happen but just in case */
7350                 if (tmp->metadata) {
7351                         fprintf(stderr, "Well this shouldn't happen, extent "
7352                                 "record overlaps but is metadata? "
7353                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7354                         abort();
7355                 }
7356
7357                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7358                 if (ret) {
7359                         if (ret > 0)
7360                                 ret = -EINVAL;
7361                         break;
7362                 }
7363                 ret = btrfs_del_item(trans, root, path);
7364                 if (ret)
7365                         break;
7366                 btrfs_release_path(path);
7367                 nr_del++;
7368         }
7369         err = btrfs_commit_transaction(trans, root);
7370         if (err && !ret)
7371                 ret = err;
7372 out:
7373         while (!list_empty(&delete_list)) {
7374                 tmp = to_extent_record(delete_list.next);
7375                 list_del_init(&tmp->list);
7376                 if (tmp == rec)
7377                         continue;
7378                 free(tmp);
7379         }
7380
7381         while (!list_empty(&rec->dups)) {
7382                 tmp = to_extent_record(rec->dups.next);
7383                 list_del_init(&tmp->list);
7384                 free(tmp);
7385         }
7386
7387         btrfs_free_path(path);
7388
7389         if (!ret && !nr_del)
7390                 rec->num_duplicates = 0;
7391
7392         return ret ? ret : nr_del;
7393 }
7394
7395 static int find_possible_backrefs(struct btrfs_fs_info *info,
7396                                   struct btrfs_path *path,
7397                                   struct cache_tree *extent_cache,
7398                                   struct extent_record *rec)
7399 {
7400         struct btrfs_root *root;
7401         struct extent_backref *back;
7402         struct data_backref *dback;
7403         struct cache_extent *cache;
7404         struct btrfs_file_extent_item *fi;
7405         struct btrfs_key key;
7406         u64 bytenr, bytes;
7407         int ret;
7408
7409         list_for_each_entry(back, &rec->backrefs, list) {
7410                 /* Don't care about full backrefs (poor unloved backrefs) */
7411                 if (back->full_backref || !back->is_data)
7412                         continue;
7413
7414                 dback = to_data_backref(back);
7415
7416                 /* We found this one, we don't need to do a lookup */
7417                 if (dback->found_ref)
7418                         continue;
7419
7420                 key.objectid = dback->root;
7421                 key.type = BTRFS_ROOT_ITEM_KEY;
7422                 key.offset = (u64)-1;
7423
7424                 root = btrfs_read_fs_root(info, &key);
7425
7426                 /* No root, definitely a bad ref, skip */
7427                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7428                         continue;
7429                 /* Other err, exit */
7430                 if (IS_ERR(root))
7431                         return PTR_ERR(root);
7432
7433                 key.objectid = dback->owner;
7434                 key.type = BTRFS_EXTENT_DATA_KEY;
7435                 key.offset = dback->offset;
7436                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7437                 if (ret) {
7438                         btrfs_release_path(path);
7439                         if (ret < 0)
7440                                 return ret;
7441                         /* Didn't find it, we can carry on */
7442                         ret = 0;
7443                         continue;
7444                 }
7445
7446                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7447                                     struct btrfs_file_extent_item);
7448                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7449                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7450                 btrfs_release_path(path);
7451                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7452                 if (cache) {
7453                         struct extent_record *tmp;
7454                         tmp = container_of(cache, struct extent_record, cache);
7455
7456                         /*
7457                          * If we found an extent record for the bytenr for this
7458                          * particular backref then we can't add it to our
7459                          * current extent record.  We only want to add backrefs
7460                          * that don't have a corresponding extent item in the
7461                          * extent tree since they likely belong to this record
7462                          * and we need to fix it if it doesn't match bytenrs.
7463                          */
7464                         if  (tmp->found_rec)
7465                                 continue;
7466                 }
7467
7468                 dback->found_ref += 1;
7469                 dback->disk_bytenr = bytenr;
7470                 dback->bytes = bytes;
7471
7472                 /*
7473                  * Set this so the verify backref code knows not to trust the
7474                  * values in this backref.
7475                  */
7476                 back->broken = 1;
7477         }
7478
7479         return 0;
7480 }
7481
7482 /*
7483  * Record orphan data ref into corresponding root.
7484  *
7485  * Return 0 if the extent item contains data ref and recorded.
7486  * Return 1 if the extent item contains no useful data ref
7487  *   On that case, it may contains only shared_dataref or metadata backref
7488  *   or the file extent exists(this should be handled by the extent bytenr
7489  *   recovery routine)
7490  * Return <0 if something goes wrong.
7491  */
7492 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7493                                       struct extent_record *rec)
7494 {
7495         struct btrfs_key key;
7496         struct btrfs_root *dest_root;
7497         struct extent_backref *back;
7498         struct data_backref *dback;
7499         struct orphan_data_extent *orphan;
7500         struct btrfs_path *path;
7501         int recorded_data_ref = 0;
7502         int ret = 0;
7503
7504         if (rec->metadata)
7505                 return 1;
7506         path = btrfs_alloc_path();
7507         if (!path)
7508                 return -ENOMEM;
7509         list_for_each_entry(back, &rec->backrefs, list) {
7510                 if (back->full_backref || !back->is_data ||
7511                     !back->found_extent_tree)
7512                         continue;
7513                 dback = to_data_backref(back);
7514                 if (dback->found_ref)
7515                         continue;
7516                 key.objectid = dback->root;
7517                 key.type = BTRFS_ROOT_ITEM_KEY;
7518                 key.offset = (u64)-1;
7519
7520                 dest_root = btrfs_read_fs_root(fs_info, &key);
7521
7522                 /* For non-exist root we just skip it */
7523                 if (IS_ERR(dest_root) || !dest_root)
7524                         continue;
7525
7526                 key.objectid = dback->owner;
7527                 key.type = BTRFS_EXTENT_DATA_KEY;
7528                 key.offset = dback->offset;
7529
7530                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7531                 /*
7532                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7533                  * we need to record it for inode/file extent rebuild.
7534                  * For ret > 0, we record it only for file extent rebuild.
7535                  * For ret == 0, the file extent exists but only bytenr
7536                  * mismatch, let the original bytenr fix routine to handle,
7537                  * don't record it.
7538                  */
7539                 if (ret == 0)
7540                         continue;
7541                 ret = 0;
7542                 orphan = malloc(sizeof(*orphan));
7543                 if (!orphan) {
7544                         ret = -ENOMEM;
7545                         goto out;
7546                 }
7547                 INIT_LIST_HEAD(&orphan->list);
7548                 orphan->root = dback->root;
7549                 orphan->objectid = dback->owner;
7550                 orphan->offset = dback->offset;
7551                 orphan->disk_bytenr = rec->cache.start;
7552                 orphan->disk_len = rec->cache.size;
7553                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7554                 recorded_data_ref = 1;
7555         }
7556 out:
7557         btrfs_free_path(path);
7558         if (!ret)
7559                 return !recorded_data_ref;
7560         else
7561                 return ret;
7562 }
7563
7564 /*
7565  * when an incorrect extent item is found, this will delete
7566  * all of the existing entries for it and recreate them
7567  * based on what the tree scan found.
7568  */
7569 static int fixup_extent_refs(struct btrfs_fs_info *info,
7570                              struct cache_tree *extent_cache,
7571                              struct extent_record *rec)
7572 {
7573         struct btrfs_trans_handle *trans = NULL;
7574         int ret;
7575         struct btrfs_path *path;
7576         struct list_head *cur = rec->backrefs.next;
7577         struct cache_extent *cache;
7578         struct extent_backref *back;
7579         int allocated = 0;
7580         u64 flags = 0;
7581
7582         if (rec->flag_block_full_backref)
7583                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7584
7585         path = btrfs_alloc_path();
7586         if (!path)
7587                 return -ENOMEM;
7588
7589         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7590                 /*
7591                  * Sometimes the backrefs themselves are so broken they don't
7592                  * get attached to any meaningful rec, so first go back and
7593                  * check any of our backrefs that we couldn't find and throw
7594                  * them into the list if we find the backref so that
7595                  * verify_backrefs can figure out what to do.
7596                  */
7597                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7598                 if (ret < 0)
7599                         goto out;
7600         }
7601
7602         /* step one, make sure all of the backrefs agree */
7603         ret = verify_backrefs(info, path, rec);
7604         if (ret < 0)
7605                 goto out;
7606
7607         trans = btrfs_start_transaction(info->extent_root, 1);
7608         if (IS_ERR(trans)) {
7609                 ret = PTR_ERR(trans);
7610                 goto out;
7611         }
7612
7613         /* step two, delete all the existing records */
7614         ret = delete_extent_records(trans, info->extent_root, path,
7615                                     rec->start, rec->max_size);
7616
7617         if (ret < 0)
7618                 goto out;
7619
7620         /* was this block corrupt?  If so, don't add references to it */
7621         cache = lookup_cache_extent(info->corrupt_blocks,
7622                                     rec->start, rec->max_size);
7623         if (cache) {
7624                 ret = 0;
7625                 goto out;
7626         }
7627
7628         /* step three, recreate all the refs we did find */
7629         while(cur != &rec->backrefs) {
7630                 back = to_extent_backref(cur);
7631                 cur = cur->next;
7632
7633                 /*
7634                  * if we didn't find any references, don't create a
7635                  * new extent record
7636                  */
7637                 if (!back->found_ref)
7638                         continue;
7639
7640                 rec->bad_full_backref = 0;
7641                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7642                 allocated = 1;
7643
7644                 if (ret)
7645                         goto out;
7646         }
7647 out:
7648         if (trans) {
7649                 int err = btrfs_commit_transaction(trans, info->extent_root);
7650                 if (!ret)
7651                         ret = err;
7652         }
7653
7654         btrfs_free_path(path);
7655         return ret;
7656 }
7657
7658 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7659                               struct extent_record *rec)
7660 {
7661         struct btrfs_trans_handle *trans;
7662         struct btrfs_root *root = fs_info->extent_root;
7663         struct btrfs_path *path;
7664         struct btrfs_extent_item *ei;
7665         struct btrfs_key key;
7666         u64 flags;
7667         int ret = 0;
7668
7669         key.objectid = rec->start;
7670         if (rec->metadata) {
7671                 key.type = BTRFS_METADATA_ITEM_KEY;
7672                 key.offset = rec->info_level;
7673         } else {
7674                 key.type = BTRFS_EXTENT_ITEM_KEY;
7675                 key.offset = rec->max_size;
7676         }
7677
7678         path = btrfs_alloc_path();
7679         if (!path)
7680                 return -ENOMEM;
7681
7682         trans = btrfs_start_transaction(root, 0);
7683         if (IS_ERR(trans)) {
7684                 btrfs_free_path(path);
7685                 return PTR_ERR(trans);
7686         }
7687
7688         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7689         if (ret < 0) {
7690                 btrfs_free_path(path);
7691                 btrfs_commit_transaction(trans, root);
7692                 return ret;
7693         } else if (ret) {
7694                 fprintf(stderr, "Didn't find extent for %llu\n",
7695                         (unsigned long long)rec->start);
7696                 btrfs_free_path(path);
7697                 btrfs_commit_transaction(trans, root);
7698                 return -ENOENT;
7699         }
7700
7701         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7702                             struct btrfs_extent_item);
7703         flags = btrfs_extent_flags(path->nodes[0], ei);
7704         if (rec->flag_block_full_backref) {
7705                 fprintf(stderr, "setting full backref on %llu\n",
7706                         (unsigned long long)key.objectid);
7707                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7708         } else {
7709                 fprintf(stderr, "clearing full backref on %llu\n",
7710                         (unsigned long long)key.objectid);
7711                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7712         }
7713         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7714         btrfs_mark_buffer_dirty(path->nodes[0]);
7715         btrfs_free_path(path);
7716         return btrfs_commit_transaction(trans, root);
7717 }
7718
7719 /* right now we only prune from the extent allocation tree */
7720 static int prune_one_block(struct btrfs_trans_handle *trans,
7721                            struct btrfs_fs_info *info,
7722                            struct btrfs_corrupt_block *corrupt)
7723 {
7724         int ret;
7725         struct btrfs_path path;
7726         struct extent_buffer *eb;
7727         u64 found;
7728         int slot;
7729         int nritems;
7730         int level = corrupt->level + 1;
7731
7732         btrfs_init_path(&path);
7733 again:
7734         /* we want to stop at the parent to our busted block */
7735         path.lowest_level = level;
7736
7737         ret = btrfs_search_slot(trans, info->extent_root,
7738                                 &corrupt->key, &path, -1, 1);
7739
7740         if (ret < 0)
7741                 goto out;
7742
7743         eb = path.nodes[level];
7744         if (!eb) {
7745                 ret = -ENOENT;
7746                 goto out;
7747         }
7748
7749         /*
7750          * hopefully the search gave us the block we want to prune,
7751          * lets try that first
7752          */
7753         slot = path.slots[level];
7754         found =  btrfs_node_blockptr(eb, slot);
7755         if (found == corrupt->cache.start)
7756                 goto del_ptr;
7757
7758         nritems = btrfs_header_nritems(eb);
7759
7760         /* the search failed, lets scan this node and hope we find it */
7761         for (slot = 0; slot < nritems; slot++) {
7762                 found =  btrfs_node_blockptr(eb, slot);
7763                 if (found == corrupt->cache.start)
7764                         goto del_ptr;
7765         }
7766         /*
7767          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7768          * to this block
7769          */
7770         if (eb == info->extent_root->node) {
7771                 ret = -ENOENT;
7772                 goto out;
7773         } else {
7774                 level++;
7775                 btrfs_release_path(&path);
7776                 goto again;
7777         }
7778
7779 del_ptr:
7780         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7781         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7782
7783 out:
7784         btrfs_release_path(&path);
7785         return ret;
7786 }
7787
7788 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7789 {
7790         struct btrfs_trans_handle *trans = NULL;
7791         struct cache_extent *cache;
7792         struct btrfs_corrupt_block *corrupt;
7793
7794         while (1) {
7795                 cache = search_cache_extent(info->corrupt_blocks, 0);
7796                 if (!cache)
7797                         break;
7798                 if (!trans) {
7799                         trans = btrfs_start_transaction(info->extent_root, 1);
7800                         if (IS_ERR(trans))
7801                                 return PTR_ERR(trans);
7802                 }
7803                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7804                 prune_one_block(trans, info, corrupt);
7805                 remove_cache_extent(info->corrupt_blocks, cache);
7806         }
7807         if (trans)
7808                 return btrfs_commit_transaction(trans, info->extent_root);
7809         return 0;
7810 }
7811
7812 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7813 {
7814         struct btrfs_block_group_cache *cache;
7815         u64 start, end;
7816         int ret;
7817
7818         while (1) {
7819                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7820                                             &start, &end, EXTENT_DIRTY);
7821                 if (ret)
7822                         break;
7823                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7824                                    GFP_NOFS);
7825         }
7826
7827         start = 0;
7828         while (1) {
7829                 cache = btrfs_lookup_first_block_group(fs_info, start);
7830                 if (!cache)
7831                         break;
7832                 if (cache->cached)
7833                         cache->cached = 0;
7834                 start = cache->key.objectid + cache->key.offset;
7835         }
7836 }
7837
7838 static int check_extent_refs(struct btrfs_root *root,
7839                              struct cache_tree *extent_cache)
7840 {
7841         struct extent_record *rec;
7842         struct cache_extent *cache;
7843         int err = 0;
7844         int ret = 0;
7845         int fixed = 0;
7846         int had_dups = 0;
7847         int recorded = 0;
7848
7849         if (repair) {
7850                 /*
7851                  * if we're doing a repair, we have to make sure
7852                  * we don't allocate from the problem extents.
7853                  * In the worst case, this will be all the
7854                  * extents in the FS
7855                  */
7856                 cache = search_cache_extent(extent_cache, 0);
7857                 while(cache) {
7858                         rec = container_of(cache, struct extent_record, cache);
7859                         set_extent_dirty(root->fs_info->excluded_extents,
7860                                          rec->start,
7861                                          rec->start + rec->max_size - 1,
7862                                          GFP_NOFS);
7863                         cache = next_cache_extent(cache);
7864                 }
7865
7866                 /* pin down all the corrupted blocks too */
7867                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7868                 while(cache) {
7869                         set_extent_dirty(root->fs_info->excluded_extents,
7870                                          cache->start,
7871                                          cache->start + cache->size - 1,
7872                                          GFP_NOFS);
7873                         cache = next_cache_extent(cache);
7874                 }
7875                 prune_corrupt_blocks(root->fs_info);
7876                 reset_cached_block_groups(root->fs_info);
7877         }
7878
7879         reset_cached_block_groups(root->fs_info);
7880
7881         /*
7882          * We need to delete any duplicate entries we find first otherwise we
7883          * could mess up the extent tree when we have backrefs that actually
7884          * belong to a different extent item and not the weird duplicate one.
7885          */
7886         while (repair && !list_empty(&duplicate_extents)) {
7887                 rec = to_extent_record(duplicate_extents.next);
7888                 list_del_init(&rec->list);
7889
7890                 /* Sometimes we can find a backref before we find an actual
7891                  * extent, so we need to process it a little bit to see if there
7892                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7893                  * if this is a backref screwup.  If we need to delete stuff
7894                  * process_duplicates() will return 0, otherwise it will return
7895                  * 1 and we
7896                  */
7897                 if (process_duplicates(root, extent_cache, rec))
7898                         continue;
7899                 ret = delete_duplicate_records(root, rec);
7900                 if (ret < 0)
7901                         return ret;
7902                 /*
7903                  * delete_duplicate_records will return the number of entries
7904                  * deleted, so if it's greater than 0 then we know we actually
7905                  * did something and we need to remove.
7906                  */
7907                 if (ret)
7908                         had_dups = 1;
7909         }
7910
7911         if (had_dups)
7912                 return -EAGAIN;
7913
7914         while(1) {
7915                 int cur_err = 0;
7916
7917                 fixed = 0;
7918                 recorded = 0;
7919                 cache = search_cache_extent(extent_cache, 0);
7920                 if (!cache)
7921                         break;
7922                 rec = container_of(cache, struct extent_record, cache);
7923                 if (rec->num_duplicates) {
7924                         fprintf(stderr, "extent item %llu has multiple extent "
7925                                 "items\n", (unsigned long long)rec->start);
7926                         err = 1;
7927                         cur_err = 1;
7928                 }
7929
7930                 if (rec->refs != rec->extent_item_refs) {
7931                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7932                                 (unsigned long long)rec->start,
7933                                 (unsigned long long)rec->nr);
7934                         fprintf(stderr, "extent item %llu, found %llu\n",
7935                                 (unsigned long long)rec->extent_item_refs,
7936                                 (unsigned long long)rec->refs);
7937                         ret = record_orphan_data_extents(root->fs_info, rec);
7938                         if (ret < 0)
7939                                 goto repair_abort;
7940                         if (ret == 0) {
7941                                 recorded = 1;
7942                         } else {
7943                                 /*
7944                                  * we can't use the extent to repair file
7945                                  * extent, let the fallback method handle it.
7946                                  */
7947                                 if (!fixed && repair) {
7948                                         ret = fixup_extent_refs(
7949                                                         root->fs_info,
7950                                                         extent_cache, rec);
7951                                         if (ret)
7952                                                 goto repair_abort;
7953                                         fixed = 1;
7954                                 }
7955                         }
7956                         err = 1;
7957                         cur_err = 1;
7958                 }
7959                 if (all_backpointers_checked(rec, 1)) {
7960                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7961                                 (unsigned long long)rec->start,
7962                                 (unsigned long long)rec->nr);
7963
7964                         if (!fixed && !recorded && repair) {
7965                                 ret = fixup_extent_refs(root->fs_info,
7966                                                         extent_cache, rec);
7967                                 if (ret)
7968                                         goto repair_abort;
7969                                 fixed = 1;
7970                         }
7971                         cur_err = 1;
7972                         err = 1;
7973                 }
7974                 if (!rec->owner_ref_checked) {
7975                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7976                                 (unsigned long long)rec->start,
7977                                 (unsigned long long)rec->nr);
7978                         if (!fixed && !recorded && repair) {
7979                                 ret = fixup_extent_refs(root->fs_info,
7980                                                         extent_cache, rec);
7981                                 if (ret)
7982                                         goto repair_abort;
7983                                 fixed = 1;
7984                         }
7985                         err = 1;
7986                         cur_err = 1;
7987                 }
7988                 if (rec->bad_full_backref) {
7989                         fprintf(stderr, "bad full backref, on [%llu]\n",
7990                                 (unsigned long long)rec->start);
7991                         if (repair) {
7992                                 ret = fixup_extent_flags(root->fs_info, rec);
7993                                 if (ret)
7994                                         goto repair_abort;
7995                                 fixed = 1;
7996                         }
7997                         err = 1;
7998                         cur_err = 1;
7999                 }
8000                 /*
8001                  * Although it's not a extent ref's problem, we reuse this
8002                  * routine for error reporting.
8003                  * No repair function yet.
8004                  */
8005                 if (rec->crossing_stripes) {
8006                         fprintf(stderr,
8007                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8008                                 rec->start, rec->start + rec->max_size);
8009                         err = 1;
8010                         cur_err = 1;
8011                 }
8012
8013                 if (rec->wrong_chunk_type) {
8014                         fprintf(stderr,
8015                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8016                                 rec->start, rec->start + rec->max_size);
8017                         err = 1;
8018                         cur_err = 1;
8019                 }
8020
8021                 remove_cache_extent(extent_cache, cache);
8022                 free_all_extent_backrefs(rec);
8023                 if (!init_extent_tree && repair && (!cur_err || fixed))
8024                         clear_extent_dirty(root->fs_info->excluded_extents,
8025                                            rec->start,
8026                                            rec->start + rec->max_size - 1,
8027                                            GFP_NOFS);
8028                 free(rec);
8029         }
8030 repair_abort:
8031         if (repair) {
8032                 if (ret && ret != -EAGAIN) {
8033                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8034                         exit(1);
8035                 } else if (!ret) {
8036                         struct btrfs_trans_handle *trans;
8037
8038                         root = root->fs_info->extent_root;
8039                         trans = btrfs_start_transaction(root, 1);
8040                         if (IS_ERR(trans)) {
8041                                 ret = PTR_ERR(trans);
8042                                 goto repair_abort;
8043                         }
8044
8045                         btrfs_fix_block_accounting(trans, root);
8046                         ret = btrfs_commit_transaction(trans, root);
8047                         if (ret)
8048                                 goto repair_abort;
8049                 }
8050                 if (err)
8051                         fprintf(stderr, "repaired damaged extent references\n");
8052                 return ret;
8053         }
8054         return err;
8055 }
8056
8057 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8058 {
8059         u64 stripe_size;
8060
8061         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8062                 stripe_size = length;
8063                 stripe_size /= num_stripes;
8064         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8065                 stripe_size = length * 2;
8066                 stripe_size /= num_stripes;
8067         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8068                 stripe_size = length;
8069                 stripe_size /= (num_stripes - 1);
8070         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8071                 stripe_size = length;
8072                 stripe_size /= (num_stripes - 2);
8073         } else {
8074                 stripe_size = length;
8075         }
8076         return stripe_size;
8077 }
8078
8079 /*
8080  * Check the chunk with its block group/dev list ref:
8081  * Return 0 if all refs seems valid.
8082  * Return 1 if part of refs seems valid, need later check for rebuild ref
8083  * like missing block group and needs to search extent tree to rebuild them.
8084  * Return -1 if essential refs are missing and unable to rebuild.
8085  */
8086 static int check_chunk_refs(struct chunk_record *chunk_rec,
8087                             struct block_group_tree *block_group_cache,
8088                             struct device_extent_tree *dev_extent_cache,
8089                             int silent)
8090 {
8091         struct cache_extent *block_group_item;
8092         struct block_group_record *block_group_rec;
8093         struct cache_extent *dev_extent_item;
8094         struct device_extent_record *dev_extent_rec;
8095         u64 devid;
8096         u64 offset;
8097         u64 length;
8098         int metadump_v2 = 0;
8099         int i;
8100         int ret = 0;
8101
8102         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8103                                                chunk_rec->offset,
8104                                                chunk_rec->length);
8105         if (block_group_item) {
8106                 block_group_rec = container_of(block_group_item,
8107                                                struct block_group_record,
8108                                                cache);
8109                 if (chunk_rec->length != block_group_rec->offset ||
8110                     chunk_rec->offset != block_group_rec->objectid ||
8111                     (!metadump_v2 &&
8112                      chunk_rec->type_flags != block_group_rec->flags)) {
8113                         if (!silent)
8114                                 fprintf(stderr,
8115                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8116                                         chunk_rec->objectid,
8117                                         chunk_rec->type,
8118                                         chunk_rec->offset,
8119                                         chunk_rec->length,
8120                                         chunk_rec->offset,
8121                                         chunk_rec->type_flags,
8122                                         block_group_rec->objectid,
8123                                         block_group_rec->type,
8124                                         block_group_rec->offset,
8125                                         block_group_rec->offset,
8126                                         block_group_rec->objectid,
8127                                         block_group_rec->flags);
8128                         ret = -1;
8129                 } else {
8130                         list_del_init(&block_group_rec->list);
8131                         chunk_rec->bg_rec = block_group_rec;
8132                 }
8133         } else {
8134                 if (!silent)
8135                         fprintf(stderr,
8136                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8137                                 chunk_rec->objectid,
8138                                 chunk_rec->type,
8139                                 chunk_rec->offset,
8140                                 chunk_rec->length,
8141                                 chunk_rec->offset,
8142                                 chunk_rec->type_flags);
8143                 ret = 1;
8144         }
8145
8146         if (metadump_v2)
8147                 return ret;
8148
8149         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8150                                     chunk_rec->num_stripes);
8151         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8152                 devid = chunk_rec->stripes[i].devid;
8153                 offset = chunk_rec->stripes[i].offset;
8154                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8155                                                        devid, offset, length);
8156                 if (dev_extent_item) {
8157                         dev_extent_rec = container_of(dev_extent_item,
8158                                                 struct device_extent_record,
8159                                                 cache);
8160                         if (dev_extent_rec->objectid != devid ||
8161                             dev_extent_rec->offset != offset ||
8162                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8163                             dev_extent_rec->length != length) {
8164                                 if (!silent)
8165                                         fprintf(stderr,
8166                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8167                                                 chunk_rec->objectid,
8168                                                 chunk_rec->type,
8169                                                 chunk_rec->offset,
8170                                                 chunk_rec->stripes[i].devid,
8171                                                 chunk_rec->stripes[i].offset,
8172                                                 dev_extent_rec->objectid,
8173                                                 dev_extent_rec->offset,
8174                                                 dev_extent_rec->length);
8175                                 ret = -1;
8176                         } else {
8177                                 list_move(&dev_extent_rec->chunk_list,
8178                                           &chunk_rec->dextents);
8179                         }
8180                 } else {
8181                         if (!silent)
8182                                 fprintf(stderr,
8183                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8184                                         chunk_rec->objectid,
8185                                         chunk_rec->type,
8186                                         chunk_rec->offset,
8187                                         chunk_rec->stripes[i].devid,
8188                                         chunk_rec->stripes[i].offset);
8189                         ret = -1;
8190                 }
8191         }
8192         return ret;
8193 }
8194
8195 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8196 int check_chunks(struct cache_tree *chunk_cache,
8197                  struct block_group_tree *block_group_cache,
8198                  struct device_extent_tree *dev_extent_cache,
8199                  struct list_head *good, struct list_head *bad,
8200                  struct list_head *rebuild, int silent)
8201 {
8202         struct cache_extent *chunk_item;
8203         struct chunk_record *chunk_rec;
8204         struct block_group_record *bg_rec;
8205         struct device_extent_record *dext_rec;
8206         int err;
8207         int ret = 0;
8208
8209         chunk_item = first_cache_extent(chunk_cache);
8210         while (chunk_item) {
8211                 chunk_rec = container_of(chunk_item, struct chunk_record,
8212                                          cache);
8213                 err = check_chunk_refs(chunk_rec, block_group_cache,
8214                                        dev_extent_cache, silent);
8215                 if (err < 0)
8216                         ret = err;
8217                 if (err == 0 && good)
8218                         list_add_tail(&chunk_rec->list, good);
8219                 if (err > 0 && rebuild)
8220                         list_add_tail(&chunk_rec->list, rebuild);
8221                 if (err < 0 && bad)
8222                         list_add_tail(&chunk_rec->list, bad);
8223                 chunk_item = next_cache_extent(chunk_item);
8224         }
8225
8226         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8227                 if (!silent)
8228                         fprintf(stderr,
8229                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8230                                 bg_rec->objectid,
8231                                 bg_rec->offset,
8232                                 bg_rec->flags);
8233                 if (!ret)
8234                         ret = 1;
8235         }
8236
8237         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8238                             chunk_list) {
8239                 if (!silent)
8240                         fprintf(stderr,
8241                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8242                                 dext_rec->objectid,
8243                                 dext_rec->offset,
8244                                 dext_rec->length);
8245                 if (!ret)
8246                         ret = 1;
8247         }
8248         return ret;
8249 }
8250
8251
8252 static int check_device_used(struct device_record *dev_rec,
8253                              struct device_extent_tree *dext_cache)
8254 {
8255         struct cache_extent *cache;
8256         struct device_extent_record *dev_extent_rec;
8257         u64 total_byte = 0;
8258
8259         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8260         while (cache) {
8261                 dev_extent_rec = container_of(cache,
8262                                               struct device_extent_record,
8263                                               cache);
8264                 if (dev_extent_rec->objectid != dev_rec->devid)
8265                         break;
8266
8267                 list_del_init(&dev_extent_rec->device_list);
8268                 total_byte += dev_extent_rec->length;
8269                 cache = next_cache_extent(cache);
8270         }
8271
8272         if (total_byte != dev_rec->byte_used) {
8273                 fprintf(stderr,
8274                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8275                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8276                         dev_rec->type, dev_rec->offset);
8277                 return -1;
8278         } else {
8279                 return 0;
8280         }
8281 }
8282
8283 /* check btrfs_dev_item -> btrfs_dev_extent */
8284 static int check_devices(struct rb_root *dev_cache,
8285                          struct device_extent_tree *dev_extent_cache)
8286 {
8287         struct rb_node *dev_node;
8288         struct device_record *dev_rec;
8289         struct device_extent_record *dext_rec;
8290         int err;
8291         int ret = 0;
8292
8293         dev_node = rb_first(dev_cache);
8294         while (dev_node) {
8295                 dev_rec = container_of(dev_node, struct device_record, node);
8296                 err = check_device_used(dev_rec, dev_extent_cache);
8297                 if (err)
8298                         ret = err;
8299
8300                 dev_node = rb_next(dev_node);
8301         }
8302         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8303                             device_list) {
8304                 fprintf(stderr,
8305                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8306                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8307                 if (!ret)
8308                         ret = 1;
8309         }
8310         return ret;
8311 }
8312
8313 static int add_root_item_to_list(struct list_head *head,
8314                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8315                                   u8 level, u8 drop_level,
8316                                   int level_size, struct btrfs_key *drop_key)
8317 {
8318
8319         struct root_item_record *ri_rec;
8320         ri_rec = malloc(sizeof(*ri_rec));
8321         if (!ri_rec)
8322                 return -ENOMEM;
8323         ri_rec->bytenr = bytenr;
8324         ri_rec->objectid = objectid;
8325         ri_rec->level = level;
8326         ri_rec->level_size = level_size;
8327         ri_rec->drop_level = drop_level;
8328         ri_rec->last_snapshot = last_snapshot;
8329         if (drop_key)
8330                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8331         list_add_tail(&ri_rec->list, head);
8332
8333         return 0;
8334 }
8335
8336 static void free_root_item_list(struct list_head *list)
8337 {
8338         struct root_item_record *ri_rec;
8339
8340         while (!list_empty(list)) {
8341                 ri_rec = list_first_entry(list, struct root_item_record,
8342                                           list);
8343                 list_del_init(&ri_rec->list);
8344                 free(ri_rec);
8345         }
8346 }
8347
8348 static int deal_root_from_list(struct list_head *list,
8349                                struct btrfs_root *root,
8350                                struct block_info *bits,
8351                                int bits_nr,
8352                                struct cache_tree *pending,
8353                                struct cache_tree *seen,
8354                                struct cache_tree *reada,
8355                                struct cache_tree *nodes,
8356                                struct cache_tree *extent_cache,
8357                                struct cache_tree *chunk_cache,
8358                                struct rb_root *dev_cache,
8359                                struct block_group_tree *block_group_cache,
8360                                struct device_extent_tree *dev_extent_cache)
8361 {
8362         int ret = 0;
8363         u64 last;
8364
8365         while (!list_empty(list)) {
8366                 struct root_item_record *rec;
8367                 struct extent_buffer *buf;
8368                 rec = list_entry(list->next,
8369                                  struct root_item_record, list);
8370                 last = 0;
8371                 buf = read_tree_block(root->fs_info->tree_root,
8372                                       rec->bytenr, rec->level_size, 0);
8373                 if (!extent_buffer_uptodate(buf)) {
8374                         free_extent_buffer(buf);
8375                         ret = -EIO;
8376                         break;
8377                 }
8378                 ret = add_root_to_pending(buf, extent_cache, pending,
8379                                     seen, nodes, rec->objectid);
8380                 if (ret < 0)
8381                         break;
8382                 /*
8383                  * To rebuild extent tree, we need deal with snapshot
8384                  * one by one, otherwise we deal with node firstly which
8385                  * can maximize readahead.
8386                  */
8387                 while (1) {
8388                         ret = run_next_block(root, bits, bits_nr, &last,
8389                                              pending, seen, reada, nodes,
8390                                              extent_cache, chunk_cache,
8391                                              dev_cache, block_group_cache,
8392                                              dev_extent_cache, rec);
8393                         if (ret != 0)
8394                                 break;
8395                 }
8396                 free_extent_buffer(buf);
8397                 list_del(&rec->list);
8398                 free(rec);
8399                 if (ret < 0)
8400                         break;
8401         }
8402         while (ret >= 0) {
8403                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8404                                      reada, nodes, extent_cache, chunk_cache,
8405                                      dev_cache, block_group_cache,
8406                                      dev_extent_cache, NULL);
8407                 if (ret != 0) {
8408                         if (ret > 0)
8409                                 ret = 0;
8410                         break;
8411                 }
8412         }
8413         return ret;
8414 }
8415
8416 static int check_chunks_and_extents(struct btrfs_root *root)
8417 {
8418         struct rb_root dev_cache;
8419         struct cache_tree chunk_cache;
8420         struct block_group_tree block_group_cache;
8421         struct device_extent_tree dev_extent_cache;
8422         struct cache_tree extent_cache;
8423         struct cache_tree seen;
8424         struct cache_tree pending;
8425         struct cache_tree reada;
8426         struct cache_tree nodes;
8427         struct extent_io_tree excluded_extents;
8428         struct cache_tree corrupt_blocks;
8429         struct btrfs_path path;
8430         struct btrfs_key key;
8431         struct btrfs_key found_key;
8432         int ret, err = 0;
8433         struct block_info *bits;
8434         int bits_nr;
8435         struct extent_buffer *leaf;
8436         int slot;
8437         struct btrfs_root_item ri;
8438         struct list_head dropping_trees;
8439         struct list_head normal_trees;
8440         struct btrfs_root *root1;
8441         u64 objectid;
8442         u32 level_size;
8443         u8 level;
8444
8445         dev_cache = RB_ROOT;
8446         cache_tree_init(&chunk_cache);
8447         block_group_tree_init(&block_group_cache);
8448         device_extent_tree_init(&dev_extent_cache);
8449
8450         cache_tree_init(&extent_cache);
8451         cache_tree_init(&seen);
8452         cache_tree_init(&pending);
8453         cache_tree_init(&nodes);
8454         cache_tree_init(&reada);
8455         cache_tree_init(&corrupt_blocks);
8456         extent_io_tree_init(&excluded_extents);
8457         INIT_LIST_HEAD(&dropping_trees);
8458         INIT_LIST_HEAD(&normal_trees);
8459
8460         if (repair) {
8461                 root->fs_info->excluded_extents = &excluded_extents;
8462                 root->fs_info->fsck_extent_cache = &extent_cache;
8463                 root->fs_info->free_extent_hook = free_extent_hook;
8464                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8465         }
8466
8467         bits_nr = 1024;
8468         bits = malloc(bits_nr * sizeof(struct block_info));
8469         if (!bits) {
8470                 perror("malloc");
8471                 exit(1);
8472         }
8473
8474         if (ctx.progress_enabled) {
8475                 ctx.tp = TASK_EXTENTS;
8476                 task_start(ctx.info);
8477         }
8478
8479 again:
8480         root1 = root->fs_info->tree_root;
8481         level = btrfs_header_level(root1->node);
8482         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8483                                     root1->node->start, 0, level, 0,
8484                                     root1->nodesize, NULL);
8485         if (ret < 0)
8486                 goto out;
8487         root1 = root->fs_info->chunk_root;
8488         level = btrfs_header_level(root1->node);
8489         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8490                                     root1->node->start, 0, level, 0,
8491                                     root1->nodesize, NULL);
8492         if (ret < 0)
8493                 goto out;
8494         btrfs_init_path(&path);
8495         key.offset = 0;
8496         key.objectid = 0;
8497         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8498         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8499                                         &key, &path, 0, 0);
8500         if (ret < 0)
8501                 goto out;
8502         while(1) {
8503                 leaf = path.nodes[0];
8504                 slot = path.slots[0];
8505                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8506                         ret = btrfs_next_leaf(root, &path);
8507                         if (ret != 0)
8508                                 break;
8509                         leaf = path.nodes[0];
8510                         slot = path.slots[0];
8511                 }
8512                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8513                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8514                         unsigned long offset;
8515                         u64 last_snapshot;
8516
8517                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8518                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8519                         last_snapshot = btrfs_root_last_snapshot(&ri);
8520                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8521                                 level = btrfs_root_level(&ri);
8522                                 level_size = root->nodesize;
8523                                 ret = add_root_item_to_list(&normal_trees,
8524                                                 found_key.objectid,
8525                                                 btrfs_root_bytenr(&ri),
8526                                                 last_snapshot, level,
8527                                                 0, level_size, NULL);
8528                                 if (ret < 0)
8529                                         goto out;
8530                         } else {
8531                                 level = btrfs_root_level(&ri);
8532                                 level_size = root->nodesize;
8533                                 objectid = found_key.objectid;
8534                                 btrfs_disk_key_to_cpu(&found_key,
8535                                                       &ri.drop_progress);
8536                                 ret = add_root_item_to_list(&dropping_trees,
8537                                                 objectid,
8538                                                 btrfs_root_bytenr(&ri),
8539                                                 last_snapshot, level,
8540                                                 ri.drop_level,
8541                                                 level_size, &found_key);
8542                                 if (ret < 0)
8543                                         goto out;
8544                         }
8545                 }
8546                 path.slots[0]++;
8547         }
8548         btrfs_release_path(&path);
8549
8550         /*
8551          * check_block can return -EAGAIN if it fixes something, please keep
8552          * this in mind when dealing with return values from these functions, if
8553          * we get -EAGAIN we want to fall through and restart the loop.
8554          */
8555         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8556                                   &seen, &reada, &nodes, &extent_cache,
8557                                   &chunk_cache, &dev_cache, &block_group_cache,
8558                                   &dev_extent_cache);
8559         if (ret < 0) {
8560                 if (ret == -EAGAIN)
8561                         goto loop;
8562                 goto out;
8563         }
8564         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8565                                   &pending, &seen, &reada, &nodes,
8566                                   &extent_cache, &chunk_cache, &dev_cache,
8567                                   &block_group_cache, &dev_extent_cache);
8568         if (ret < 0) {
8569                 if (ret == -EAGAIN)
8570                         goto loop;
8571                 goto out;
8572         }
8573
8574         ret = check_chunks(&chunk_cache, &block_group_cache,
8575                            &dev_extent_cache, NULL, NULL, NULL, 0);
8576         if (ret) {
8577                 if (ret == -EAGAIN)
8578                         goto loop;
8579                 err = ret;
8580         }
8581
8582         ret = check_extent_refs(root, &extent_cache);
8583         if (ret < 0) {
8584                 if (ret == -EAGAIN)
8585                         goto loop;
8586                 goto out;
8587         }
8588
8589         ret = check_devices(&dev_cache, &dev_extent_cache);
8590         if (ret && err)
8591                 ret = err;
8592
8593 out:
8594         task_stop(ctx.info);
8595         if (repair) {
8596                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8597                 extent_io_tree_cleanup(&excluded_extents);
8598                 root->fs_info->fsck_extent_cache = NULL;
8599                 root->fs_info->free_extent_hook = NULL;
8600                 root->fs_info->corrupt_blocks = NULL;
8601                 root->fs_info->excluded_extents = NULL;
8602         }
8603         free(bits);
8604         free_chunk_cache_tree(&chunk_cache);
8605         free_device_cache_tree(&dev_cache);
8606         free_block_group_tree(&block_group_cache);
8607         free_device_extent_tree(&dev_extent_cache);
8608         free_extent_cache_tree(&seen);
8609         free_extent_cache_tree(&pending);
8610         free_extent_cache_tree(&reada);
8611         free_extent_cache_tree(&nodes);
8612         return ret;
8613 loop:
8614         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8615         free_extent_cache_tree(&seen);
8616         free_extent_cache_tree(&pending);
8617         free_extent_cache_tree(&reada);
8618         free_extent_cache_tree(&nodes);
8619         free_chunk_cache_tree(&chunk_cache);
8620         free_block_group_tree(&block_group_cache);
8621         free_device_cache_tree(&dev_cache);
8622         free_device_extent_tree(&dev_extent_cache);
8623         free_extent_record_cache(root->fs_info, &extent_cache);
8624         free_root_item_list(&normal_trees);
8625         free_root_item_list(&dropping_trees);
8626         extent_io_tree_cleanup(&excluded_extents);
8627         goto again;
8628 }
8629
8630 /*
8631  * Check backrefs of a tree block given by @bytenr or @eb.
8632  *
8633  * @root:       the root containing the @bytenr or @eb
8634  * @eb:         tree block extent buffer, can be NULL
8635  * @bytenr:     bytenr of the tree block to search
8636  * @level:      tree level of the tree block
8637  * @owner:      owner of the tree block
8638  *
8639  * Return >0 for any error found and output error message
8640  * Return 0 for no error found
8641  */
8642 static int check_tree_block_ref(struct btrfs_root *root,
8643                                 struct extent_buffer *eb, u64 bytenr,
8644                                 int level, u64 owner)
8645 {
8646         struct btrfs_key key;
8647         struct btrfs_root *extent_root = root->fs_info->extent_root;
8648         struct btrfs_path path;
8649         struct btrfs_extent_item *ei;
8650         struct btrfs_extent_inline_ref *iref;
8651         struct extent_buffer *leaf;
8652         unsigned long end;
8653         unsigned long ptr;
8654         int slot;
8655         int skinny_level;
8656         int type;
8657         u32 nodesize = root->nodesize;
8658         u32 item_size;
8659         u64 offset;
8660         int found_ref = 0;
8661         int err = 0;
8662         int ret;
8663
8664         btrfs_init_path(&path);
8665         key.objectid = bytenr;
8666         if (btrfs_fs_incompat(root->fs_info,
8667                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8668                 key.type = BTRFS_METADATA_ITEM_KEY;
8669         else
8670                 key.type = BTRFS_EXTENT_ITEM_KEY;
8671         key.offset = (u64)-1;
8672
8673         /* Search for the backref in extent tree */
8674         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8675         if (ret < 0) {
8676                 err |= BACKREF_MISSING;
8677                 goto out;
8678         }
8679         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8680         if (ret) {
8681                 err |= BACKREF_MISSING;
8682                 goto out;
8683         }
8684
8685         leaf = path.nodes[0];
8686         slot = path.slots[0];
8687         btrfs_item_key_to_cpu(leaf, &key, slot);
8688
8689         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8690
8691         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8692                 skinny_level = (int)key.offset;
8693                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8694         } else {
8695                 struct btrfs_tree_block_info *info;
8696
8697                 info = (struct btrfs_tree_block_info *)(ei + 1);
8698                 skinny_level = btrfs_tree_block_level(leaf, info);
8699                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8700         }
8701
8702         if (eb) {
8703                 u64 header_gen;
8704                 u64 extent_gen;
8705
8706                 if (!(btrfs_extent_flags(leaf, ei) &
8707                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8708                         error(
8709                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8710                                 key.objectid, nodesize,
8711                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8712                         err = BACKREF_MISMATCH;
8713                 }
8714                 header_gen = btrfs_header_generation(eb);
8715                 extent_gen = btrfs_extent_generation(leaf, ei);
8716                 if (header_gen != extent_gen) {
8717                         error(
8718         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8719                                 key.objectid, nodesize, header_gen,
8720                                 extent_gen);
8721                         err = BACKREF_MISMATCH;
8722                 }
8723                 if (level != skinny_level) {
8724                         error(
8725                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8726                                 key.objectid, nodesize, level, skinny_level);
8727                         err = BACKREF_MISMATCH;
8728                 }
8729                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8730                         error(
8731                         "extent[%llu %u] is referred by other roots than %llu",
8732                                 key.objectid, nodesize, root->objectid);
8733                         err = BACKREF_MISMATCH;
8734                 }
8735         }
8736
8737         /*
8738          * Iterate the extent/metadata item to find the exact backref
8739          */
8740         item_size = btrfs_item_size_nr(leaf, slot);
8741         ptr = (unsigned long)iref;
8742         end = (unsigned long)ei + item_size;
8743         while (ptr < end) {
8744                 iref = (struct btrfs_extent_inline_ref *)ptr;
8745                 type = btrfs_extent_inline_ref_type(leaf, iref);
8746                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8747
8748                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8749                         (offset == root->objectid || offset == owner)) {
8750                         found_ref = 1;
8751                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8752                         /* Check if the backref points to valid referencer */
8753                         found_ref = !check_tree_block_ref(root, NULL, offset,
8754                                                           level + 1, owner);
8755                 }
8756
8757                 if (found_ref)
8758                         break;
8759                 ptr += btrfs_extent_inline_ref_size(type);
8760         }
8761
8762         /*
8763          * Inlined extent item doesn't have what we need, check
8764          * TREE_BLOCK_REF_KEY
8765          */
8766         if (!found_ref) {
8767                 btrfs_release_path(&path);
8768                 key.objectid = bytenr;
8769                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8770                 key.offset = root->objectid;
8771
8772                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8773                 if (!ret)
8774                         found_ref = 1;
8775         }
8776         if (!found_ref)
8777                 err |= BACKREF_MISSING;
8778 out:
8779         btrfs_release_path(&path);
8780         if (eb && (err & BACKREF_MISSING))
8781                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8782                         bytenr, nodesize, owner, level);
8783         return err;
8784 }
8785
8786 /*
8787  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8788  *
8789  * Return >0 any error found and output error message
8790  * Return 0 for no error found
8791  */
8792 static int check_extent_data_item(struct btrfs_root *root,
8793                                   struct extent_buffer *eb, int slot)
8794 {
8795         struct btrfs_file_extent_item *fi;
8796         struct btrfs_path path;
8797         struct btrfs_root *extent_root = root->fs_info->extent_root;
8798         struct btrfs_key fi_key;
8799         struct btrfs_key dbref_key;
8800         struct extent_buffer *leaf;
8801         struct btrfs_extent_item *ei;
8802         struct btrfs_extent_inline_ref *iref;
8803         struct btrfs_extent_data_ref *dref;
8804         u64 owner;
8805         u64 file_extent_gen;
8806         u64 disk_bytenr;
8807         u64 disk_num_bytes;
8808         u64 extent_num_bytes;
8809         u64 extent_flags;
8810         u64 extent_gen;
8811         u32 item_size;
8812         unsigned long end;
8813         unsigned long ptr;
8814         int type;
8815         u64 ref_root;
8816         int found_dbackref = 0;
8817         int err = 0;
8818         int ret;
8819
8820         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8821         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8822         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8823
8824         /* Nothing to check for hole and inline data extents */
8825         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8826             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8827                 return 0;
8828
8829         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8830         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8831         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8832
8833         /* Check unaligned disk_num_bytes and num_bytes */
8834         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8835                 error(
8836 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8837                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8838                         root->sectorsize);
8839                 err |= BYTES_UNALIGNED;
8840         } else {
8841                 data_bytes_allocated += disk_num_bytes;
8842         }
8843         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8844                 error(
8845 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8846                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8847                         root->sectorsize);
8848                 err |= BYTES_UNALIGNED;
8849         } else {
8850                 data_bytes_referenced += extent_num_bytes;
8851         }
8852         owner = btrfs_header_owner(eb);
8853
8854         /* Check the extent item of the file extent in extent tree */
8855         btrfs_init_path(&path);
8856         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8857         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8858         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8859
8860         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8861         if (ret) {
8862                 err |= BACKREF_MISSING;
8863                 goto error;
8864         }
8865
8866         leaf = path.nodes[0];
8867         slot = path.slots[0];
8868         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8869
8870         extent_flags = btrfs_extent_flags(leaf, ei);
8871         extent_gen = btrfs_extent_generation(leaf, ei);
8872
8873         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8874                 error(
8875                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8876                     disk_bytenr, disk_num_bytes,
8877                     BTRFS_EXTENT_FLAG_DATA);
8878                 err |= BACKREF_MISMATCH;
8879         }
8880
8881         if (file_extent_gen < extent_gen) {
8882                 error(
8883 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8884                         disk_bytenr, disk_num_bytes, file_extent_gen,
8885                         extent_gen);
8886                 err |= BACKREF_MISMATCH;
8887         }
8888
8889         /* Check data backref inside that extent item */
8890         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8891         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8892         ptr = (unsigned long)iref;
8893         end = (unsigned long)ei + item_size;
8894         while (ptr < end) {
8895                 iref = (struct btrfs_extent_inline_ref *)ptr;
8896                 type = btrfs_extent_inline_ref_type(leaf, iref);
8897                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8898
8899                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8900                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8901                         if (ref_root == owner || ref_root == root->objectid)
8902                                 found_dbackref = 1;
8903                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8904                         found_dbackref = !check_tree_block_ref(root, NULL,
8905                                 btrfs_extent_inline_ref_offset(leaf, iref),
8906                                 0, owner);
8907                 }
8908
8909                 if (found_dbackref)
8910                         break;
8911                 ptr += btrfs_extent_inline_ref_size(type);
8912         }
8913
8914         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8915         if (!found_dbackref) {
8916                 btrfs_release_path(&path);
8917
8918                 btrfs_init_path(&path);
8919                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8920                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8921                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8922                                 fi_key.objectid, fi_key.offset);
8923
8924                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8925                                         &dbref_key, &path, 0, 0);
8926                 if (!ret)
8927                         found_dbackref = 1;
8928         }
8929
8930         if (!found_dbackref)
8931                 err |= BACKREF_MISSING;
8932 error:
8933         btrfs_release_path(&path);
8934         if (err & BACKREF_MISSING) {
8935                 error("data extent[%llu %llu] backref lost",
8936                       disk_bytenr, disk_num_bytes);
8937         }
8938         return err;
8939 }
8940
8941 /*
8942  * Get real tree block level for the case like shared block
8943  * Return >= 0 as tree level
8944  * Return <0 for error
8945  */
8946 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8947 {
8948         struct extent_buffer *eb;
8949         struct btrfs_path path;
8950         struct btrfs_key key;
8951         struct btrfs_extent_item *ei;
8952         u64 flags;
8953         u64 transid;
8954         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8955         u8 backref_level;
8956         u8 header_level;
8957         int ret;
8958
8959         /* Search extent tree for extent generation and level */
8960         key.objectid = bytenr;
8961         key.type = BTRFS_METADATA_ITEM_KEY;
8962         key.offset = (u64)-1;
8963
8964         btrfs_init_path(&path);
8965         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8966         if (ret < 0)
8967                 goto release_out;
8968         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8969         if (ret < 0)
8970                 goto release_out;
8971         if (ret > 0) {
8972                 ret = -ENOENT;
8973                 goto release_out;
8974         }
8975
8976         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8977         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8978                             struct btrfs_extent_item);
8979         flags = btrfs_extent_flags(path.nodes[0], ei);
8980         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8981                 ret = -ENOENT;
8982                 goto release_out;
8983         }
8984
8985         /* Get transid for later read_tree_block() check */
8986         transid = btrfs_extent_generation(path.nodes[0], ei);
8987
8988         /* Get backref level as one source */
8989         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8990                 backref_level = key.offset;
8991         } else {
8992                 struct btrfs_tree_block_info *info;
8993
8994                 info = (struct btrfs_tree_block_info *)(ei + 1);
8995                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8996         }
8997         btrfs_release_path(&path);
8998
8999         /* Get level from tree block as an alternative source */
9000         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9001         if (!extent_buffer_uptodate(eb)) {
9002                 free_extent_buffer(eb);
9003                 return -EIO;
9004         }
9005         header_level = btrfs_header_level(eb);
9006         free_extent_buffer(eb);
9007
9008         if (header_level != backref_level)
9009                 return -EIO;
9010         return header_level;
9011
9012 release_out:
9013         btrfs_release_path(&path);
9014         return ret;
9015 }
9016
9017 /*
9018  * Check if a tree block backref is valid (points to a valid tree block)
9019  * if level == -1, level will be resolved
9020  * Return >0 for any error found and print error message
9021  */
9022 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9023                                     u64 bytenr, int level)
9024 {
9025         struct btrfs_root *root;
9026         struct btrfs_key key;
9027         struct btrfs_path path;
9028         struct extent_buffer *eb;
9029         struct extent_buffer *node;
9030         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9031         int err = 0;
9032         int ret;
9033
9034         /* Query level for level == -1 special case */
9035         if (level == -1)
9036                 level = query_tree_block_level(fs_info, bytenr);
9037         if (level < 0) {
9038                 err |= REFERENCER_MISSING;
9039                 goto out;
9040         }
9041
9042         key.objectid = root_id;
9043         key.type = BTRFS_ROOT_ITEM_KEY;
9044         key.offset = (u64)-1;
9045
9046         root = btrfs_read_fs_root(fs_info, &key);
9047         if (IS_ERR(root)) {
9048                 err |= REFERENCER_MISSING;
9049                 goto out;
9050         }
9051
9052         /* Read out the tree block to get item/node key */
9053         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9054         if (!extent_buffer_uptodate(eb)) {
9055                 err |= REFERENCER_MISSING;
9056                 free_extent_buffer(eb);
9057                 goto out;
9058         }
9059
9060         /* Empty tree, no need to check key */
9061         if (!btrfs_header_nritems(eb) && !level) {
9062                 free_extent_buffer(eb);
9063                 goto out;
9064         }
9065
9066         if (level)
9067                 btrfs_node_key_to_cpu(eb, &key, 0);
9068         else
9069                 btrfs_item_key_to_cpu(eb, &key, 0);
9070
9071         free_extent_buffer(eb);
9072
9073         btrfs_init_path(&path);
9074         path.lowest_level = level;
9075         /* Search with the first key, to ensure we can reach it */
9076         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9077         if (ret < 0) {
9078                 err |= REFERENCER_MISSING;
9079                 goto release_out;
9080         }
9081
9082         node = path.nodes[level];
9083         if (btrfs_header_bytenr(node) != bytenr) {
9084                 error(
9085         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9086                         bytenr, nodesize, bytenr,
9087                         btrfs_header_bytenr(node));
9088                 err |= REFERENCER_MISMATCH;
9089         }
9090         if (btrfs_header_level(node) != level) {
9091                 error(
9092         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9093                         bytenr, nodesize, level,
9094                         btrfs_header_level(node));
9095                 err |= REFERENCER_MISMATCH;
9096         }
9097
9098 release_out:
9099         btrfs_release_path(&path);
9100 out:
9101         if (err & REFERENCER_MISSING) {
9102                 if (level < 0)
9103                         error("extent [%llu %d] lost referencer (owner: %llu)",
9104                                 bytenr, nodesize, root_id);
9105                 else
9106                         error(
9107                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9108                                 bytenr, nodesize, root_id, level);
9109         }
9110
9111         return err;
9112 }
9113
9114 /*
9115  * Check referencer for shared block backref
9116  * If level == -1, this function will resolve the level.
9117  */
9118 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9119                                      u64 parent, u64 bytenr, int level)
9120 {
9121         struct extent_buffer *eb;
9122         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9123         u32 nr;
9124         int found_parent = 0;
9125         int i;
9126
9127         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9128         if (!extent_buffer_uptodate(eb))
9129                 goto out;
9130
9131         if (level == -1)
9132                 level = query_tree_block_level(fs_info, bytenr);
9133         if (level < 0)
9134                 goto out;
9135
9136         if (level + 1 != btrfs_header_level(eb))
9137                 goto out;
9138
9139         nr = btrfs_header_nritems(eb);
9140         for (i = 0; i < nr; i++) {
9141                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9142                         found_parent = 1;
9143                         break;
9144                 }
9145         }
9146 out:
9147         free_extent_buffer(eb);
9148         if (!found_parent) {
9149                 error(
9150         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9151                         bytenr, nodesize, parent, level);
9152                 return REFERENCER_MISSING;
9153         }
9154         return 0;
9155 }
9156
9157 /*
9158  * Check referencer for normal (inlined) data ref
9159  * If len == 0, it will be resolved by searching in extent tree
9160  */
9161 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9162                                      u64 root_id, u64 objectid, u64 offset,
9163                                      u64 bytenr, u64 len, u32 count)
9164 {
9165         struct btrfs_root *root;
9166         struct btrfs_root *extent_root = fs_info->extent_root;
9167         struct btrfs_key key;
9168         struct btrfs_path path;
9169         struct extent_buffer *leaf;
9170         struct btrfs_file_extent_item *fi;
9171         u32 found_count = 0;
9172         int slot;
9173         int ret = 0;
9174
9175         if (!len) {
9176                 key.objectid = bytenr;
9177                 key.type = BTRFS_EXTENT_ITEM_KEY;
9178                 key.offset = (u64)-1;
9179
9180                 btrfs_init_path(&path);
9181                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9182                 if (ret < 0)
9183                         goto out;
9184                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9185                 if (ret)
9186                         goto out;
9187                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9188                 if (key.objectid != bytenr ||
9189                     key.type != BTRFS_EXTENT_ITEM_KEY)
9190                         goto out;
9191                 len = key.offset;
9192                 btrfs_release_path(&path);
9193         }
9194         key.objectid = root_id;
9195         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9196         key.offset = (u64)-1;
9197         btrfs_init_path(&path);
9198
9199         root = btrfs_read_fs_root(fs_info, &key);
9200         if (IS_ERR(root))
9201                 goto out;
9202
9203         key.objectid = objectid;
9204         key.type = BTRFS_EXTENT_DATA_KEY;
9205         /*
9206          * It can be nasty as data backref offset is
9207          * file offset - file extent offset, which is smaller or
9208          * equal to original backref offset.  The only special case is
9209          * overflow.  So we need to special check and do further search.
9210          */
9211         key.offset = offset & (1ULL << 63) ? 0 : offset;
9212
9213         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9214         if (ret < 0)
9215                 goto out;
9216
9217         /*
9218          * Search afterwards to get correct one
9219          * NOTE: As we must do a comprehensive check on the data backref to
9220          * make sure the dref count also matches, we must iterate all file
9221          * extents for that inode.
9222          */
9223         while (1) {
9224                 leaf = path.nodes[0];
9225                 slot = path.slots[0];
9226
9227                 btrfs_item_key_to_cpu(leaf, &key, slot);
9228                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9229                         break;
9230                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9231                 /*
9232                  * Except normal disk bytenr and disk num bytes, we still
9233                  * need to do extra check on dbackref offset as
9234                  * dbackref offset = file_offset - file_extent_offset
9235                  */
9236                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9237                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9238                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9239                     offset)
9240                         found_count++;
9241
9242                 ret = btrfs_next_item(root, &path);
9243                 if (ret)
9244                         break;
9245         }
9246 out:
9247         btrfs_release_path(&path);
9248         if (found_count != count) {
9249                 error(
9250 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9251                         bytenr, len, root_id, objectid, offset, count, found_count);
9252                 return REFERENCER_MISSING;
9253         }
9254         return 0;
9255 }
9256
9257 /*
9258  * Check if the referencer of a shared data backref exists
9259  */
9260 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9261                                      u64 parent, u64 bytenr)
9262 {
9263         struct extent_buffer *eb;
9264         struct btrfs_key key;
9265         struct btrfs_file_extent_item *fi;
9266         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9267         u32 nr;
9268         int found_parent = 0;
9269         int i;
9270
9271         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9272         if (!extent_buffer_uptodate(eb))
9273                 goto out;
9274
9275         nr = btrfs_header_nritems(eb);
9276         for (i = 0; i < nr; i++) {
9277                 btrfs_item_key_to_cpu(eb, &key, i);
9278                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9279                         continue;
9280
9281                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9282                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9283                         continue;
9284
9285                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9286                         found_parent = 1;
9287                         break;
9288                 }
9289         }
9290
9291 out:
9292         free_extent_buffer(eb);
9293         if (!found_parent) {
9294                 error("shared extent %llu referencer lost (parent: %llu)",
9295                         bytenr, parent);
9296                 return REFERENCER_MISSING;
9297         }
9298         return 0;
9299 }
9300
9301 /*
9302  * This function will check a given extent item, including its backref and
9303  * itself (like crossing stripe boundary and type)
9304  *
9305  * Since we don't use extent_record anymore, introduce new error bit
9306  */
9307 static int check_extent_item(struct btrfs_fs_info *fs_info,
9308                              struct extent_buffer *eb, int slot)
9309 {
9310         struct btrfs_extent_item *ei;
9311         struct btrfs_extent_inline_ref *iref;
9312         struct btrfs_extent_data_ref *dref;
9313         unsigned long end;
9314         unsigned long ptr;
9315         int type;
9316         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9317         u32 item_size = btrfs_item_size_nr(eb, slot);
9318         u64 flags;
9319         u64 offset;
9320         int metadata = 0;
9321         int level;
9322         struct btrfs_key key;
9323         int ret;
9324         int err = 0;
9325
9326         btrfs_item_key_to_cpu(eb, &key, slot);
9327         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9328                 bytes_used += key.offset;
9329         else
9330                 bytes_used += nodesize;
9331
9332         if (item_size < sizeof(*ei)) {
9333                 /*
9334                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9335                  * old thing when on disk format is still un-determined.
9336                  * No need to care about it anymore
9337                  */
9338                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9339                 return -ENOTTY;
9340         }
9341
9342         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9343         flags = btrfs_extent_flags(eb, ei);
9344
9345         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9346                 metadata = 1;
9347         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9348                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9349                       key.objectid, key.objectid + nodesize);
9350                 err |= CROSSING_STRIPE_BOUNDARY;
9351         }
9352
9353         ptr = (unsigned long)(ei + 1);
9354
9355         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9356                 /* Old EXTENT_ITEM metadata */
9357                 struct btrfs_tree_block_info *info;
9358
9359                 info = (struct btrfs_tree_block_info *)ptr;
9360                 level = btrfs_tree_block_level(eb, info);
9361                 ptr += sizeof(struct btrfs_tree_block_info);
9362         } else {
9363                 /* New METADATA_ITEM */
9364                 level = key.offset;
9365         }
9366         end = (unsigned long)ei + item_size;
9367
9368         if (ptr >= end) {
9369                 err |= ITEM_SIZE_MISMATCH;
9370                 goto out;
9371         }
9372
9373         /* Now check every backref in this extent item */
9374 next:
9375         iref = (struct btrfs_extent_inline_ref *)ptr;
9376         type = btrfs_extent_inline_ref_type(eb, iref);
9377         offset = btrfs_extent_inline_ref_offset(eb, iref);
9378         switch (type) {
9379         case BTRFS_TREE_BLOCK_REF_KEY:
9380                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9381                                                level);
9382                 err |= ret;
9383                 break;
9384         case BTRFS_SHARED_BLOCK_REF_KEY:
9385                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9386                                                  level);
9387                 err |= ret;
9388                 break;
9389         case BTRFS_EXTENT_DATA_REF_KEY:
9390                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9391                 ret = check_extent_data_backref(fs_info,
9392                                 btrfs_extent_data_ref_root(eb, dref),
9393                                 btrfs_extent_data_ref_objectid(eb, dref),
9394                                 btrfs_extent_data_ref_offset(eb, dref),
9395                                 key.objectid, key.offset,
9396                                 btrfs_extent_data_ref_count(eb, dref));
9397                 err |= ret;
9398                 break;
9399         case BTRFS_SHARED_DATA_REF_KEY:
9400                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9401                 err |= ret;
9402                 break;
9403         default:
9404                 error("extent[%llu %d %llu] has unknown ref type: %d",
9405                         key.objectid, key.type, key.offset, type);
9406                 err |= UNKNOWN_TYPE;
9407                 goto out;
9408         }
9409
9410         ptr += btrfs_extent_inline_ref_size(type);
9411         if (ptr < end)
9412                 goto next;
9413
9414 out:
9415         return err;
9416 }
9417
9418 /*
9419  * Check if a dev extent item is referred correctly by its chunk
9420  */
9421 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9422                                  struct extent_buffer *eb, int slot)
9423 {
9424         struct btrfs_root *chunk_root = fs_info->chunk_root;
9425         struct btrfs_dev_extent *ptr;
9426         struct btrfs_path path;
9427         struct btrfs_key chunk_key;
9428         struct btrfs_key devext_key;
9429         struct btrfs_chunk *chunk;
9430         struct extent_buffer *l;
9431         int num_stripes;
9432         u64 length;
9433         int i;
9434         int found_chunk = 0;
9435         int ret;
9436
9437         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9438         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9439         length = btrfs_dev_extent_length(eb, ptr);
9440
9441         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9442         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9443         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9444
9445         btrfs_init_path(&path);
9446         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9447         if (ret)
9448                 goto out;
9449
9450         l = path.nodes[0];
9451         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9452         if (btrfs_chunk_length(l, chunk) != length)
9453                 goto out;
9454
9455         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9456         for (i = 0; i < num_stripes; i++) {
9457                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9458                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9459
9460                 if (devid == devext_key.objectid &&
9461                     offset == devext_key.offset) {
9462                         found_chunk = 1;
9463                         break;
9464                 }
9465         }
9466 out:
9467         btrfs_release_path(&path);
9468         if (!found_chunk) {
9469                 error(
9470                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9471                         devext_key.objectid, devext_key.offset, length);
9472                 return REFERENCER_MISSING;
9473         }
9474         return 0;
9475 }
9476
9477 /*
9478  * Check if the used space is correct with the dev item
9479  */
9480 static int check_dev_item(struct btrfs_fs_info *fs_info,
9481                           struct extent_buffer *eb, int slot)
9482 {
9483         struct btrfs_root *dev_root = fs_info->dev_root;
9484         struct btrfs_dev_item *dev_item;
9485         struct btrfs_path path;
9486         struct btrfs_key key;
9487         struct btrfs_dev_extent *ptr;
9488         u64 dev_id;
9489         u64 used;
9490         u64 total = 0;
9491         int ret;
9492
9493         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9494         dev_id = btrfs_device_id(eb, dev_item);
9495         used = btrfs_device_bytes_used(eb, dev_item);
9496
9497         key.objectid = dev_id;
9498         key.type = BTRFS_DEV_EXTENT_KEY;
9499         key.offset = 0;
9500
9501         btrfs_init_path(&path);
9502         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9503         if (ret < 0) {
9504                 btrfs_item_key_to_cpu(eb, &key, slot);
9505                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9506                         key.objectid, key.type, key.offset);
9507                 btrfs_release_path(&path);
9508                 return REFERENCER_MISSING;
9509         }
9510
9511         /* Iterate dev_extents to calculate the used space of a device */
9512         while (1) {
9513                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9514
9515                 if (key.objectid > dev_id)
9516                         break;
9517                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9518                         goto next;
9519
9520                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9521                                      struct btrfs_dev_extent);
9522                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9523 next:
9524                 ret = btrfs_next_item(dev_root, &path);
9525                 if (ret)
9526                         break;
9527         }
9528         btrfs_release_path(&path);
9529
9530         if (used != total) {
9531                 btrfs_item_key_to_cpu(eb, &key, slot);
9532                 error(
9533 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9534                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9535                         BTRFS_DEV_EXTENT_KEY, dev_id);
9536                 return ACCOUNTING_MISMATCH;
9537         }
9538         return 0;
9539 }
9540
9541 /*
9542  * Check a block group item with its referener (chunk) and its used space
9543  * with extent/metadata item
9544  */
9545 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9546                                   struct extent_buffer *eb, int slot)
9547 {
9548         struct btrfs_root *extent_root = fs_info->extent_root;
9549         struct btrfs_root *chunk_root = fs_info->chunk_root;
9550         struct btrfs_block_group_item *bi;
9551         struct btrfs_block_group_item bg_item;
9552         struct btrfs_path path;
9553         struct btrfs_key bg_key;
9554         struct btrfs_key chunk_key;
9555         struct btrfs_key extent_key;
9556         struct btrfs_chunk *chunk;
9557         struct extent_buffer *leaf;
9558         struct btrfs_extent_item *ei;
9559         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9560         u64 flags;
9561         u64 bg_flags;
9562         u64 used;
9563         u64 total = 0;
9564         int ret;
9565         int err = 0;
9566
9567         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9568         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9569         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9570         used = btrfs_block_group_used(&bg_item);
9571         bg_flags = btrfs_block_group_flags(&bg_item);
9572
9573         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9574         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9575         chunk_key.offset = bg_key.objectid;
9576
9577         btrfs_init_path(&path);
9578         /* Search for the referencer chunk */
9579         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9580         if (ret) {
9581                 error(
9582                 "block group[%llu %llu] did not find the related chunk item",
9583                         bg_key.objectid, bg_key.offset);
9584                 err |= REFERENCER_MISSING;
9585         } else {
9586                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9587                                         struct btrfs_chunk);
9588                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9589                                                 bg_key.offset) {
9590                         error(
9591         "block group[%llu %llu] related chunk item length does not match",
9592                                 bg_key.objectid, bg_key.offset);
9593                         err |= REFERENCER_MISMATCH;
9594                 }
9595         }
9596         btrfs_release_path(&path);
9597
9598         /* Search from the block group bytenr */
9599         extent_key.objectid = bg_key.objectid;
9600         extent_key.type = 0;
9601         extent_key.offset = 0;
9602
9603         btrfs_init_path(&path);
9604         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9605         if (ret < 0)
9606                 goto out;
9607
9608         /* Iterate extent tree to account used space */
9609         while (1) {
9610                 leaf = path.nodes[0];
9611                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9612                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9613                         break;
9614
9615                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9616                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9617                         goto next;
9618                 if (extent_key.objectid < bg_key.objectid)
9619                         goto next;
9620
9621                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9622                         total += nodesize;
9623                 else
9624                         total += extent_key.offset;
9625
9626                 ei = btrfs_item_ptr(leaf, path.slots[0],
9627                                     struct btrfs_extent_item);
9628                 flags = btrfs_extent_flags(leaf, ei);
9629                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9630                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9631                                 error(
9632                         "bad extent[%llu, %llu) type mismatch with chunk",
9633                                         extent_key.objectid,
9634                                         extent_key.objectid + extent_key.offset);
9635                                 err |= CHUNK_TYPE_MISMATCH;
9636                         }
9637                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9638                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9639                                     BTRFS_BLOCK_GROUP_METADATA))) {
9640                                 error(
9641                         "bad extent[%llu, %llu) type mismatch with chunk",
9642                                         extent_key.objectid,
9643                                         extent_key.objectid + nodesize);
9644                                 err |= CHUNK_TYPE_MISMATCH;
9645                         }
9646                 }
9647 next:
9648                 ret = btrfs_next_item(extent_root, &path);
9649                 if (ret)
9650                         break;
9651         }
9652
9653 out:
9654         btrfs_release_path(&path);
9655
9656         if (total != used) {
9657                 error(
9658                 "block group[%llu %llu] used %llu but extent items used %llu",
9659                         bg_key.objectid, bg_key.offset, used, total);
9660                 err |= ACCOUNTING_MISMATCH;
9661         }
9662         return err;
9663 }
9664
9665 /*
9666  * Check a chunk item.
9667  * Including checking all referred dev_extents and block group
9668  */
9669 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9670                             struct extent_buffer *eb, int slot)
9671 {
9672         struct btrfs_root *extent_root = fs_info->extent_root;
9673         struct btrfs_root *dev_root = fs_info->dev_root;
9674         struct btrfs_path path;
9675         struct btrfs_key chunk_key;
9676         struct btrfs_key bg_key;
9677         struct btrfs_key devext_key;
9678         struct btrfs_chunk *chunk;
9679         struct extent_buffer *leaf;
9680         struct btrfs_block_group_item *bi;
9681         struct btrfs_block_group_item bg_item;
9682         struct btrfs_dev_extent *ptr;
9683         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9684         u64 length;
9685         u64 chunk_end;
9686         u64 type;
9687         u64 profile;
9688         int num_stripes;
9689         u64 offset;
9690         u64 objectid;
9691         int i;
9692         int ret;
9693         int err = 0;
9694
9695         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9696         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9697         length = btrfs_chunk_length(eb, chunk);
9698         chunk_end = chunk_key.offset + length;
9699         if (!IS_ALIGNED(length, sectorsize)) {
9700                 error("chunk[%llu %llu) not aligned to %u",
9701                         chunk_key.offset, chunk_end, sectorsize);
9702                 err |= BYTES_UNALIGNED;
9703                 goto out;
9704         }
9705
9706         type = btrfs_chunk_type(eb, chunk);
9707         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9708         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9709                 error("chunk[%llu %llu) has no chunk type",
9710                         chunk_key.offset, chunk_end);
9711                 err |= UNKNOWN_TYPE;
9712         }
9713         if (profile && (profile & (profile - 1))) {
9714                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9715                         chunk_key.offset, chunk_end, profile);
9716                 err |= UNKNOWN_TYPE;
9717         }
9718
9719         bg_key.objectid = chunk_key.offset;
9720         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9721         bg_key.offset = length;
9722
9723         btrfs_init_path(&path);
9724         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9725         if (ret) {
9726                 error(
9727                 "chunk[%llu %llu) did not find the related block group item",
9728                         chunk_key.offset, chunk_end);
9729                 err |= REFERENCER_MISSING;
9730         } else{
9731                 leaf = path.nodes[0];
9732                 bi = btrfs_item_ptr(leaf, path.slots[0],
9733                                     struct btrfs_block_group_item);
9734                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9735                                    sizeof(bg_item));
9736                 if (btrfs_block_group_flags(&bg_item) != type) {
9737                         error(
9738 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9739                                 chunk_key.offset, chunk_end, type,
9740                                 btrfs_block_group_flags(&bg_item));
9741                         err |= REFERENCER_MISSING;
9742                 }
9743         }
9744
9745         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9746         for (i = 0; i < num_stripes; i++) {
9747                 btrfs_release_path(&path);
9748                 btrfs_init_path(&path);
9749                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9750                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9751                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9752
9753                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9754                                         0, 0);
9755                 if (ret)
9756                         goto not_match_dev;
9757
9758                 leaf = path.nodes[0];
9759                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9760                                      struct btrfs_dev_extent);
9761                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9762                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9763                 if (objectid != chunk_key.objectid ||
9764                     offset != chunk_key.offset ||
9765                     btrfs_dev_extent_length(leaf, ptr) != length)
9766                         goto not_match_dev;
9767                 continue;
9768 not_match_dev:
9769                 err |= BACKREF_MISSING;
9770                 error(
9771                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9772                         chunk_key.objectid, chunk_end, i);
9773                 continue;
9774         }
9775         btrfs_release_path(&path);
9776 out:
9777         return err;
9778 }
9779
9780 /*
9781  * Main entry function to check known items and update related accounting info
9782  */
9783 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9784 {
9785         struct btrfs_fs_info *fs_info = root->fs_info;
9786         struct btrfs_key key;
9787         int slot = 0;
9788         int type;
9789         struct btrfs_extent_data_ref *dref;
9790         int ret;
9791         int err = 0;
9792
9793 next:
9794         btrfs_item_key_to_cpu(eb, &key, slot);
9795         type = btrfs_key_type(&key);
9796
9797         switch (type) {
9798         case BTRFS_EXTENT_DATA_KEY:
9799                 ret = check_extent_data_item(root, eb, slot);
9800                 err |= ret;
9801                 break;
9802         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9803                 ret = check_block_group_item(fs_info, eb, slot);
9804                 err |= ret;
9805                 break;
9806         case BTRFS_DEV_ITEM_KEY:
9807                 ret = check_dev_item(fs_info, eb, slot);
9808                 err |= ret;
9809                 break;
9810         case BTRFS_CHUNK_ITEM_KEY:
9811                 ret = check_chunk_item(fs_info, eb, slot);
9812                 err |= ret;
9813                 break;
9814         case BTRFS_DEV_EXTENT_KEY:
9815                 ret = check_dev_extent_item(fs_info, eb, slot);
9816                 err |= ret;
9817                 break;
9818         case BTRFS_EXTENT_ITEM_KEY:
9819         case BTRFS_METADATA_ITEM_KEY:
9820                 ret = check_extent_item(fs_info, eb, slot);
9821                 err |= ret;
9822                 break;
9823         case BTRFS_EXTENT_CSUM_KEY:
9824                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9825                 break;
9826         case BTRFS_TREE_BLOCK_REF_KEY:
9827                 ret = check_tree_block_backref(fs_info, key.offset,
9828                                                key.objectid, -1);
9829                 err |= ret;
9830                 break;
9831         case BTRFS_EXTENT_DATA_REF_KEY:
9832                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9833                 ret = check_extent_data_backref(fs_info,
9834                                 btrfs_extent_data_ref_root(eb, dref),
9835                                 btrfs_extent_data_ref_objectid(eb, dref),
9836                                 btrfs_extent_data_ref_offset(eb, dref),
9837                                 key.objectid, 0,
9838                                 btrfs_extent_data_ref_count(eb, dref));
9839                 err |= ret;
9840                 break;
9841         case BTRFS_SHARED_BLOCK_REF_KEY:
9842                 ret = check_shared_block_backref(fs_info, key.offset,
9843                                                  key.objectid, -1);
9844                 err |= ret;
9845                 break;
9846         case BTRFS_SHARED_DATA_REF_KEY:
9847                 ret = check_shared_data_backref(fs_info, key.offset,
9848                                                 key.objectid);
9849                 err |= ret;
9850                 break;
9851         default:
9852                 break;
9853         }
9854
9855         if (++slot < btrfs_header_nritems(eb))
9856                 goto next;
9857
9858         return err;
9859 }
9860
9861 /*
9862  * Helper function for later fs/subvol tree check.  To determine if a tree
9863  * block should be checked.
9864  * This function will ensure only the direct referencer with lowest rootid to
9865  * check a fs/subvolume tree block.
9866  *
9867  * Backref check at extent tree would detect errors like missing subvolume
9868  * tree, so we can do aggressive check to reduce duplicated checks.
9869  */
9870 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9871 {
9872         struct btrfs_root *extent_root = root->fs_info->extent_root;
9873         struct btrfs_key key;
9874         struct btrfs_path path;
9875         struct extent_buffer *leaf;
9876         int slot;
9877         struct btrfs_extent_item *ei;
9878         unsigned long ptr;
9879         unsigned long end;
9880         int type;
9881         u32 item_size;
9882         u64 offset;
9883         struct btrfs_extent_inline_ref *iref;
9884         int ret;
9885
9886         btrfs_init_path(&path);
9887         key.objectid = btrfs_header_bytenr(eb);
9888         key.type = BTRFS_METADATA_ITEM_KEY;
9889         key.offset = (u64)-1;
9890
9891         /*
9892          * Any failure in backref resolving means we can't determine
9893          * whom the tree block belongs to.
9894          * So in that case, we need to check that tree block
9895          */
9896         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9897         if (ret < 0)
9898                 goto need_check;
9899
9900         ret = btrfs_previous_extent_item(extent_root, &path,
9901                                          btrfs_header_bytenr(eb));
9902         if (ret)
9903                 goto need_check;
9904
9905         leaf = path.nodes[0];
9906         slot = path.slots[0];
9907         btrfs_item_key_to_cpu(leaf, &key, slot);
9908         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9909
9910         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9911                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9912         } else {
9913                 struct btrfs_tree_block_info *info;
9914
9915                 info = (struct btrfs_tree_block_info *)(ei + 1);
9916                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9917         }
9918
9919         item_size = btrfs_item_size_nr(leaf, slot);
9920         ptr = (unsigned long)iref;
9921         end = (unsigned long)ei + item_size;
9922         while (ptr < end) {
9923                 iref = (struct btrfs_extent_inline_ref *)ptr;
9924                 type = btrfs_extent_inline_ref_type(leaf, iref);
9925                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9926
9927                 /*
9928                  * We only check the tree block if current root is
9929                  * the lowest referencer of it.
9930                  */
9931                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9932                     offset < root->objectid) {
9933                         btrfs_release_path(&path);
9934                         return 0;
9935                 }
9936
9937                 ptr += btrfs_extent_inline_ref_size(type);
9938         }
9939         /*
9940          * Normally we should also check keyed tree block ref, but that may be
9941          * very time consuming.  Inlined ref should already make us skip a lot
9942          * of refs now.  So skip search keyed tree block ref.
9943          */
9944
9945 need_check:
9946         btrfs_release_path(&path);
9947         return 1;
9948 }
9949
9950 /*
9951  * Traversal function for tree block. We will do:
9952  * 1) Skip shared fs/subvolume tree blocks
9953  * 2) Update related bytes accounting
9954  * 3) Pre-order traversal
9955  */
9956 static int traverse_tree_block(struct btrfs_root *root,
9957                                 struct extent_buffer *node)
9958 {
9959         struct extent_buffer *eb;
9960         struct btrfs_key key;
9961         struct btrfs_key drop_key;
9962         int level;
9963         u64 nr;
9964         int i;
9965         int err = 0;
9966         int ret;
9967
9968         /*
9969          * Skip shared fs/subvolume tree block, in that case they will
9970          * be checked by referencer with lowest rootid
9971          */
9972         if (is_fstree(root->objectid) && !should_check(root, node))
9973                 return 0;
9974
9975         /* Update bytes accounting */
9976         total_btree_bytes += node->len;
9977         if (fs_root_objectid(btrfs_header_owner(node)))
9978                 total_fs_tree_bytes += node->len;
9979         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9980                 total_extent_tree_bytes += node->len;
9981         if (!found_old_backref &&
9982             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9983             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9984             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9985                 found_old_backref = 1;
9986
9987         /* pre-order tranversal, check itself first */
9988         level = btrfs_header_level(node);
9989         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9990                                    btrfs_header_level(node),
9991                                    btrfs_header_owner(node));
9992         err |= ret;
9993         if (err)
9994                 error(
9995         "check %s failed root %llu bytenr %llu level %d, force continue check",
9996                         level ? "node":"leaf", root->objectid,
9997                         btrfs_header_bytenr(node), btrfs_header_level(node));
9998
9999         if (!level) {
10000                 btree_space_waste += btrfs_leaf_free_space(root, node);
10001                 ret = check_leaf_items(root, node);
10002                 err |= ret;
10003                 return err;
10004         }
10005
10006         nr = btrfs_header_nritems(node);
10007         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10008         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10009                 sizeof(struct btrfs_key_ptr);
10010
10011         /* Then check all its children */
10012         for (i = 0; i < nr; i++) {
10013                 u64 blocknr = btrfs_node_blockptr(node, i);
10014
10015                 btrfs_node_key_to_cpu(node, &key, i);
10016                 if (level == root->root_item.drop_level &&
10017                     is_dropped_key(&key, &drop_key))
10018                         continue;
10019
10020                 /*
10021                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10022                  * to call the function itself.
10023                  */
10024                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10025                 if (extent_buffer_uptodate(eb)) {
10026                         ret = traverse_tree_block(root, eb);
10027                         err |= ret;
10028                 }
10029                 free_extent_buffer(eb);
10030         }
10031
10032         return err;
10033 }
10034
10035 /*
10036  * Low memory usage version check_chunks_and_extents.
10037  */
10038 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10039 {
10040         struct btrfs_path path;
10041         struct btrfs_key key;
10042         struct btrfs_root *root1;
10043         struct btrfs_root *cur_root;
10044         int err = 0;
10045         int ret;
10046
10047         root1 = root->fs_info->chunk_root;
10048         ret = traverse_tree_block(root1, root1->node);
10049         err |= ret;
10050
10051         root1 = root->fs_info->tree_root;
10052         ret = traverse_tree_block(root1, root1->node);
10053         err |= ret;
10054
10055         btrfs_init_path(&path);
10056         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10057         key.offset = 0;
10058         key.type = BTRFS_ROOT_ITEM_KEY;
10059
10060         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10061         if (ret) {
10062                 error("cannot find extent treet in tree_root");
10063                 goto out;
10064         }
10065
10066         while (1) {
10067                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10068                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10069                         goto next;
10070                 key.offset = (u64)-1;
10071
10072                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10073                 if (IS_ERR(cur_root) || !cur_root) {
10074                         error("failed to read tree: %lld", key.objectid);
10075                         goto next;
10076                 }
10077
10078                 ret = traverse_tree_block(cur_root, cur_root->node);
10079                 err |= ret;
10080
10081 next:
10082                 ret = btrfs_next_item(root1, &path);
10083                 if (ret)
10084                         goto out;
10085         }
10086
10087 out:
10088         btrfs_release_path(&path);
10089         return err;
10090 }
10091
10092 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10093                            struct btrfs_root *root, int overwrite)
10094 {
10095         struct extent_buffer *c;
10096         struct extent_buffer *old = root->node;
10097         int level;
10098         int ret;
10099         struct btrfs_disk_key disk_key = {0,0,0};
10100
10101         level = 0;
10102
10103         if (overwrite) {
10104                 c = old;
10105                 extent_buffer_get(c);
10106                 goto init;
10107         }
10108         c = btrfs_alloc_free_block(trans, root,
10109                                    root->nodesize,
10110                                    root->root_key.objectid,
10111                                    &disk_key, level, 0, 0);
10112         if (IS_ERR(c)) {
10113                 c = old;
10114                 extent_buffer_get(c);
10115                 overwrite = 1;
10116         }
10117 init:
10118         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10119         btrfs_set_header_level(c, level);
10120         btrfs_set_header_bytenr(c, c->start);
10121         btrfs_set_header_generation(c, trans->transid);
10122         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10123         btrfs_set_header_owner(c, root->root_key.objectid);
10124
10125         write_extent_buffer(c, root->fs_info->fsid,
10126                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10127
10128         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10129                             btrfs_header_chunk_tree_uuid(c),
10130                             BTRFS_UUID_SIZE);
10131
10132         btrfs_mark_buffer_dirty(c);
10133         /*
10134          * this case can happen in the following case:
10135          *
10136          * 1.overwrite previous root.
10137          *
10138          * 2.reinit reloc data root, this is because we skip pin
10139          * down reloc data tree before which means we can allocate
10140          * same block bytenr here.
10141          */
10142         if (old->start == c->start) {
10143                 btrfs_set_root_generation(&root->root_item,
10144                                           trans->transid);
10145                 root->root_item.level = btrfs_header_level(root->node);
10146                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10147                                         &root->root_key, &root->root_item);
10148                 if (ret) {
10149                         free_extent_buffer(c);
10150                         return ret;
10151                 }
10152         }
10153         free_extent_buffer(old);
10154         root->node = c;
10155         add_root_to_dirty_list(root);
10156         return 0;
10157 }
10158
10159 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10160                                 struct extent_buffer *eb, int tree_root)
10161 {
10162         struct extent_buffer *tmp;
10163         struct btrfs_root_item *ri;
10164         struct btrfs_key key;
10165         u64 bytenr;
10166         u32 nodesize;
10167         int level = btrfs_header_level(eb);
10168         int nritems;
10169         int ret;
10170         int i;
10171
10172         /*
10173          * If we have pinned this block before, don't pin it again.
10174          * This can not only avoid forever loop with broken filesystem
10175          * but also give us some speedups.
10176          */
10177         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10178                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10179                 return 0;
10180
10181         btrfs_pin_extent(fs_info, eb->start, eb->len);
10182
10183         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10184         nritems = btrfs_header_nritems(eb);
10185         for (i = 0; i < nritems; i++) {
10186                 if (level == 0) {
10187                         btrfs_item_key_to_cpu(eb, &key, i);
10188                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10189                                 continue;
10190                         /* Skip the extent root and reloc roots */
10191                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10192                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10193                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10194                                 continue;
10195                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10196                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10197
10198                         /*
10199                          * If at any point we start needing the real root we
10200                          * will have to build a stump root for the root we are
10201                          * in, but for now this doesn't actually use the root so
10202                          * just pass in extent_root.
10203                          */
10204                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10205                                               nodesize, 0);
10206                         if (!extent_buffer_uptodate(tmp)) {
10207                                 fprintf(stderr, "Error reading root block\n");
10208                                 return -EIO;
10209                         }
10210                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10211                         free_extent_buffer(tmp);
10212                         if (ret)
10213                                 return ret;
10214                 } else {
10215                         bytenr = btrfs_node_blockptr(eb, i);
10216
10217                         /* If we aren't the tree root don't read the block */
10218                         if (level == 1 && !tree_root) {
10219                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10220                                 continue;
10221                         }
10222
10223                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10224                                               nodesize, 0);
10225                         if (!extent_buffer_uptodate(tmp)) {
10226                                 fprintf(stderr, "Error reading tree block\n");
10227                                 return -EIO;
10228                         }
10229                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10230                         free_extent_buffer(tmp);
10231                         if (ret)
10232                                 return ret;
10233                 }
10234         }
10235
10236         return 0;
10237 }
10238
10239 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10240 {
10241         int ret;
10242
10243         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10244         if (ret)
10245                 return ret;
10246
10247         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10248 }
10249
10250 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10251 {
10252         struct btrfs_block_group_cache *cache;
10253         struct btrfs_path *path;
10254         struct extent_buffer *leaf;
10255         struct btrfs_chunk *chunk;
10256         struct btrfs_key key;
10257         int ret;
10258         u64 start;
10259
10260         path = btrfs_alloc_path();
10261         if (!path)
10262                 return -ENOMEM;
10263
10264         key.objectid = 0;
10265         key.type = BTRFS_CHUNK_ITEM_KEY;
10266         key.offset = 0;
10267
10268         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10269         if (ret < 0) {
10270                 btrfs_free_path(path);
10271                 return ret;
10272         }
10273
10274         /*
10275          * We do this in case the block groups were screwed up and had alloc
10276          * bits that aren't actually set on the chunks.  This happens with
10277          * restored images every time and could happen in real life I guess.
10278          */
10279         fs_info->avail_data_alloc_bits = 0;
10280         fs_info->avail_metadata_alloc_bits = 0;
10281         fs_info->avail_system_alloc_bits = 0;
10282
10283         /* First we need to create the in-memory block groups */
10284         while (1) {
10285                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10286                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10287                         if (ret < 0) {
10288                                 btrfs_free_path(path);
10289                                 return ret;
10290                         }
10291                         if (ret) {
10292                                 ret = 0;
10293                                 break;
10294                         }
10295                 }
10296                 leaf = path->nodes[0];
10297                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10298                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10299                         path->slots[0]++;
10300                         continue;
10301                 }
10302
10303                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10304                                        struct btrfs_chunk);
10305                 btrfs_add_block_group(fs_info, 0,
10306                                       btrfs_chunk_type(leaf, chunk),
10307                                       key.objectid, key.offset,
10308                                       btrfs_chunk_length(leaf, chunk));
10309                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10310                                  key.offset + btrfs_chunk_length(leaf, chunk),
10311                                  GFP_NOFS);
10312                 path->slots[0]++;
10313         }
10314         start = 0;
10315         while (1) {
10316                 cache = btrfs_lookup_first_block_group(fs_info, start);
10317                 if (!cache)
10318                         break;
10319                 cache->cached = 1;
10320                 start = cache->key.objectid + cache->key.offset;
10321         }
10322
10323         btrfs_free_path(path);
10324         return 0;
10325 }
10326
10327 static int reset_balance(struct btrfs_trans_handle *trans,
10328                          struct btrfs_fs_info *fs_info)
10329 {
10330         struct btrfs_root *root = fs_info->tree_root;
10331         struct btrfs_path *path;
10332         struct extent_buffer *leaf;
10333         struct btrfs_key key;
10334         int del_slot, del_nr = 0;
10335         int ret;
10336         int found = 0;
10337
10338         path = btrfs_alloc_path();
10339         if (!path)
10340                 return -ENOMEM;
10341
10342         key.objectid = BTRFS_BALANCE_OBJECTID;
10343         key.type = BTRFS_BALANCE_ITEM_KEY;
10344         key.offset = 0;
10345
10346         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10347         if (ret) {
10348                 if (ret > 0)
10349                         ret = 0;
10350                 if (!ret)
10351                         goto reinit_data_reloc;
10352                 else
10353                         goto out;
10354         }
10355
10356         ret = btrfs_del_item(trans, root, path);
10357         if (ret)
10358                 goto out;
10359         btrfs_release_path(path);
10360
10361         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10362         key.type = BTRFS_ROOT_ITEM_KEY;
10363         key.offset = 0;
10364
10365         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10366         if (ret < 0)
10367                 goto out;
10368         while (1) {
10369                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10370                         if (!found)
10371                                 break;
10372
10373                         if (del_nr) {
10374                                 ret = btrfs_del_items(trans, root, path,
10375                                                       del_slot, del_nr);
10376                                 del_nr = 0;
10377                                 if (ret)
10378                                         goto out;
10379                         }
10380                         key.offset++;
10381                         btrfs_release_path(path);
10382
10383                         found = 0;
10384                         ret = btrfs_search_slot(trans, root, &key, path,
10385                                                 -1, 1);
10386                         if (ret < 0)
10387                                 goto out;
10388                         continue;
10389                 }
10390                 found = 1;
10391                 leaf = path->nodes[0];
10392                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10393                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10394                         break;
10395                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10396                         path->slots[0]++;
10397                         continue;
10398                 }
10399                 if (!del_nr) {
10400                         del_slot = path->slots[0];
10401                         del_nr = 1;
10402                 } else {
10403                         del_nr++;
10404                 }
10405                 path->slots[0]++;
10406         }
10407
10408         if (del_nr) {
10409                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10410                 if (ret)
10411                         goto out;
10412         }
10413         btrfs_release_path(path);
10414
10415 reinit_data_reloc:
10416         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10417         key.type = BTRFS_ROOT_ITEM_KEY;
10418         key.offset = (u64)-1;
10419         root = btrfs_read_fs_root(fs_info, &key);
10420         if (IS_ERR(root)) {
10421                 fprintf(stderr, "Error reading data reloc tree\n");
10422                 ret = PTR_ERR(root);
10423                 goto out;
10424         }
10425         record_root_in_trans(trans, root);
10426         ret = btrfs_fsck_reinit_root(trans, root, 0);
10427         if (ret)
10428                 goto out;
10429         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10430 out:
10431         btrfs_free_path(path);
10432         return ret;
10433 }
10434
10435 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10436                               struct btrfs_fs_info *fs_info)
10437 {
10438         u64 start = 0;
10439         int ret;
10440
10441         /*
10442          * The only reason we don't do this is because right now we're just
10443          * walking the trees we find and pinning down their bytes, we don't look
10444          * at any of the leaves.  In order to do mixed groups we'd have to check
10445          * the leaves of any fs roots and pin down the bytes for any file
10446          * extents we find.  Not hard but why do it if we don't have to?
10447          */
10448         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10449                 fprintf(stderr, "We don't support re-initing the extent tree "
10450                         "for mixed block groups yet, please notify a btrfs "
10451                         "developer you want to do this so they can add this "
10452                         "functionality.\n");
10453                 return -EINVAL;
10454         }
10455
10456         /*
10457          * first we need to walk all of the trees except the extent tree and pin
10458          * down the bytes that are in use so we don't overwrite any existing
10459          * metadata.
10460          */
10461         ret = pin_metadata_blocks(fs_info);
10462         if (ret) {
10463                 fprintf(stderr, "error pinning down used bytes\n");
10464                 return ret;
10465         }
10466
10467         /*
10468          * Need to drop all the block groups since we're going to recreate all
10469          * of them again.
10470          */
10471         btrfs_free_block_groups(fs_info);
10472         ret = reset_block_groups(fs_info);
10473         if (ret) {
10474                 fprintf(stderr, "error resetting the block groups\n");
10475                 return ret;
10476         }
10477
10478         /* Ok we can allocate now, reinit the extent root */
10479         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10480         if (ret) {
10481                 fprintf(stderr, "extent root initialization failed\n");
10482                 /*
10483                  * When the transaction code is updated we should end the
10484                  * transaction, but for now progs only knows about commit so
10485                  * just return an error.
10486                  */
10487                 return ret;
10488         }
10489
10490         /*
10491          * Now we have all the in-memory block groups setup so we can make
10492          * allocations properly, and the metadata we care about is safe since we
10493          * pinned all of it above.
10494          */
10495         while (1) {
10496                 struct btrfs_block_group_cache *cache;
10497
10498                 cache = btrfs_lookup_first_block_group(fs_info, start);
10499                 if (!cache)
10500                         break;
10501                 start = cache->key.objectid + cache->key.offset;
10502                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10503                                         &cache->key, &cache->item,
10504                                         sizeof(cache->item));
10505                 if (ret) {
10506                         fprintf(stderr, "Error adding block group\n");
10507                         return ret;
10508                 }
10509                 btrfs_extent_post_op(trans, fs_info->extent_root);
10510         }
10511
10512         ret = reset_balance(trans, fs_info);
10513         if (ret)
10514                 fprintf(stderr, "error resetting the pending balance\n");
10515
10516         return ret;
10517 }
10518
10519 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10520 {
10521         struct btrfs_path *path;
10522         struct btrfs_trans_handle *trans;
10523         struct btrfs_key key;
10524         int ret;
10525
10526         printf("Recowing metadata block %llu\n", eb->start);
10527         key.objectid = btrfs_header_owner(eb);
10528         key.type = BTRFS_ROOT_ITEM_KEY;
10529         key.offset = (u64)-1;
10530
10531         root = btrfs_read_fs_root(root->fs_info, &key);
10532         if (IS_ERR(root)) {
10533                 fprintf(stderr, "Couldn't find owner root %llu\n",
10534                         key.objectid);
10535                 return PTR_ERR(root);
10536         }
10537
10538         path = btrfs_alloc_path();
10539         if (!path)
10540                 return -ENOMEM;
10541
10542         trans = btrfs_start_transaction(root, 1);
10543         if (IS_ERR(trans)) {
10544                 btrfs_free_path(path);
10545                 return PTR_ERR(trans);
10546         }
10547
10548         path->lowest_level = btrfs_header_level(eb);
10549         if (path->lowest_level)
10550                 btrfs_node_key_to_cpu(eb, &key, 0);
10551         else
10552                 btrfs_item_key_to_cpu(eb, &key, 0);
10553
10554         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10555         btrfs_commit_transaction(trans, root);
10556         btrfs_free_path(path);
10557         return ret;
10558 }
10559
10560 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10561 {
10562         struct btrfs_path *path;
10563         struct btrfs_trans_handle *trans;
10564         struct btrfs_key key;
10565         int ret;
10566
10567         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10568                bad->key.type, bad->key.offset);
10569         key.objectid = bad->root_id;
10570         key.type = BTRFS_ROOT_ITEM_KEY;
10571         key.offset = (u64)-1;
10572
10573         root = btrfs_read_fs_root(root->fs_info, &key);
10574         if (IS_ERR(root)) {
10575                 fprintf(stderr, "Couldn't find owner root %llu\n",
10576                         key.objectid);
10577                 return PTR_ERR(root);
10578         }
10579
10580         path = btrfs_alloc_path();
10581         if (!path)
10582                 return -ENOMEM;
10583
10584         trans = btrfs_start_transaction(root, 1);
10585         if (IS_ERR(trans)) {
10586                 btrfs_free_path(path);
10587                 return PTR_ERR(trans);
10588         }
10589
10590         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10591         if (ret) {
10592                 if (ret > 0)
10593                         ret = 0;
10594                 goto out;
10595         }
10596         ret = btrfs_del_item(trans, root, path);
10597 out:
10598         btrfs_commit_transaction(trans, root);
10599         btrfs_free_path(path);
10600         return ret;
10601 }
10602
10603 static int zero_log_tree(struct btrfs_root *root)
10604 {
10605         struct btrfs_trans_handle *trans;
10606         int ret;
10607
10608         trans = btrfs_start_transaction(root, 1);
10609         if (IS_ERR(trans)) {
10610                 ret = PTR_ERR(trans);
10611                 return ret;
10612         }
10613         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10614         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10615         ret = btrfs_commit_transaction(trans, root);
10616         return ret;
10617 }
10618
10619 static int populate_csum(struct btrfs_trans_handle *trans,
10620                          struct btrfs_root *csum_root, char *buf, u64 start,
10621                          u64 len)
10622 {
10623         u64 offset = 0;
10624         u64 sectorsize;
10625         int ret = 0;
10626
10627         while (offset < len) {
10628                 sectorsize = csum_root->sectorsize;
10629                 ret = read_extent_data(csum_root, buf, start + offset,
10630                                        &sectorsize, 0);
10631                 if (ret)
10632                         break;
10633                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10634                                             start + offset, buf, sectorsize);
10635                 if (ret)
10636                         break;
10637                 offset += sectorsize;
10638         }
10639         return ret;
10640 }
10641
10642 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10643                                       struct btrfs_root *csum_root,
10644                                       struct btrfs_root *cur_root)
10645 {
10646         struct btrfs_path *path;
10647         struct btrfs_key key;
10648         struct extent_buffer *node;
10649         struct btrfs_file_extent_item *fi;
10650         char *buf = NULL;
10651         u64 start = 0;
10652         u64 len = 0;
10653         int slot = 0;
10654         int ret = 0;
10655
10656         path = btrfs_alloc_path();
10657         if (!path)
10658                 return -ENOMEM;
10659         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10660         if (!buf) {
10661                 ret = -ENOMEM;
10662                 goto out;
10663         }
10664
10665         key.objectid = 0;
10666         key.offset = 0;
10667         key.type = 0;
10668
10669         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10670         if (ret < 0)
10671                 goto out;
10672         /* Iterate all regular file extents and fill its csum */
10673         while (1) {
10674                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10675
10676                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10677                         goto next;
10678                 node = path->nodes[0];
10679                 slot = path->slots[0];
10680                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10681                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10682                         goto next;
10683                 start = btrfs_file_extent_disk_bytenr(node, fi);
10684                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10685
10686                 ret = populate_csum(trans, csum_root, buf, start, len);
10687                 if (ret == -EEXIST)
10688                         ret = 0;
10689                 if (ret < 0)
10690                         goto out;
10691 next:
10692                 /*
10693                  * TODO: if next leaf is corrupted, jump to nearest next valid
10694                  * leaf.
10695                  */
10696                 ret = btrfs_next_item(cur_root, path);
10697                 if (ret < 0)
10698                         goto out;
10699                 if (ret > 0) {
10700                         ret = 0;
10701                         goto out;
10702                 }
10703         }
10704
10705 out:
10706         btrfs_free_path(path);
10707         free(buf);
10708         return ret;
10709 }
10710
10711 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10712                                   struct btrfs_root *csum_root)
10713 {
10714         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10715         struct btrfs_path *path;
10716         struct btrfs_root *tree_root = fs_info->tree_root;
10717         struct btrfs_root *cur_root;
10718         struct extent_buffer *node;
10719         struct btrfs_key key;
10720         int slot = 0;
10721         int ret = 0;
10722
10723         path = btrfs_alloc_path();
10724         if (!path)
10725                 return -ENOMEM;
10726
10727         key.objectid = BTRFS_FS_TREE_OBJECTID;
10728         key.offset = 0;
10729         key.type = BTRFS_ROOT_ITEM_KEY;
10730
10731         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10732         if (ret < 0)
10733                 goto out;
10734         if (ret > 0) {
10735                 ret = -ENOENT;
10736                 goto out;
10737         }
10738
10739         while (1) {
10740                 node = path->nodes[0];
10741                 slot = path->slots[0];
10742                 btrfs_item_key_to_cpu(node, &key, slot);
10743                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10744                         goto out;
10745                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10746                         goto next;
10747                 if (!is_fstree(key.objectid))
10748                         goto next;
10749                 key.offset = (u64)-1;
10750
10751                 cur_root = btrfs_read_fs_root(fs_info, &key);
10752                 if (IS_ERR(cur_root) || !cur_root) {
10753                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10754                                 key.objectid);
10755                         goto out;
10756                 }
10757                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10758                                 cur_root);
10759                 if (ret < 0)
10760                         goto out;
10761 next:
10762                 ret = btrfs_next_item(tree_root, path);
10763                 if (ret > 0) {
10764                         ret = 0;
10765                         goto out;
10766                 }
10767                 if (ret < 0)
10768                         goto out;
10769         }
10770
10771 out:
10772         btrfs_free_path(path);
10773         return ret;
10774 }
10775
10776 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10777                                       struct btrfs_root *csum_root)
10778 {
10779         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10780         struct btrfs_path *path;
10781         struct btrfs_extent_item *ei;
10782         struct extent_buffer *leaf;
10783         char *buf;
10784         struct btrfs_key key;
10785         int ret;
10786
10787         path = btrfs_alloc_path();
10788         if (!path)
10789                 return -ENOMEM;
10790
10791         key.objectid = 0;
10792         key.type = BTRFS_EXTENT_ITEM_KEY;
10793         key.offset = 0;
10794
10795         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10796         if (ret < 0) {
10797                 btrfs_free_path(path);
10798                 return ret;
10799         }
10800
10801         buf = malloc(csum_root->sectorsize);
10802         if (!buf) {
10803                 btrfs_free_path(path);
10804                 return -ENOMEM;
10805         }
10806
10807         while (1) {
10808                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10809                         ret = btrfs_next_leaf(extent_root, path);
10810                         if (ret < 0)
10811                                 break;
10812                         if (ret) {
10813                                 ret = 0;
10814                                 break;
10815                         }
10816                 }
10817                 leaf = path->nodes[0];
10818
10819                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10820                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10821                         path->slots[0]++;
10822                         continue;
10823                 }
10824
10825                 ei = btrfs_item_ptr(leaf, path->slots[0],
10826                                     struct btrfs_extent_item);
10827                 if (!(btrfs_extent_flags(leaf, ei) &
10828                       BTRFS_EXTENT_FLAG_DATA)) {
10829                         path->slots[0]++;
10830                         continue;
10831                 }
10832
10833                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10834                                     key.offset);
10835                 if (ret)
10836                         break;
10837                 path->slots[0]++;
10838         }
10839
10840         btrfs_free_path(path);
10841         free(buf);
10842         return ret;
10843 }
10844
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * When the extent tree has just been re-initialized it no longer describes
 * the data extents, so it cannot be used as the source.  In that case the
 * caller sets @search_fs_tree and the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
10861
10862 static void free_roots_info_cache(void)
10863 {
10864         if (!roots_info_cache)
10865                 return;
10866
10867         while (!cache_tree_empty(roots_info_cache)) {
10868                 struct cache_extent *entry;
10869                 struct root_item_info *rii;
10870
10871                 entry = first_cache_extent(roots_info_cache);
10872                 if (!entry)
10873                         break;
10874                 remove_cache_extent(roots_info_cache, entry);
10875                 rii = container_of(entry, struct root_item_info, cache_extent);
10876                 free(rii);
10877         }
10878
10879         free(roots_info_cache);
10880         roots_info_cache = NULL;
10881 }
10882
10883 static int build_roots_info_cache(struct btrfs_fs_info *info)
10884 {
10885         int ret = 0;
10886         struct btrfs_key key;
10887         struct extent_buffer *leaf;
10888         struct btrfs_path *path;
10889
10890         if (!roots_info_cache) {
10891                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10892                 if (!roots_info_cache)
10893                         return -ENOMEM;
10894                 cache_tree_init(roots_info_cache);
10895         }
10896
10897         path = btrfs_alloc_path();
10898         if (!path)
10899                 return -ENOMEM;
10900
10901         key.objectid = 0;
10902         key.type = BTRFS_EXTENT_ITEM_KEY;
10903         key.offset = 0;
10904
10905         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10906         if (ret < 0)
10907                 goto out;
10908         leaf = path->nodes[0];
10909
10910         while (1) {
10911                 struct btrfs_key found_key;
10912                 struct btrfs_extent_item *ei;
10913                 struct btrfs_extent_inline_ref *iref;
10914                 int slot = path->slots[0];
10915                 int type;
10916                 u64 flags;
10917                 u64 root_id;
10918                 u8 level;
10919                 struct cache_extent *entry;
10920                 struct root_item_info *rii;
10921
10922                 if (slot >= btrfs_header_nritems(leaf)) {
10923                         ret = btrfs_next_leaf(info->extent_root, path);
10924                         if (ret < 0) {
10925                                 break;
10926                         } else if (ret) {
10927                                 ret = 0;
10928                                 break;
10929                         }
10930                         leaf = path->nodes[0];
10931                         slot = path->slots[0];
10932                 }
10933
10934                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10935
10936                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10937                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10938                         goto next;
10939
10940                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10941                 flags = btrfs_extent_flags(leaf, ei);
10942
10943                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10944                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10945                         goto next;
10946
10947                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10948                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10949                         level = found_key.offset;
10950                 } else {
10951                         struct btrfs_tree_block_info *binfo;
10952
10953                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10954                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10955                         level = btrfs_tree_block_level(leaf, binfo);
10956                 }
10957
10958                 /*
10959                  * For a root extent, it must be of the following type and the
10960                  * first (and only one) iref in the item.
10961                  */
10962                 type = btrfs_extent_inline_ref_type(leaf, iref);
10963                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10964                         goto next;
10965
10966                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10967                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10968                 if (!entry) {
10969                         rii = malloc(sizeof(struct root_item_info));
10970                         if (!rii) {
10971                                 ret = -ENOMEM;
10972                                 goto out;
10973                         }
10974                         rii->cache_extent.start = root_id;
10975                         rii->cache_extent.size = 1;
10976                         rii->level = (u8)-1;
10977                         entry = &rii->cache_extent;
10978                         ret = insert_cache_extent(roots_info_cache, entry);
10979                         ASSERT(ret == 0);
10980                 } else {
10981                         rii = container_of(entry, struct root_item_info,
10982                                            cache_extent);
10983                 }
10984
10985                 ASSERT(rii->cache_extent.start == root_id);
10986                 ASSERT(rii->cache_extent.size == 1);
10987
10988                 if (level > rii->level || rii->level == (u8)-1) {
10989                         rii->level = level;
10990                         rii->bytenr = found_key.objectid;
10991                         rii->gen = btrfs_extent_generation(leaf, ei);
10992                         rii->node_count = 1;
10993                 } else if (level == rii->level) {
10994                         rii->node_count++;
10995                 }
10996 next:
10997                 path->slots[0]++;
10998         }
10999
11000 out:
11001         btrfs_free_path(path);
11002
11003         return ret;
11004 }
11005
11006 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11007                                   struct btrfs_path *path,
11008                                   const struct btrfs_key *root_key,
11009                                   const int read_only_mode)
11010 {
11011         const u64 root_id = root_key->objectid;
11012         struct cache_extent *entry;
11013         struct root_item_info *rii;
11014         struct btrfs_root_item ri;
11015         unsigned long offset;
11016
11017         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11018         if (!entry) {
11019                 fprintf(stderr,
11020                         "Error: could not find extent items for root %llu\n",
11021                         root_key->objectid);
11022                 return -ENOENT;
11023         }
11024
11025         rii = container_of(entry, struct root_item_info, cache_extent);
11026         ASSERT(rii->cache_extent.start == root_id);
11027         ASSERT(rii->cache_extent.size == 1);
11028
11029         if (rii->node_count != 1) {
11030                 fprintf(stderr,
11031                         "Error: could not find btree root extent for root %llu\n",
11032                         root_id);
11033                 return -ENOENT;
11034         }
11035
11036         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11037         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11038
11039         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11040             btrfs_root_level(&ri) != rii->level ||
11041             btrfs_root_generation(&ri) != rii->gen) {
11042
11043                 /*
11044                  * If we're in repair mode but our caller told us to not update
11045                  * the root item, i.e. just check if it needs to be updated, don't
11046                  * print this message, since the caller will call us again shortly
11047                  * for the same root item without read only mode (the caller will
11048                  * open a transaction first).
11049                  */
11050                 if (!(read_only_mode && repair))
11051                         fprintf(stderr,
11052                                 "%sroot item for root %llu,"
11053                                 " current bytenr %llu, current gen %llu, current level %u,"
11054                                 " new bytenr %llu, new gen %llu, new level %u\n",
11055                                 (read_only_mode ? "" : "fixing "),
11056                                 root_id,
11057                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11058                                 btrfs_root_level(&ri),
11059                                 rii->bytenr, rii->gen, rii->level);
11060
11061                 if (btrfs_root_generation(&ri) > rii->gen) {
11062                         fprintf(stderr,
11063                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11064                                 root_id, btrfs_root_generation(&ri), rii->gen);
11065                         return -EINVAL;
11066                 }
11067
11068                 if (!read_only_mode) {
11069                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11070                         btrfs_set_root_level(&ri, rii->level);
11071                         btrfs_set_root_generation(&ri, rii->gen);
11072                         write_extent_buffer(path->nodes[0], &ri,
11073                                             offset, sizeof(ri));
11074                 }
11075
11076                 return 1;
11077         }
11078
11079         return 0;
11080 }
11081
11082 /*
11083  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11084  * caused read-only snapshots to be corrupted if they were created at a moment
11085  * when the source subvolume/snapshot had orphan items. The issue was that the
11086  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11087  * node instead of the post orphan cleanup root node.
11088  * So this function, and its callees, just detects and fixes those cases. Even
11089  * though the regression was for read-only snapshots, this function applies to
11090  * any snapshot/subvolume root.
11091  * This must be run before any other repair code - not doing it so, makes other
11092  * repair code delete or modify backrefs in the extent tree for example, which
11093  * will result in an inconsistent fs after repairing the root items.
11094  */
11095 static int repair_root_items(struct btrfs_fs_info *info)
11096 {
11097         struct btrfs_path *path = NULL;
11098         struct btrfs_key key;
11099         struct extent_buffer *leaf;
11100         struct btrfs_trans_handle *trans = NULL;
11101         int ret = 0;
11102         int bad_roots = 0;
11103         int need_trans = 0;
11104
11105         ret = build_roots_info_cache(info);
11106         if (ret)
11107                 goto out;
11108
11109         path = btrfs_alloc_path();
11110         if (!path) {
11111                 ret = -ENOMEM;
11112                 goto out;
11113         }
11114
11115         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11116         key.type = BTRFS_ROOT_ITEM_KEY;
11117         key.offset = 0;
11118
11119 again:
11120         /*
11121          * Avoid opening and committing transactions if a leaf doesn't have
11122          * any root items that need to be fixed, so that we avoid rotating
11123          * backup roots unnecessarily.
11124          */
11125         if (need_trans) {
11126                 trans = btrfs_start_transaction(info->tree_root, 1);
11127                 if (IS_ERR(trans)) {
11128                         ret = PTR_ERR(trans);
11129                         goto out;
11130                 }
11131         }
11132
11133         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11134                                 0, trans ? 1 : 0);
11135         if (ret < 0)
11136                 goto out;
11137         leaf = path->nodes[0];
11138
11139         while (1) {
11140                 struct btrfs_key found_key;
11141
11142                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11143                         int no_more_keys = find_next_key(path, &key);
11144
11145                         btrfs_release_path(path);
11146                         if (trans) {
11147                                 ret = btrfs_commit_transaction(trans,
11148                                                                info->tree_root);
11149                                 trans = NULL;
11150                                 if (ret < 0)
11151                                         goto out;
11152                         }
11153                         need_trans = 0;
11154                         if (no_more_keys)
11155                                 break;
11156                         goto again;
11157                 }
11158
11159                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11160
11161                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11162                         goto next;
11163                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11164                         goto next;
11165
11166                 ret = maybe_repair_root_item(info, path, &found_key,
11167                                              trans ? 0 : 1);
11168                 if (ret < 0)
11169                         goto out;
11170                 if (ret) {
11171                         if (!trans && repair) {
11172                                 need_trans = 1;
11173                                 key = found_key;
11174                                 btrfs_release_path(path);
11175                                 goto again;
11176                         }
11177                         bad_roots++;
11178                 }
11179 next:
11180                 path->slots[0]++;
11181         }
11182         ret = 0;
11183 out:
11184         free_roots_info_cache();
11185         btrfs_free_path(path);
11186         if (trans)
11187                 btrfs_commit_transaction(trans, info->tree_root);
11188         if (ret < 0)
11189                 return ret;
11190
11191         return bad_roots;
11192 }
11193
/*
 * Help text for "btrfs check": a NULL-terminated list of lines.
 * NOTE(review): presumably the first line is the synopsis and the second the
 * one-line summary, as for the other command usage tables - confirm against
 * the usage printer.
 */
const char * const cmd_check_usage[] = {
        /* Synopsis and short summary */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",

        /* Long description */
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* Options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               select mode, allows to make some memory/IO",
        "                            trade-offs, where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report           print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",

        /* Terminator */
        NULL
};
11223
11224 int cmd_check(int argc, char **argv)
11225 {
11226         struct cache_tree root_cache;
11227         struct btrfs_root *root;
11228         struct btrfs_fs_info *info;
11229         u64 bytenr = 0;
11230         u64 subvolid = 0;
11231         u64 tree_root_bytenr = 0;
11232         u64 chunk_root_bytenr = 0;
11233         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11234         int ret;
11235         u64 num;
11236         int init_csum_tree = 0;
11237         int readonly = 0;
11238         int qgroup_report = 0;
11239         int qgroups_repaired = 0;
11240         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11241
11242         while(1) {
11243                 int c;
11244                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11245                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11246                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11247                         GETOPT_VAL_MODE };
11248                 static const struct option long_options[] = {
11249                         { "super", required_argument, NULL, 's' },
11250                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11251                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11252                         { "init-csum-tree", no_argument, NULL,
11253                                 GETOPT_VAL_INIT_CSUM },
11254                         { "init-extent-tree", no_argument, NULL,
11255                                 GETOPT_VAL_INIT_EXTENT },
11256                         { "check-data-csum", no_argument, NULL,
11257                                 GETOPT_VAL_CHECK_CSUM },
11258                         { "backup", no_argument, NULL, 'b' },
11259                         { "subvol-extents", required_argument, NULL, 'E' },
11260                         { "qgroup-report", no_argument, NULL, 'Q' },
11261                         { "tree-root", required_argument, NULL, 'r' },
11262                         { "chunk-root", required_argument, NULL,
11263                                 GETOPT_VAL_CHUNK_TREE },
11264                         { "progress", no_argument, NULL, 'p' },
11265                         { "mode", required_argument, NULL,
11266                                 GETOPT_VAL_MODE },
11267                         { NULL, 0, NULL, 0}
11268                 };
11269
11270                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11271                 if (c < 0)
11272                         break;
11273                 switch(c) {
11274                         case 'a': /* ignored */ break;
11275                         case 'b':
11276                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11277                                 break;
11278                         case 's':
11279                                 num = arg_strtou64(optarg);
11280                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11281                                         fprintf(stderr,
11282                                                 "ERROR: super mirror should be less than: %d\n",
11283                                                 BTRFS_SUPER_MIRROR_MAX);
11284                                         exit(1);
11285                                 }
11286                                 bytenr = btrfs_sb_offset(((int)num));
11287                                 printf("using SB copy %llu, bytenr %llu\n", num,
11288                                        (unsigned long long)bytenr);
11289                                 break;
11290                         case 'Q':
11291                                 qgroup_report = 1;
11292                                 break;
11293                         case 'E':
11294                                 subvolid = arg_strtou64(optarg);
11295                                 break;
11296                         case 'r':
11297                                 tree_root_bytenr = arg_strtou64(optarg);
11298                                 break;
11299                         case GETOPT_VAL_CHUNK_TREE:
11300                                 chunk_root_bytenr = arg_strtou64(optarg);
11301                                 break;
11302                         case 'p':
11303                                 ctx.progress_enabled = true;
11304                                 break;
11305                         case '?':
11306                         case 'h':
11307                                 usage(cmd_check_usage);
11308                         case GETOPT_VAL_REPAIR:
11309                                 printf("enabling repair mode\n");
11310                                 repair = 1;
11311                                 ctree_flags |= OPEN_CTREE_WRITES;
11312                                 break;
11313                         case GETOPT_VAL_READONLY:
11314                                 readonly = 1;
11315                                 break;
11316                         case GETOPT_VAL_INIT_CSUM:
11317                                 printf("Creating a new CRC tree\n");
11318                                 init_csum_tree = 1;
11319                                 repair = 1;
11320                                 ctree_flags |= OPEN_CTREE_WRITES;
11321                                 break;
11322                         case GETOPT_VAL_INIT_EXTENT:
11323                                 init_extent_tree = 1;
11324                                 ctree_flags |= (OPEN_CTREE_WRITES |
11325                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11326                                 repair = 1;
11327                                 break;
11328                         case GETOPT_VAL_CHECK_CSUM:
11329                                 check_data_csum = 1;
11330                                 break;
11331                         case GETOPT_VAL_MODE:
11332                                 check_mode = parse_check_mode(optarg);
11333                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11334                                         error("unknown mode: %s", optarg);
11335                                         exit(1);
11336                                 }
11337                                 break;
11338                 }
11339         }
11340
11341         if (check_argc_exact(argc - optind, 1))
11342                 usage(cmd_check_usage);
11343
11344         if (ctx.progress_enabled) {
11345                 ctx.tp = TASK_NOTHING;
11346                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11347         }
11348
11349         /* This check is the only reason for --readonly to exist */
11350         if (readonly && repair) {
11351                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11352                 exit(1);
11353         }
11354
11355         /*
11356          * Not supported yet
11357          */
11358         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11359                 error("Low memory mode doesn't support repair yet");
11360                 exit(1);
11361         }
11362
11363         radix_tree_init();
11364         cache_tree_init(&root_cache);
11365
11366         if((ret = check_mounted(argv[optind])) < 0) {
11367                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11368                 goto err_out;
11369         } else if(ret) {
11370                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11371                 ret = -EBUSY;
11372                 goto err_out;
11373         }
11374
11375         /* only allow partial opening under repair mode */
11376         if (repair)
11377                 ctree_flags |= OPEN_CTREE_PARTIAL;
11378
11379         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11380                                   chunk_root_bytenr, ctree_flags);
11381         if (!info) {
11382                 fprintf(stderr, "Couldn't open file system\n");
11383                 ret = -EIO;
11384                 goto err_out;
11385         }
11386
11387         global_info = info;
11388         root = info->fs_root;
11389
11390         /*
11391          * repair mode will force us to commit transaction which
11392          * will make us fail to load log tree when mounting.
11393          */
11394         if (repair && btrfs_super_log_root(info->super_copy)) {
11395                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11396                 if (!ret) {
11397                         ret = 1;
11398                         goto close_out;
11399                 }
11400                 ret = zero_log_tree(root);
11401                 if (ret) {
11402                         fprintf(stderr, "fail to zero log tree\n");
11403                         goto close_out;
11404                 }
11405         }
11406
11407         uuid_unparse(info->super_copy->fsid, uuidbuf);
11408         if (qgroup_report) {
11409                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11410                        uuidbuf);
11411                 ret = qgroup_verify_all(info);
11412                 if (ret == 0)
11413                         report_qgroups(1);
11414                 goto close_out;
11415         }
11416         if (subvolid) {
11417                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11418                        subvolid, argv[optind], uuidbuf);
11419                 ret = print_extent_state(info, subvolid);
11420                 goto close_out;
11421         }
11422         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11423
11424         if (!extent_buffer_uptodate(info->tree_root->node) ||
11425             !extent_buffer_uptodate(info->dev_root->node) ||
11426             !extent_buffer_uptodate(info->chunk_root->node)) {
11427                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11428                 ret = -EIO;
11429                 goto close_out;
11430         }
11431
11432         if (init_extent_tree || init_csum_tree) {
11433                 struct btrfs_trans_handle *trans;
11434
11435                 trans = btrfs_start_transaction(info->extent_root, 0);
11436                 if (IS_ERR(trans)) {
11437                         fprintf(stderr, "Error starting transaction\n");
11438                         ret = PTR_ERR(trans);
11439                         goto close_out;
11440                 }
11441
11442                 if (init_extent_tree) {
11443                         printf("Creating a new extent tree\n");
11444                         ret = reinit_extent_tree(trans, info);
11445                         if (ret)
11446                                 goto close_out;
11447                 }
11448
11449                 if (init_csum_tree) {
11450                         fprintf(stderr, "Reinit crc root\n");
11451                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11452                         if (ret) {
11453                                 fprintf(stderr, "crc root initialization failed\n");
11454                                 ret = -EIO;
11455                                 goto close_out;
11456                         }
11457
11458                         ret = fill_csum_tree(trans, info->csum_root,
11459                                              init_extent_tree);
11460                         if (ret) {
11461                                 fprintf(stderr, "crc refilling failed\n");
11462                                 return -EIO;
11463                         }
11464                 }
11465                 /*
11466                  * Ok now we commit and run the normal fsck, which will add
11467                  * extent entries for all of the items it finds.
11468                  */
11469                 ret = btrfs_commit_transaction(trans, info->extent_root);
11470                 if (ret)
11471                         goto close_out;
11472         }
11473         if (!extent_buffer_uptodate(info->extent_root->node)) {
11474                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11475                 ret = -EIO;
11476                 goto close_out;
11477         }
11478         if (!extent_buffer_uptodate(info->csum_root->node)) {
11479                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11480                 ret = -EIO;
11481                 goto close_out;
11482         }
11483
11484         if (!ctx.progress_enabled)
11485                 fprintf(stderr, "checking extents\n");
11486         if (check_mode == CHECK_MODE_LOWMEM)
11487                 ret = check_chunks_and_extents_v2(root);
11488         else
11489                 ret = check_chunks_and_extents(root);
11490         if (ret)
11491                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11492
11493         ret = repair_root_items(info);
11494         if (ret < 0)
11495                 goto close_out;
11496         if (repair) {
11497                 fprintf(stderr, "Fixed %d roots.\n", ret);
11498                 ret = 0;
11499         } else if (ret > 0) {
11500                 fprintf(stderr,
11501                        "Found %d roots with an outdated root item.\n",
11502                        ret);
11503                 fprintf(stderr,
11504                         "Please run a filesystem check with the option --repair to fix them.\n");
11505                 ret = 1;
11506                 goto close_out;
11507         }
11508
11509         if (!ctx.progress_enabled) {
11510                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11511                         fprintf(stderr, "checking free space tree\n");
11512                 else
11513                         fprintf(stderr, "checking free space cache\n");
11514         }
11515         ret = check_space_cache(root);
11516         if (ret)
11517                 goto out;
11518
11519         /*
11520          * We used to have to have these hole extents in between our real
11521          * extents so if we don't have this flag set we need to make sure there
11522          * are no gaps in the file extents for inodes, otherwise we can just
11523          * ignore it when this happens.
11524          */
11525         no_holes = btrfs_fs_incompat(root->fs_info,
11526                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11527         if (!ctx.progress_enabled)
11528                 fprintf(stderr, "checking fs roots\n");
11529         ret = check_fs_roots(root, &root_cache);
11530         if (ret)
11531                 goto out;
11532
11533         fprintf(stderr, "checking csums\n");
11534         ret = check_csums(root);
11535         if (ret)
11536                 goto out;
11537
11538         fprintf(stderr, "checking root refs\n");
11539         ret = check_root_refs(root, &root_cache);
11540         if (ret)
11541                 goto out;
11542
11543         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11544                 struct extent_buffer *eb;
11545
11546                 eb = list_first_entry(&root->fs_info->recow_ebs,
11547                                       struct extent_buffer, recow);
11548                 list_del_init(&eb->recow);
11549                 ret = recow_extent_buffer(root, eb);
11550                 if (ret)
11551                         break;
11552         }
11553
11554         while (!list_empty(&delete_items)) {
11555                 struct bad_item *bad;
11556
11557                 bad = list_first_entry(&delete_items, struct bad_item, list);
11558                 list_del_init(&bad->list);
11559                 if (repair)
11560                         ret = delete_bad_item(root, bad);
11561                 free(bad);
11562         }
11563
11564         if (info->quota_enabled) {
11565                 int err;
11566                 fprintf(stderr, "checking quota groups\n");
11567                 err = qgroup_verify_all(info);
11568                 if (err)
11569                         goto out;
11570                 report_qgroups(0);
11571                 err = repair_qgroups(info, &qgroups_repaired);
11572                 if (err)
11573                         goto out;
11574         }
11575
11576         if (!list_empty(&root->fs_info->recow_ebs)) {
11577                 fprintf(stderr, "Transid errors in file system\n");
11578                 ret = 1;
11579         }
11580 out:
11581         /* Don't override original ret */
11582         if (!ret && qgroups_repaired)
11583                 ret = qgroups_repaired;
11584
11585         if (found_old_backref) { /*
11586                  * there was a disk format change when mixed
11587                  * backref was in testing tree. The old format
11588                  * existed about one week.
11589                  */
11590                 printf("\n * Found old mixed backref format. "
11591                        "The old format is not supported! *"
11592                        "\n * Please mount the FS in readonly mode, "
11593                        "backup data and re-format the FS. *\n\n");
11594                 ret = 1;
11595         }
11596         printf("found %llu bytes used err is %d\n",
11597                (unsigned long long)bytes_used, ret);
11598         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11599         printf("total tree bytes: %llu\n",
11600                (unsigned long long)total_btree_bytes);
11601         printf("total fs tree bytes: %llu\n",
11602                (unsigned long long)total_fs_tree_bytes);
11603         printf("total extent tree bytes: %llu\n",
11604                (unsigned long long)total_extent_tree_bytes);
11605         printf("btree space waste bytes: %llu\n",
11606                (unsigned long long)btree_space_waste);
11607         printf("file data blocks allocated: %llu\n referenced %llu\n",
11608                 (unsigned long long)data_bytes_allocated,
11609                 (unsigned long long)data_bytes_referenced);
11610
11611         free_qgroup_counts();
11612         free_root_recs_tree(&root_cache);
11613 close_out:
11614         close_ctree(root);
11615 err_out:
11616         if (ctx.progress_enabled)
11617                 task_deinit(ctx.info);
11618
11619         return ret;
11620 }