btrfs-progs: check: release path after usage
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/* Stage of the check that the progress indicator reports on. */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel: has to remain the last enumerator. */
	TASK_NOTHING,
};
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/* Check implementations selectable by name (see parse_check_mode()). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM,
	/* Returned for a name that matches no known mode */
	CHECK_MODE_UNKNOWN,
	/* Alias, not an extra mode: what is used when nothing is requested */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
86 struct extent_backref {
87         struct list_head list;
88         unsigned int is_data:1;
89         unsigned int found_extent_tree:1;
90         unsigned int full_backref:1;
91         unsigned int found_ref:1;
92         unsigned int broken:1;
93 };
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
100 struct data_backref {
101         struct extent_backref node;
102         union {
103                 u64 parent;
104                 u64 root;
105         };
106         u64 owner;
107         u64 offset;
108         u64 disk_bytenr;
109         u64 bytes;
110         u64 ram_bytes;
111         u32 num_refs;
112         u32 found_ref;
113 };
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
120 /*
121  * Much like data_backref, just removed the undetermined members
122  * and change it to use list_head.
123  * During extent scan, it is stored in root->orphan_data_extent.
124  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
125  */
126 struct orphan_data_extent {
127         struct list_head list;
128         u64 root;
129         u64 objectid;
130         u64 offset;
131         u64 disk_bytenr;
132         u64 disk_len;
133 };
134
135 struct tree_backref {
136         struct extent_backref node;
137         union {
138                 u64 parent;
139                 u64 root;
140         };
141 };
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
/*
 * "Not decided yet" value for extent_record::flag_block_full_backref, used
 * to initialise that field explicitly.
 */
enum {
	FLAG_UNSET = 2
};
150
151 struct extent_record {
152         struct list_head backrefs;
153         struct list_head dups;
154         struct list_head list;
155         struct cache_extent cache;
156         struct btrfs_disk_key parent_key;
157         u64 start;
158         u64 max_size;
159         u64 nr;
160         u64 refs;
161         u64 extent_item_refs;
162         u64 generation;
163         u64 parent_generation;
164         u64 info_objectid;
165         u32 num_duplicates;
166         u8 info_level;
167         unsigned int flag_block_full_backref:2;
168         unsigned int found_rec:1;
169         unsigned int content_checked:1;
170         unsigned int owner_ref_checked:1;
171         unsigned int is_root:1;
172         unsigned int metadata:1;
173         unsigned int bad_full_backref:1;
174         unsigned int crossing_stripes:1;
175         unsigned int wrong_chunk_type:1;
176 };
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
183 struct inode_backref {
184         struct list_head list;
185         unsigned int found_dir_item:1;
186         unsigned int found_dir_index:1;
187         unsigned int found_inode_ref:1;
188         unsigned int filetype:8;
189         int errors;
190         unsigned int ref_type;
191         u64 dir;
192         u64 index;
193         u16 namelen;
194         char name[0];
195 };
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
202 struct root_item_record {
203         struct list_head list;
204         u64 objectid;
205         u64 bytenr;
206         u64 last_snapshot;
207         u8 level;
208         u8 drop_level;
209         int level_size;
210         struct btrfs_key drop_key;
211 };
212
/*
 * Bits for the "errors" mask of a back reference.  The trailing comment on
 * each line is the flag's value in hex.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x0001 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* 0x0002 */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x0010 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* 0x0020 */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* 0x0080 */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x0100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* 0x0400 */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
226
227 struct file_extent_hole {
228         struct rb_node node;
229         u64 start;
230         u64 len;
231 };
232
233 struct inode_record {
234         struct list_head backrefs;
235         unsigned int checked:1;
236         unsigned int merging:1;
237         unsigned int found_inode_item:1;
238         unsigned int found_dir_item:1;
239         unsigned int found_file_extent:1;
240         unsigned int found_csum_item:1;
241         unsigned int some_csum_missing:1;
242         unsigned int nodatasum:1;
243         int errors;
244
245         u64 ino;
246         u32 nlink;
247         u32 imode;
248         u64 isize;
249         u64 nbytes;
250
251         u32 found_link;
252         u64 found_size;
253         u64 extent_start;
254         u64 extent_end;
255         struct rb_root holes;
256         struct list_head orphan_extents;
257
258         u32 refs;
259 };
260
/*
 * Bits for inode_record::errors.  The trailing comment on each line is the
 * flag's value in hex.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* 0x0002 */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* 0x0004 */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* 0x0008 */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* 0x4000 */
276
277 struct root_backref {
278         struct list_head list;
279         unsigned int found_dir_item:1;
280         unsigned int found_dir_index:1;
281         unsigned int found_back_ref:1;
282         unsigned int found_forward_ref:1;
283         unsigned int reachable:1;
284         int errors;
285         u64 ref_root;
286         u64 dir;
287         u64 index;
288         u16 namelen;
289         char name[0];
290 };
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
297 struct root_record {
298         struct list_head backrefs;
299         struct cache_extent cache;
300         unsigned int found_root_item:1;
301         u64 objectid;
302         u32 found_ref;
303 };
304
305 struct ptr_node {
306         struct cache_extent cache;
307         void *data;
308 };
309
310 struct shared_node {
311         struct cache_extent cache;
312         struct cache_tree root_cache;
313         struct cache_tree inode_cache;
314         struct inode_record *current;
315         u32 refs;
316 };
317
318 struct block_info {
319         u64 start;
320         u32 size;
321 };
322
323 struct walk_control {
324         struct cache_tree shared;
325         struct shared_node *nodes[BTRFS_MAX_LEVEL];
326         int active_node;
327         int root_level;
328 };
329
330 struct bad_item {
331         struct btrfs_key key;
332         u64 root_id;
333         struct list_head list;
334 };
335
336 struct extent_entry {
337         u64 bytenr;
338         u64 bytes;
339         int count;
340         int broken;
341         struct list_head list;
342 };
343
344 struct root_item_info {
345         /* level of the root */
346         u8 level;
347         /* number of nodes at this level, must be 1 for a root */
348         int node_count;
349         u64 bytenr;
350         u64 gen;
351         struct cache_extent cache_extent;
352 };
353
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet; they are only used internally
 * for error classification.
 *
 * Every flag owns its own bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable once or-ed into a result; it now takes the first free
 * bit and the values of all other flags are unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Progress task epilogue: end the status line with a newline and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);
	return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         struct rb_node *rb;
663         size_t size;
664         int ret;
665
666         rec = malloc(sizeof(*rec));
667         if (!rec)
668                 return ERR_PTR(-ENOMEM);
669         memcpy(rec, orig_rec, sizeof(*rec));
670         rec->refs = 1;
671         INIT_LIST_HEAD(&rec->backrefs);
672         INIT_LIST_HEAD(&rec->orphan_extents);
673         rec->holes = RB_ROOT;
674
675         list_for_each_entry(orig, &orig_rec->backrefs, list) {
676                 size = sizeof(*orig) + orig->namelen + 1;
677                 backref = malloc(size);
678                 if (!backref) {
679                         ret = -ENOMEM;
680                         goto cleanup;
681                 }
682                 memcpy(backref, orig, size);
683                 list_add_tail(&backref->list, &rec->backrefs);
684         }
685         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686                 dst_orphan = malloc(sizeof(*dst_orphan));
687                 if (!dst_orphan) {
688                         ret = -ENOMEM;
689                         goto cleanup;
690                 }
691                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
693         }
694         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
695         if (ret < 0)
696                 goto cleanup_rb;
697
698         return rec;
699
700 cleanup_rb:
701         rb = rb_first(&rec->holes);
702         while (rb) {
703                 struct file_extent_hole *hole;
704
705                 hole = rb_entry(rb, struct file_extent_hole, node);
706                 rb = rb_next(rb);
707                 free(hole);
708         }
709
710 cleanup:
711         if (!list_empty(&rec->backrefs))
712                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713                         list_del(&orig->list);
714                         free(orig);
715                 }
716
717         if (!list_empty(&rec->orphan_extents))
718                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719                         list_del(&orig->list);
720                         free(orig);
721                 }
722
723         free(rec);
724
725         return ERR_PTR(ret);
726 }
727
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
729                                       u64 objectid)
730 {
731         struct orphan_data_extent *orphan;
732
733         if (list_empty(orphan_extents))
734                 return;
735         printf("The following data extent is lost in tree %llu:\n",
736                objectid);
737         list_for_each_entry(orphan, orphan_extents, list) {
738                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
740                        orphan->disk_len);
741         }
742 }
743
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
745 {
746         u64 root_objectid = root->root_key.objectid;
747         int errors = rec->errors;
748
749         if (!errors)
750                 return;
751         /* reloc root errors, we print its corresponding fs root objectid*/
752         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753                 root_objectid = root->root_key.offset;
754                 fprintf(stderr, "reloc");
755         }
756         fprintf(stderr, "root %llu inode %llu errors %x",
757                 (unsigned long long) root_objectid,
758                 (unsigned long long) rec->ino, rec->errors);
759
760         if (errors & I_ERR_NO_INODE_ITEM)
761                 fprintf(stderr, ", no inode item");
762         if (errors & I_ERR_NO_ORPHAN_ITEM)
763                 fprintf(stderr, ", no orphan item");
764         if (errors & I_ERR_DUP_INODE_ITEM)
765                 fprintf(stderr, ", dup inode item");
766         if (errors & I_ERR_DUP_DIR_INDEX)
767                 fprintf(stderr, ", dup dir index");
768         if (errors & I_ERR_ODD_DIR_ITEM)
769                 fprintf(stderr, ", odd dir item");
770         if (errors & I_ERR_ODD_FILE_EXTENT)
771                 fprintf(stderr, ", odd file extent");
772         if (errors & I_ERR_BAD_FILE_EXTENT)
773                 fprintf(stderr, ", bad file extent");
774         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775                 fprintf(stderr, ", file extent overlap");
776         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777                 fprintf(stderr, ", file extent discount");
778         if (errors & I_ERR_DIR_ISIZE_WRONG)
779                 fprintf(stderr, ", dir isize wrong");
780         if (errors & I_ERR_FILE_NBYTES_WRONG)
781                 fprintf(stderr, ", nbytes wrong");
782         if (errors & I_ERR_ODD_CSUM_ITEM)
783                 fprintf(stderr, ", odd csum item");
784         if (errors & I_ERR_SOME_CSUM_MISSING)
785                 fprintf(stderr, ", some csum missing");
786         if (errors & I_ERR_LINK_COUNT_WRONG)
787                 fprintf(stderr, ", link count wrong");
788         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789                 fprintf(stderr, ", orphan file extent");
790         fprintf(stderr, "\n");
791         /* Print the orphan extents if needed */
792         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
794
795         /* Print the holes if needed */
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797                 struct file_extent_hole *hole;
798                 struct rb_node *node;
799                 int found = 0;
800
801                 node = rb_first(&rec->holes);
802                 fprintf(stderr, "Found file extent holes:\n");
803                 while (node) {
804                         found = 1;
805                         hole = rb_entry(node, struct file_extent_hole, node);
806                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
807                                 hole->start, hole->len);
808                         node = rb_next(node);
809                 }
810                 if (!found)
811                         fprintf(stderr, "\tstart: 0, len: %llu\n",
812                                 round_up(rec->isize, root->sectorsize));
813         }
814 }
815
816 static void print_ref_error(int errors)
817 {
818         if (errors & REF_ERR_NO_DIR_ITEM)
819                 fprintf(stderr, ", no dir item");
820         if (errors & REF_ERR_NO_DIR_INDEX)
821                 fprintf(stderr, ", no dir index");
822         if (errors & REF_ERR_NO_INODE_REF)
823                 fprintf(stderr, ", no inode ref");
824         if (errors & REF_ERR_DUP_DIR_ITEM)
825                 fprintf(stderr, ", dup dir item");
826         if (errors & REF_ERR_DUP_DIR_INDEX)
827                 fprintf(stderr, ", dup dir index");
828         if (errors & REF_ERR_DUP_INODE_REF)
829                 fprintf(stderr, ", dup inode ref");
830         if (errors & REF_ERR_INDEX_UNMATCH)
831                 fprintf(stderr, ", index mismatch");
832         if (errors & REF_ERR_FILETYPE_UNMATCH)
833                 fprintf(stderr, ", filetype mismatch");
834         if (errors & REF_ERR_NAME_TOO_LONG)
835                 fprintf(stderr, ", name too long");
836         if (errors & REF_ERR_NO_ROOT_REF)
837                 fprintf(stderr, ", no root ref");
838         if (errors & REF_ERR_NO_ROOT_BACKREF)
839                 fprintf(stderr, ", no root backref");
840         if (errors & REF_ERR_DUP_ROOT_REF)
841                 fprintf(stderr, ", dup root ref");
842         if (errors & REF_ERR_DUP_ROOT_BACKREF)
843                 fprintf(stderr, ", dup root backref");
844         fprintf(stderr, "\n");
845 }
846
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
848                                           u64 ino, int mod)
849 {
850         struct ptr_node *node;
851         struct cache_extent *cache;
852         struct inode_record *rec = NULL;
853         int ret;
854
855         cache = lookup_cache_extent(inode_cache, ino, 1);
856         if (cache) {
857                 node = container_of(cache, struct ptr_node, cache);
858                 rec = node->data;
859                 if (mod && rec->refs > 1) {
860                         node->data = clone_inode_rec(rec);
861                         if (IS_ERR(node->data))
862                                 return node->data;
863                         rec->refs--;
864                         rec = node->data;
865                 }
866         } else if (mod) {
867                 rec = calloc(1, sizeof(*rec));
868                 if (!rec)
869                         return ERR_PTR(-ENOMEM);
870                 rec->ino = ino;
871                 rec->extent_start = (u64)-1;
872                 rec->refs = 1;
873                 INIT_LIST_HEAD(&rec->backrefs);
874                 INIT_LIST_HEAD(&rec->orphan_extents);
875                 rec->holes = RB_ROOT;
876
877                 node = malloc(sizeof(*node));
878                 if (!node) {
879                         free(rec);
880                         return ERR_PTR(-ENOMEM);
881                 }
882                 node->cache.start = ino;
883                 node->cache.size = 1;
884                 node->data = rec;
885
886                 if (ino == BTRFS_FREE_INO_OBJECTID)
887                         rec->found_link = 1;
888
889                 ret = insert_cache_extent(inode_cache, &node->cache);
890                 if (ret)
891                         return ERR_PTR(-EEXIST);
892         }
893         return rec;
894 }
895
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
897 {
898         struct orphan_data_extent *orphan;
899
900         while (!list_empty(orphan_extents)) {
901                 orphan = list_entry(orphan_extents->next,
902                                     struct orphan_data_extent, list);
903                 list_del(&orphan->list);
904                 free(orphan);
905         }
906 }
907
908 static void free_inode_rec(struct inode_record *rec)
909 {
910         struct inode_backref *backref;
911
912         if (--rec->refs > 0)
913                 return;
914
915         while (!list_empty(&rec->backrefs)) {
916                 backref = to_inode_backref(rec->backrefs.next);
917                 list_del(&backref->list);
918                 free(backref);
919         }
920         free_orphan_data_extents(&rec->orphan_extents);
921         free_file_extent_holes(&rec->holes);
922         free(rec);
923 }
924
925 static int can_free_inode_rec(struct inode_record *rec)
926 {
927         if (!rec->errors && rec->checked && rec->found_inode_item &&
928             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
929                 return 1;
930         return 0;
931 }
932
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934                                  struct inode_record *rec)
935 {
936         struct cache_extent *cache;
937         struct inode_backref *tmp, *backref;
938         struct ptr_node *node;
939         unsigned char filetype;
940
941         if (!rec->found_inode_item)
942                 return;
943
944         filetype = imode_to_type(rec->imode);
945         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946                 if (backref->found_dir_item && backref->found_dir_index) {
947                         if (backref->filetype != filetype)
948                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949                         if (!backref->errors && backref->found_inode_ref &&
950                             rec->nlink == rec->found_link) {
951                                 list_del(&backref->list);
952                                 free(backref);
953                         }
954                 }
955         }
956
957         if (!rec->checked || rec->merging)
958                 return;
959
960         if (S_ISDIR(rec->imode)) {
961                 if (rec->found_size != rec->isize)
962                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963                 if (rec->found_file_extent)
964                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
965         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966                 if (rec->found_dir_item)
967                         rec->errors |= I_ERR_ODD_DIR_ITEM;
968                 if (rec->found_size != rec->nbytes)
969                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
970                 if (rec->nlink > 0 && !no_holes &&
971                     (rec->extent_end < rec->isize ||
972                      first_extent_gap(&rec->holes) < rec->isize))
973                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
974         }
975
976         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977                 if (rec->found_csum_item && rec->nodatasum)
978                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
979                 if (rec->some_csum_missing && !rec->nodatasum)
980                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
981         }
982
983         BUG_ON(rec->refs != 1);
984         if (can_free_inode_rec(rec)) {
985                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986                 node = container_of(cache, struct ptr_node, cache);
987                 BUG_ON(node->data != rec);
988                 remove_cache_extent(inode_cache, &node->cache);
989                 free(node);
990                 free_inode_rec(rec);
991         }
992 }
993
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
995 {
996         struct btrfs_path path;
997         struct btrfs_key key;
998         int ret;
999
1000         key.objectid = BTRFS_ORPHAN_OBJECTID;
1001         key.type = BTRFS_ORPHAN_ITEM_KEY;
1002         key.offset = ino;
1003
1004         btrfs_init_path(&path);
1005         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006         btrfs_release_path(&path);
1007         if (ret > 0)
1008                 ret = -ENOENT;
1009         return ret;
1010 }
1011
1012 static int process_inode_item(struct extent_buffer *eb,
1013                               int slot, struct btrfs_key *key,
1014                               struct shared_node *active_node)
1015 {
1016         struct inode_record *rec;
1017         struct btrfs_inode_item *item;
1018
1019         rec = active_node->current;
1020         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021         if (rec->found_inode_item) {
1022                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1023                 return 1;
1024         }
1025         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026         rec->nlink = btrfs_inode_nlink(eb, item);
1027         rec->isize = btrfs_inode_size(eb, item);
1028         rec->nbytes = btrfs_inode_nbytes(eb, item);
1029         rec->imode = btrfs_inode_mode(eb, item);
1030         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1031                 rec->nodatasum = 1;
1032         rec->found_inode_item = 1;
1033         if (rec->nlink == 0)
1034                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035         maybe_free_inode_rec(&active_node->inode_cache, rec);
1036         return 0;
1037 }
1038
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1040                                                 const char *name,
1041                                                 int namelen, u64 dir)
1042 {
1043         struct inode_backref *backref;
1044
1045         list_for_each_entry(backref, &rec->backrefs, list) {
1046                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1047                         break;
1048                 if (backref->dir != dir || backref->namelen != namelen)
1049                         continue;
1050                 if (memcmp(name, backref->name, namelen))
1051                         continue;
1052                 return backref;
1053         }
1054
1055         backref = malloc(sizeof(*backref) + namelen + 1);
1056         if (!backref)
1057                 return NULL;
1058         memset(backref, 0, sizeof(*backref));
1059         backref->dir = dir;
1060         backref->namelen = namelen;
1061         memcpy(backref->name, name, namelen);
1062         backref->name[namelen] = '\0';
1063         list_add_tail(&backref->list, &rec->backrefs);
1064         return backref;
1065 }
1066
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068                              u64 ino, u64 dir, u64 index,
1069                              const char *name, int namelen,
1070                              int filetype, int itemtype, int errors)
1071 {
1072         struct inode_record *rec;
1073         struct inode_backref *backref;
1074
1075         rec = get_inode_rec(inode_cache, ino, 1);
1076         BUG_ON(IS_ERR(rec));
1077         backref = get_inode_backref(rec, name, namelen, dir);
1078         BUG_ON(!backref);
1079         if (errors)
1080                 backref->errors |= errors;
1081         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082                 if (backref->found_dir_index)
1083                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084                 if (backref->found_inode_ref && backref->index != index)
1085                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1086                 if (backref->found_dir_item && backref->filetype != filetype)
1087                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1088
1089                 backref->index = index;
1090                 backref->filetype = filetype;
1091                 backref->found_dir_index = 1;
1092         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1093                 rec->found_link++;
1094                 if (backref->found_dir_item)
1095                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096                 if (backref->found_dir_index && backref->filetype != filetype)
1097                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1098
1099                 backref->filetype = filetype;
1100                 backref->found_dir_item = 1;
1101         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103                 if (backref->found_inode_ref)
1104                         backref->errors |= REF_ERR_DUP_INODE_REF;
1105                 if (backref->found_dir_index && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 else
1108                         backref->index = index;
1109
1110                 backref->ref_type = itemtype;
1111                 backref->found_inode_ref = 1;
1112         } else {
1113                 BUG_ON(1);
1114         }
1115
1116         maybe_free_inode_rec(inode_cache, rec);
1117         return 0;
1118 }
1119
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121                             struct cache_tree *dst_cache)
1122 {
1123         struct inode_backref *backref;
1124         u32 dir_count = 0;
1125         int ret = 0;
1126
1127         dst->merging = 1;
1128         list_for_each_entry(backref, &src->backrefs, list) {
1129                 if (backref->found_dir_index) {
1130                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1131                                         backref->index, backref->name,
1132                                         backref->namelen, backref->filetype,
1133                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1134                 }
1135                 if (backref->found_dir_item) {
1136                         dir_count++;
1137                         add_inode_backref(dst_cache, dst->ino,
1138                                         backref->dir, 0, backref->name,
1139                                         backref->namelen, backref->filetype,
1140                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1141                 }
1142                 if (backref->found_inode_ref) {
1143                         add_inode_backref(dst_cache, dst->ino,
1144                                         backref->dir, backref->index,
1145                                         backref->name, backref->namelen, 0,
1146                                         backref->ref_type, backref->errors);
1147                 }
1148         }
1149
1150         if (src->found_dir_item)
1151                 dst->found_dir_item = 1;
1152         if (src->found_file_extent)
1153                 dst->found_file_extent = 1;
1154         if (src->found_csum_item)
1155                 dst->found_csum_item = 1;
1156         if (src->some_csum_missing)
1157                 dst->some_csum_missing = 1;
1158         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1160                 if (ret < 0)
1161                         return ret;
1162         }
1163
1164         BUG_ON(src->found_link < dir_count);
1165         dst->found_link += src->found_link - dir_count;
1166         dst->found_size += src->found_size;
1167         if (src->extent_start != (u64)-1) {
1168                 if (dst->extent_start == (u64)-1) {
1169                         dst->extent_start = src->extent_start;
1170                         dst->extent_end = src->extent_end;
1171                 } else {
1172                         if (dst->extent_end > src->extent_start)
1173                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174                         else if (dst->extent_end < src->extent_start) {
1175                                 ret = add_file_extent_hole(&dst->holes,
1176                                         dst->extent_end,
1177                                         src->extent_start - dst->extent_end);
1178                         }
1179                         if (dst->extent_end < src->extent_end)
1180                                 dst->extent_end = src->extent_end;
1181                 }
1182         }
1183
1184         dst->errors |= src->errors;
1185         if (src->found_inode_item) {
1186                 if (!dst->found_inode_item) {
1187                         dst->nlink = src->nlink;
1188                         dst->isize = src->isize;
1189                         dst->nbytes = src->nbytes;
1190                         dst->imode = src->imode;
1191                         dst->nodatasum = src->nodatasum;
1192                         dst->found_inode_item = 1;
1193                 } else {
1194                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1195                 }
1196         }
1197         dst->merging = 0;
1198
1199         return 0;
1200 }
1201
1202 static int splice_shared_node(struct shared_node *src_node,
1203                               struct shared_node *dst_node)
1204 {
1205         struct cache_extent *cache;
1206         struct ptr_node *node, *ins;
1207         struct cache_tree *src, *dst;
1208         struct inode_record *rec, *conflict;
1209         u64 current_ino = 0;
1210         int splice = 0;
1211         int ret;
1212
1213         if (--src_node->refs == 0)
1214                 splice = 1;
1215         if (src_node->current)
1216                 current_ino = src_node->current->ino;
1217
1218         src = &src_node->root_cache;
1219         dst = &dst_node->root_cache;
1220 again:
1221         cache = search_cache_extent(src, 0);
1222         while (cache) {
1223                 node = container_of(cache, struct ptr_node, cache);
1224                 rec = node->data;
1225                 cache = next_cache_extent(cache);
1226
1227                 if (splice) {
1228                         remove_cache_extent(src, &node->cache);
1229                         ins = node;
1230                 } else {
1231                         ins = malloc(sizeof(*ins));
1232                         BUG_ON(!ins);
1233                         ins->cache.start = node->cache.start;
1234                         ins->cache.size = node->cache.size;
1235                         ins->data = rec;
1236                         rec->refs++;
1237                 }
1238                 ret = insert_cache_extent(dst, &ins->cache);
1239                 if (ret == -EEXIST) {
1240                         conflict = get_inode_rec(dst, rec->ino, 1);
1241                         BUG_ON(IS_ERR(conflict));
1242                         merge_inode_recs(rec, conflict, dst);
1243                         if (rec->checked) {
1244                                 conflict->checked = 1;
1245                                 if (dst_node->current == conflict)
1246                                         dst_node->current = NULL;
1247                         }
1248                         maybe_free_inode_rec(dst, conflict);
1249                         free_inode_rec(rec);
1250                         free(ins);
1251                 } else {
1252                         BUG_ON(ret);
1253                 }
1254         }
1255
1256         if (src == &src_node->root_cache) {
1257                 src = &src_node->inode_cache;
1258                 dst = &dst_node->inode_cache;
1259                 goto again;
1260         }
1261
1262         if (current_ino > 0 && (!dst_node->current ||
1263             current_ino > dst_node->current->ino)) {
1264                 if (dst_node->current) {
1265                         dst_node->current->checked = 1;
1266                         maybe_free_inode_rec(dst, dst_node->current);
1267                 }
1268                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269                 BUG_ON(IS_ERR(dst_node->current));
1270         }
1271         return 0;
1272 }
1273
1274 static void free_inode_ptr(struct cache_extent *cache)
1275 {
1276         struct ptr_node *node;
1277         struct inode_record *rec;
1278
1279         node = container_of(cache, struct ptr_node, cache);
1280         rec = node->data;
1281         free_inode_rec(rec);
1282         free(node);
1283 }
1284
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1286
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1288                                             u64 bytenr)
1289 {
1290         struct cache_extent *cache;
1291         struct shared_node *node;
1292
1293         cache = lookup_cache_extent(shared, bytenr, 1);
1294         if (cache) {
1295                 node = container_of(cache, struct shared_node, cache);
1296                 return node;
1297         }
1298         return NULL;
1299 }
1300
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1302 {
1303         int ret;
1304         struct shared_node *node;
1305
1306         node = calloc(1, sizeof(*node));
1307         if (!node)
1308                 return -ENOMEM;
1309         node->cache.start = bytenr;
1310         node->cache.size = 1;
1311         cache_tree_init(&node->root_cache);
1312         cache_tree_init(&node->inode_cache);
1313         node->refs = refs;
1314
1315         ret = insert_cache_extent(shared, &node->cache);
1316
1317         return ret;
1318 }
1319
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321                              struct walk_control *wc, int level)
1322 {
1323         struct shared_node *node;
1324         struct shared_node *dest;
1325         int ret;
1326
1327         if (level == wc->active_node)
1328                 return 0;
1329
1330         BUG_ON(wc->active_node <= level);
1331         node = find_shared_node(&wc->shared, bytenr);
1332         if (!node) {
1333                 ret = add_shared_node(&wc->shared, bytenr, refs);
1334                 BUG_ON(ret);
1335                 node = find_shared_node(&wc->shared, bytenr);
1336                 wc->nodes[level] = node;
1337                 wc->active_node = level;
1338                 return 0;
1339         }
1340
1341         if (wc->root_level == wc->active_node &&
1342             btrfs_root_refs(&root->root_item) == 0) {
1343                 if (--node->refs == 0) {
1344                         free_inode_recs_tree(&node->root_cache);
1345                         free_inode_recs_tree(&node->inode_cache);
1346                         remove_cache_extent(&wc->shared, &node->cache);
1347                         free(node);
1348                 }
1349                 return 1;
1350         }
1351
1352         dest = wc->nodes[wc->active_node];
1353         splice_shared_node(node, dest);
1354         if (node->refs == 0) {
1355                 remove_cache_extent(&wc->shared, &node->cache);
1356                 free(node);
1357         }
1358         return 1;
1359 }
1360
1361 static int leave_shared_node(struct btrfs_root *root,
1362                              struct walk_control *wc, int level)
1363 {
1364         struct shared_node *node;
1365         struct shared_node *dest;
1366         int i;
1367
1368         if (level == wc->root_level)
1369                 return 0;
1370
1371         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1372                 if (wc->nodes[i])
1373                         break;
1374         }
1375         BUG_ON(i >= BTRFS_MAX_LEVEL);
1376
1377         node = wc->nodes[wc->active_node];
1378         wc->nodes[wc->active_node] = NULL;
1379         wc->active_node = i;
1380
1381         dest = wc->nodes[wc->active_node];
1382         if (wc->active_node < wc->root_level ||
1383             btrfs_root_refs(&root->root_item) > 0) {
1384                 BUG_ON(node->refs <= 1);
1385                 splice_shared_node(node, dest);
1386         } else {
1387                 BUG_ON(node->refs < 2);
1388                 node->refs--;
1389         }
1390         return 0;
1391 }
1392
1393 /*
1394  * Returns:
1395  * < 0 - on error
1396  * 1   - if the root with id child_root_id is a child of root parent_root_id
1397  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1398  *       has other root(s) as parent(s)
1399  * 2   - if the root child_root_id doesn't have any parent roots
1400  */
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1402                          u64 child_root_id)
1403 {
1404         struct btrfs_path path;
1405         struct btrfs_key key;
1406         struct extent_buffer *leaf;
1407         int has_parent = 0;
1408         int ret;
1409
1410         btrfs_init_path(&path);
1411
1412         key.objectid = parent_root_id;
1413         key.type = BTRFS_ROOT_REF_KEY;
1414         key.offset = child_root_id;
1415         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1416                                 0, 0);
1417         if (ret < 0)
1418                 return ret;
1419         btrfs_release_path(&path);
1420         if (!ret)
1421                 return 1;
1422
1423         key.objectid = child_root_id;
1424         key.type = BTRFS_ROOT_BACKREF_KEY;
1425         key.offset = 0;
1426         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1427                                 0, 0);
1428         if (ret < 0)
1429                 goto out;
1430
1431         while (1) {
1432                 leaf = path.nodes[0];
1433                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1435                         if (ret)
1436                                 break;
1437                         leaf = path.nodes[0];
1438                 }
1439
1440                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1441                 if (key.objectid != child_root_id ||
1442                     key.type != BTRFS_ROOT_BACKREF_KEY)
1443                         break;
1444
1445                 has_parent = 1;
1446
1447                 if (key.offset == parent_root_id) {
1448                         btrfs_release_path(&path);
1449                         return 1;
1450                 }
1451
1452                 path.slots[0]++;
1453         }
1454 out:
1455         btrfs_release_path(&path);
1456         if (ret < 0)
1457                 return ret;
1458         return has_parent ? 0 : 2;
1459 }
1460
1461 static int process_dir_item(struct btrfs_root *root,
1462                             struct extent_buffer *eb,
1463                             int slot, struct btrfs_key *key,
1464                             struct shared_node *active_node)
1465 {
1466         u32 total;
1467         u32 cur = 0;
1468         u32 len;
1469         u32 name_len;
1470         u32 data_len;
1471         int error;
1472         int nritems = 0;
1473         int filetype;
1474         struct btrfs_dir_item *di;
1475         struct inode_record *rec;
1476         struct cache_tree *root_cache;
1477         struct cache_tree *inode_cache;
1478         struct btrfs_key location;
1479         char namebuf[BTRFS_NAME_LEN];
1480
1481         root_cache = &active_node->root_cache;
1482         inode_cache = &active_node->inode_cache;
1483         rec = active_node->current;
1484         rec->found_dir_item = 1;
1485
1486         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487         total = btrfs_item_size_nr(eb, slot);
1488         while (cur < total) {
1489                 nritems++;
1490                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491                 name_len = btrfs_dir_name_len(eb, di);
1492                 data_len = btrfs_dir_data_len(eb, di);
1493                 filetype = btrfs_dir_type(eb, di);
1494
1495                 rec->found_size += name_len;
1496                 if (name_len <= BTRFS_NAME_LEN) {
1497                         len = name_len;
1498                         error = 0;
1499                 } else {
1500                         len = BTRFS_NAME_LEN;
1501                         error = REF_ERR_NAME_TOO_LONG;
1502                 }
1503                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1504
1505                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506                         add_inode_backref(inode_cache, location.objectid,
1507                                           key->objectid, key->offset, namebuf,
1508                                           len, filetype, key->type, error);
1509                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510                         add_inode_backref(root_cache, location.objectid,
1511                                           key->objectid, key->offset,
1512                                           namebuf, len, filetype,
1513                                           key->type, error);
1514                 } else {
1515                         fprintf(stderr, "invalid location in dir item %u\n",
1516                                 location.type);
1517                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518                                           key->objectid, key->offset, namebuf,
1519                                           len, filetype, key->type, error);
1520                 }
1521
1522                 len = sizeof(*di) + name_len + data_len;
1523                 di = (struct btrfs_dir_item *)((char *)di + len);
1524                 cur += len;
1525         }
1526         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1528
1529         return 0;
1530 }
1531
1532 static int process_inode_ref(struct extent_buffer *eb,
1533                              int slot, struct btrfs_key *key,
1534                              struct shared_node *active_node)
1535 {
1536         u32 total;
1537         u32 cur = 0;
1538         u32 len;
1539         u32 name_len;
1540         u64 index;
1541         int error;
1542         struct cache_tree *inode_cache;
1543         struct btrfs_inode_ref *ref;
1544         char namebuf[BTRFS_NAME_LEN];
1545
1546         inode_cache = &active_node->inode_cache;
1547
1548         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549         total = btrfs_item_size_nr(eb, slot);
1550         while (cur < total) {
1551                 name_len = btrfs_inode_ref_name_len(eb, ref);
1552                 index = btrfs_inode_ref_index(eb, ref);
1553                 if (name_len <= BTRFS_NAME_LEN) {
1554                         len = name_len;
1555                         error = 0;
1556                 } else {
1557                         len = BTRFS_NAME_LEN;
1558                         error = REF_ERR_NAME_TOO_LONG;
1559                 }
1560                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561                 add_inode_backref(inode_cache, key->objectid, key->offset,
1562                                   index, namebuf, len, 0, key->type, error);
1563
1564                 len = sizeof(*ref) + name_len;
1565                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1566                 cur += len;
1567         }
1568         return 0;
1569 }
1570
1571 static int process_inode_extref(struct extent_buffer *eb,
1572                                 int slot, struct btrfs_key *key,
1573                                 struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u64 index;
1580         u64 parent;
1581         int error;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_inode_extref *extref;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         inode_cache = &active_node->inode_cache;
1587
1588         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589         total = btrfs_item_size_nr(eb, slot);
1590         while (cur < total) {
1591                 name_len = btrfs_inode_extref_name_len(eb, extref);
1592                 index = btrfs_inode_extref_index(eb, extref);
1593                 parent = btrfs_inode_extref_parent(eb, extref);
1594                 if (name_len <= BTRFS_NAME_LEN) {
1595                         len = name_len;
1596                         error = 0;
1597                 } else {
1598                         len = BTRFS_NAME_LEN;
1599                         error = REF_ERR_NAME_TOO_LONG;
1600                 }
1601                 read_extent_buffer(eb, namebuf,
1602                                    (unsigned long)(extref + 1), len);
1603                 add_inode_backref(inode_cache, key->objectid, parent,
1604                                   index, namebuf, len, 0, key->type, error);
1605
1606                 len = sizeof(*extref) + name_len;
1607                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1608                 cur += len;
1609         }
1610         return 0;
1611
1612 }
1613
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615                             u64 len, u64 *found)
1616 {
1617         struct btrfs_key key;
1618         struct btrfs_path path;
1619         struct extent_buffer *leaf;
1620         int ret;
1621         size_t size;
1622         *found = 0;
1623         u64 csum_end;
1624         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1625
1626         btrfs_init_path(&path);
1627
1628         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1629         key.offset = start;
1630         key.type = BTRFS_EXTENT_CSUM_KEY;
1631
1632         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1633                                 &key, &path, 0, 0);
1634         if (ret < 0)
1635                 goto out;
1636         if (ret > 0 && path.slots[0] > 0) {
1637                 leaf = path.nodes[0];
1638                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640                     key.type == BTRFS_EXTENT_CSUM_KEY)
1641                         path.slots[0]--;
1642         }
1643
1644         while (len > 0) {
1645                 leaf = path.nodes[0];
1646                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1648                         if (ret > 0)
1649                                 break;
1650                         else if (ret < 0)
1651                                 goto out;
1652                         leaf = path.nodes[0];
1653                 }
1654
1655                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657                     key.type != BTRFS_EXTENT_CSUM_KEY)
1658                         break;
1659
1660                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661                 if (key.offset >= start + len)
1662                         break;
1663
1664                 if (key.offset > start)
1665                         start = key.offset;
1666
1667                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669                 if (csum_end > start) {
1670                         size = min(csum_end - start, len);
1671                         len -= size;
1672                         start += size;
1673                         *found += size;
1674                 }
1675
1676                 path.slots[0]++;
1677         }
1678 out:
1679         btrfs_release_path(&path);
1680         if (ret < 0)
1681                 return ret;
1682         return 0;
1683 }
1684
1685 static int process_file_extent(struct btrfs_root *root,
1686                                 struct extent_buffer *eb,
1687                                 int slot, struct btrfs_key *key,
1688                                 struct shared_node *active_node)
1689 {
1690         struct inode_record *rec;
1691         struct btrfs_file_extent_item *fi;
1692         u64 num_bytes = 0;
1693         u64 disk_bytenr = 0;
1694         u64 extent_offset = 0;
1695         u64 mask = root->sectorsize - 1;
1696         int extent_type;
1697         int ret;
1698
1699         rec = active_node->current;
1700         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701         rec->found_file_extent = 1;
1702
1703         if (rec->extent_start == (u64)-1) {
1704                 rec->extent_start = key->offset;
1705                 rec->extent_end = key->offset;
1706         }
1707
1708         if (rec->extent_end > key->offset)
1709                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710         else if (rec->extent_end < key->offset) {
1711                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712                                            key->offset - rec->extent_end);
1713                 if (ret < 0)
1714                         return ret;
1715         }
1716
1717         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718         extent_type = btrfs_file_extent_type(eb, fi);
1719
1720         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1722                 if (num_bytes == 0)
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 rec->found_size += num_bytes;
1725                 num_bytes = (num_bytes + mask) & ~mask;
1726         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730                 extent_offset = btrfs_file_extent_offset(eb, fi);
1731                 if (num_bytes == 0 || (num_bytes & mask))
1732                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733                 if (num_bytes + extent_offset >
1734                     btrfs_file_extent_ram_bytes(eb, fi))
1735                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1736                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737                     (btrfs_file_extent_compression(eb, fi) ||
1738                      btrfs_file_extent_encryption(eb, fi) ||
1739                      btrfs_file_extent_other_encoding(eb, fi)))
1740                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1741                 if (disk_bytenr > 0)
1742                         rec->found_size += num_bytes;
1743         } else {
1744                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1745         }
1746         rec->extent_end = key->offset + num_bytes;
1747
1748         /*
1749          * The data reloc tree will copy full extents into its inode and then
1750          * copy the corresponding csums.  Because the extent it copied could be
1751          * a preallocated extent that hasn't been written to yet there may be no
1752          * csums to copy, ergo we won't have csums for our file extent.  This is
1753          * ok so just don't bother checking csums if the inode belongs to the
1754          * data reloc tree.
1755          */
1756         if (disk_bytenr > 0 &&
1757             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1758                 u64 found;
1759                 if (btrfs_file_extent_compression(eb, fi))
1760                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1761                 else
1762                         disk_bytenr += extent_offset;
1763
1764                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1765                 if (ret < 0)
1766                         return ret;
1767                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1768                         if (found > 0)
1769                                 rec->found_csum_item = 1;
1770                         if (found < num_bytes)
1771                                 rec->some_csum_missing = 1;
1772                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1773                         if (found > 0)
1774                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1775                 }
1776         }
1777         return 0;
1778 }
1779
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781                             struct walk_control *wc)
1782 {
1783         struct btrfs_key key;
1784         u32 nritems;
1785         int i;
1786         int ret = 0;
1787         struct cache_tree *inode_cache;
1788         struct shared_node *active_node;
1789
1790         if (wc->root_level == wc->active_node &&
1791             btrfs_root_refs(&root->root_item) == 0)
1792                 return 0;
1793
1794         active_node = wc->nodes[wc->active_node];
1795         inode_cache = &active_node->inode_cache;
1796         nritems = btrfs_header_nritems(eb);
1797         for (i = 0; i < nritems; i++) {
1798                 btrfs_item_key_to_cpu(eb, &key, i);
1799
1800                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1801                         continue;
1802                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1803                         continue;
1804
1805                 if (active_node->current == NULL ||
1806                     active_node->current->ino < key.objectid) {
1807                         if (active_node->current) {
1808                                 active_node->current->checked = 1;
1809                                 maybe_free_inode_rec(inode_cache,
1810                                                      active_node->current);
1811                         }
1812                         active_node->current = get_inode_rec(inode_cache,
1813                                                              key.objectid, 1);
1814                         BUG_ON(IS_ERR(active_node->current));
1815                 }
1816                 switch (key.type) {
1817                 case BTRFS_DIR_ITEM_KEY:
1818                 case BTRFS_DIR_INDEX_KEY:
1819                         ret = process_dir_item(root, eb, i, &key, active_node);
1820                         break;
1821                 case BTRFS_INODE_REF_KEY:
1822                         ret = process_inode_ref(eb, i, &key, active_node);
1823                         break;
1824                 case BTRFS_INODE_EXTREF_KEY:
1825                         ret = process_inode_extref(eb, i, &key, active_node);
1826                         break;
1827                 case BTRFS_INODE_ITEM_KEY:
1828                         ret = process_inode_item(eb, i, &key, active_node);
1829                         break;
1830                 case BTRFS_EXTENT_DATA_KEY:
1831                         ret = process_file_extent(root, eb, i, &key,
1832                                                   active_node);
1833                         break;
1834                 default:
1835                         break;
1836                 };
1837         }
1838         return ret;
1839 }
1840
1841 static void reada_walk_down(struct btrfs_root *root,
1842                             struct extent_buffer *node, int slot)
1843 {
1844         u64 bytenr;
1845         u64 ptr_gen;
1846         u32 nritems;
1847         u32 blocksize;
1848         int i;
1849         int level;
1850
1851         level = btrfs_header_level(node);
1852         if (level != 1)
1853                 return;
1854
1855         nritems = btrfs_header_nritems(node);
1856         blocksize = root->nodesize;
1857         for (i = slot; i < nritems; i++) {
1858                 bytenr = btrfs_node_blockptr(node, i);
1859                 ptr_gen = btrfs_node_ptr_generation(node, i);
1860                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1861         }
1862 }
1863
1864 /*
1865  * Check the child node/leaf by the following condition:
1866  * 1. the first item key of the node/leaf should be the same with the one
1867  *    in parent.
1868  * 2. block in parent node should match the child node/leaf.
1869  * 3. generation of parent node and child's header should be consistent.
1870  *
1871  * Or the child node/leaf pointed by the key in parent is not valid.
1872  *
1873  * We hope to check leaf owner too, but since subvol may share leaves,
1874  * which makes leaf owner check not so strong, key check should be
1875  * sufficient enough for that case.
1876  */
1877 static int check_child_node(struct btrfs_root *root,
1878                             struct extent_buffer *parent, int slot,
1879                             struct extent_buffer *child)
1880 {
1881         struct btrfs_key parent_key;
1882         struct btrfs_key child_key;
1883         int ret = 0;
1884
1885         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1886         if (btrfs_header_level(child) == 0)
1887                 btrfs_item_key_to_cpu(child, &child_key, 0);
1888         else
1889                 btrfs_node_key_to_cpu(child, &child_key, 0);
1890
1891         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1892                 ret = -EINVAL;
1893                 fprintf(stderr,
1894                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895                         parent_key.objectid, parent_key.type, parent_key.offset,
1896                         child_key.objectid, child_key.type, child_key.offset);
1897         }
1898         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1899                 ret = -EINVAL;
1900                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901                         btrfs_node_blockptr(parent, slot),
1902                         btrfs_header_bytenr(child));
1903         }
1904         if (btrfs_node_ptr_generation(parent, slot) !=
1905             btrfs_header_generation(child)) {
1906                 ret = -EINVAL;
1907                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908                         btrfs_header_generation(child),
1909                         btrfs_node_ptr_generation(parent, slot));
1910         }
1911         return ret;
1912 }
1913
1914 struct node_refs {
1915         u64 bytenr[BTRFS_MAX_LEVEL];
1916         u64 refs[BTRFS_MAX_LEVEL];
1917 };
1918
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920                           struct walk_control *wc, int *level,
1921                           struct node_refs *nrefs)
1922 {
1923         enum btrfs_tree_block_status status;
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         struct extent_buffer *next;
1927         struct extent_buffer *cur;
1928         u32 blocksize;
1929         int ret, err = 0;
1930         u64 refs;
1931
1932         WARN_ON(*level < 0);
1933         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1934
1935         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936                 refs = nrefs->refs[*level];
1937                 ret = 0;
1938         } else {
1939                 ret = btrfs_lookup_extent_info(NULL, root,
1940                                        path->nodes[*level]->start,
1941                                        *level, 1, &refs, NULL);
1942                 if (ret < 0) {
1943                         err = ret;
1944                         goto out;
1945                 }
1946                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947                 nrefs->refs[*level] = refs;
1948         }
1949
1950         if (refs > 1) {
1951                 ret = enter_shared_node(root, path->nodes[*level]->start,
1952                                         refs, wc, *level);
1953                 if (ret > 0) {
1954                         err = ret;
1955                         goto out;
1956                 }
1957         }
1958
1959         while (*level >= 0) {
1960                 WARN_ON(*level < 0);
1961                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962                 cur = path->nodes[*level];
1963
1964                 if (btrfs_header_level(cur) != *level)
1965                         WARN_ON(1);
1966
1967                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1968                         break;
1969                 if (*level == 0) {
1970                         ret = process_one_leaf(root, cur, wc);
1971                         if (ret < 0)
1972                                 err = ret;
1973                         break;
1974                 }
1975                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977                 blocksize = root->nodesize;
1978
1979                 if (bytenr == nrefs->bytenr[*level - 1]) {
1980                         refs = nrefs->refs[*level - 1];
1981                 } else {
1982                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983                                         *level - 1, 1, &refs, NULL);
1984                         if (ret < 0) {
1985                                 refs = 0;
1986                         } else {
1987                                 nrefs->bytenr[*level - 1] = bytenr;
1988                                 nrefs->refs[*level - 1] = refs;
1989                         }
1990                 }
1991
1992                 if (refs > 1) {
1993                         ret = enter_shared_node(root, bytenr, refs,
1994                                                 wc, *level - 1);
1995                         if (ret > 0) {
1996                                 path->slots[*level]++;
1997                                 continue;
1998                         }
1999                 }
2000
2001                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003                         free_extent_buffer(next);
2004                         reada_walk_down(root, cur, path->slots[*level]);
2005                         next = read_tree_block(root, bytenr, blocksize,
2006                                                ptr_gen);
2007                         if (!extent_buffer_uptodate(next)) {
2008                                 struct btrfs_key node_key;
2009
2010                                 btrfs_node_key_to_cpu(path->nodes[*level],
2011                                                       &node_key,
2012                                                       path->slots[*level]);
2013                                 btrfs_add_corrupt_extent_record(root->fs_info,
2014                                                 &node_key,
2015                                                 path->nodes[*level]->start,
2016                                                 root->nodesize, *level);
2017                                 err = -EIO;
2018                                 goto out;
2019                         }
2020                 }
2021
2022                 ret = check_child_node(root, cur, path->slots[*level], next);
2023                 if (ret) {
2024                         err = ret;
2025                         goto out;
2026                 }
2027
2028                 if (btrfs_is_leaf(next))
2029                         status = btrfs_check_leaf(root, NULL, next);
2030                 else
2031                         status = btrfs_check_node(root, NULL, next);
2032                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033                         free_extent_buffer(next);
2034                         err = -EIO;
2035                         goto out;
2036                 }
2037
2038                 *level = *level - 1;
2039                 free_extent_buffer(path->nodes[*level]);
2040                 path->nodes[*level] = next;
2041                 path->slots[*level] = 0;
2042         }
2043 out:
2044         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2045         return err;
2046 }
2047
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049                         struct walk_control *wc, int *level)
2050 {
2051         int i;
2052         struct extent_buffer *leaf;
2053
2054         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055                 leaf = path->nodes[i];
2056                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2057                         path->slots[i]++;
2058                         *level = i;
2059                         return 0;
2060                 } else {
2061                         free_extent_buffer(path->nodes[*level]);
2062                         path->nodes[*level] = NULL;
2063                         BUG_ON(*level > wc->active_node);
2064                         if (*level == wc->active_node)
2065                                 leave_shared_node(root, wc, *level);
2066                         *level = i + 1;
2067                 }
2068         }
2069         return 1;
2070 }
2071
2072 static int check_root_dir(struct inode_record *rec)
2073 {
2074         struct inode_backref *backref;
2075         int ret = -1;
2076
2077         if (!rec->found_inode_item || rec->errors)
2078                 goto out;
2079         if (rec->nlink != 1 || rec->found_link != 0)
2080                 goto out;
2081         if (list_empty(&rec->backrefs))
2082                 goto out;
2083         backref = to_inode_backref(rec->backrefs.next);
2084         if (!backref->found_inode_ref)
2085                 goto out;
2086         if (backref->index != 0 || backref->namelen != 2 ||
2087             memcmp(backref->name, "..", 2))
2088                 goto out;
2089         if (backref->found_dir_index || backref->found_dir_item)
2090                 goto out;
2091         ret = 0;
2092 out:
2093         return ret;
2094 }
2095
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097                               struct btrfs_root *root, struct btrfs_path *path,
2098                               struct inode_record *rec)
2099 {
2100         struct btrfs_inode_item *ei;
2101         struct btrfs_key key;
2102         int ret;
2103
2104         key.objectid = rec->ino;
2105         key.type = BTRFS_INODE_ITEM_KEY;
2106         key.offset = (u64)-1;
2107
2108         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2109         if (ret < 0)
2110                 goto out;
2111         if (ret) {
2112                 if (!path->slots[0]) {
2113                         ret = -ENOENT;
2114                         goto out;
2115                 }
2116                 path->slots[0]--;
2117                 ret = 0;
2118         }
2119         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120         if (key.objectid != rec->ino) {
2121                 ret = -ENOENT;
2122                 goto out;
2123         }
2124
2125         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126                             struct btrfs_inode_item);
2127         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128         btrfs_mark_buffer_dirty(path->nodes[0]);
2129         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131                root->root_key.objectid);
2132 out:
2133         btrfs_release_path(path);
2134         return ret;
2135 }
2136
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138                                     struct btrfs_root *root,
2139                                     struct btrfs_path *path,
2140                                     struct inode_record *rec)
2141 {
2142         int ret;
2143
2144         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145         btrfs_release_path(path);
2146         if (!ret)
2147                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2148         return ret;
2149 }
2150
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152                                struct btrfs_root *root,
2153                                struct btrfs_path *path,
2154                                struct inode_record *rec)
2155 {
2156         struct btrfs_inode_item *ei;
2157         struct btrfs_key key;
2158         int ret = 0;
2159
2160         key.objectid = rec->ino;
2161         key.type = BTRFS_INODE_ITEM_KEY;
2162         key.offset = 0;
2163
2164         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2165         if (ret) {
2166                 if (ret > 0)
2167                         ret = -ENOENT;
2168                 goto out;
2169         }
2170
2171         /* Since ret == 0, no need to check anything */
2172         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173                             struct btrfs_inode_item);
2174         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175         btrfs_mark_buffer_dirty(path->nodes[0]);
2176         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177         printf("reset nbytes for ino %llu root %llu\n",
2178                rec->ino, root->root_key.objectid);
2179 out:
2180         btrfs_release_path(path);
2181         return ret;
2182 }
2183
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185                                  struct cache_tree *inode_cache,
2186                                  struct inode_record *rec,
2187                                  struct inode_backref *backref)
2188 {
2189         struct btrfs_path *path;
2190         struct btrfs_trans_handle *trans;
2191         struct btrfs_dir_item *dir_item;
2192         struct extent_buffer *leaf;
2193         struct btrfs_key key;
2194         struct btrfs_disk_key disk_key;
2195         struct inode_record *dir_rec;
2196         unsigned long name_ptr;
2197         u32 data_size = sizeof(*dir_item) + backref->namelen;
2198         int ret;
2199
2200         path = btrfs_alloc_path();
2201         if (!path)
2202                 return -ENOMEM;
2203
2204         trans = btrfs_start_transaction(root, 1);
2205         if (IS_ERR(trans)) {
2206                 btrfs_free_path(path);
2207                 return PTR_ERR(trans);
2208         }
2209
2210         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2211                 (unsigned long long)rec->ino);
2212         key.objectid = backref->dir;
2213         key.type = BTRFS_DIR_INDEX_KEY;
2214         key.offset = backref->index;
2215
2216         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2217         BUG_ON(ret);
2218
2219         leaf = path->nodes[0];
2220         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2221
2222         disk_key.objectid = cpu_to_le64(rec->ino);
2223         disk_key.type = BTRFS_INODE_ITEM_KEY;
2224         disk_key.offset = 0;
2225
2226         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2227         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2228         btrfs_set_dir_data_len(leaf, dir_item, 0);
2229         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2230         name_ptr = (unsigned long)(dir_item + 1);
2231         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2232         btrfs_mark_buffer_dirty(leaf);
2233         btrfs_free_path(path);
2234         btrfs_commit_transaction(trans, root);
2235
2236         backref->found_dir_index = 1;
2237         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2238         BUG_ON(IS_ERR(dir_rec));
2239         if (!dir_rec)
2240                 return 0;
2241         dir_rec->found_size += backref->namelen;
2242         if (dir_rec->found_size == dir_rec->isize &&
2243             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2244                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2245         if (dir_rec->found_size != dir_rec->isize)
2246                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2247
2248         return 0;
2249 }
2250
2251 static int delete_dir_index(struct btrfs_root *root,
2252                             struct cache_tree *inode_cache,
2253                             struct inode_record *rec,
2254                             struct inode_backref *backref)
2255 {
2256         struct btrfs_trans_handle *trans;
2257         struct btrfs_dir_item *di;
2258         struct btrfs_path *path;
2259         int ret = 0;
2260
2261         path = btrfs_alloc_path();
2262         if (!path)
2263                 return -ENOMEM;
2264
2265         trans = btrfs_start_transaction(root, 1);
2266         if (IS_ERR(trans)) {
2267                 btrfs_free_path(path);
2268                 return PTR_ERR(trans);
2269         }
2270
2271
2272         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2273                 (unsigned long long)backref->dir,
2274                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2275                 (unsigned long long)root->objectid);
2276
2277         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2278                                     backref->name, backref->namelen,
2279                                     backref->index, -1);
2280         if (IS_ERR(di)) {
2281                 ret = PTR_ERR(di);
2282                 btrfs_free_path(path);
2283                 btrfs_commit_transaction(trans, root);
2284                 if (ret == -ENOENT)
2285                         return 0;
2286                 return ret;
2287         }
2288
2289         if (!di)
2290                 ret = btrfs_del_item(trans, root, path);
2291         else
2292                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2293         BUG_ON(ret);
2294         btrfs_free_path(path);
2295         btrfs_commit_transaction(trans, root);
2296         return ret;
2297 }
2298
2299 static int create_inode_item(struct btrfs_root *root,
2300                              struct inode_record *rec,
2301                              struct inode_backref *backref, int root_dir)
2302 {
2303         struct btrfs_trans_handle *trans;
2304         struct btrfs_inode_item inode_item;
2305         time_t now = time(NULL);
2306         int ret;
2307
2308         trans = btrfs_start_transaction(root, 1);
2309         if (IS_ERR(trans)) {
2310                 ret = PTR_ERR(trans);
2311                 return ret;
2312         }
2313
2314         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2315                 "be incomplete, please check permissions and content after "
2316                 "the fsck completes.\n", (unsigned long long)root->objectid,
2317                 (unsigned long long)rec->ino);
2318
2319         memset(&inode_item, 0, sizeof(inode_item));
2320         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2321         if (root_dir)
2322                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2323         else
2324                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2325         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2326         if (rec->found_dir_item) {
2327                 if (rec->found_file_extent)
2328                         fprintf(stderr, "root %llu inode %llu has both a dir "
2329                                 "item and extents, unsure if it is a dir or a "
2330                                 "regular file so setting it as a directory\n",
2331                                 (unsigned long long)root->objectid,
2332                                 (unsigned long long)rec->ino);
2333                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2334                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2335         } else if (!rec->found_dir_item) {
2336                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2337                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2338         }
2339         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2340         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2341         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2342         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2343         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2344         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2345         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2346         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2347
2348         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2349         BUG_ON(ret);
2350         btrfs_commit_transaction(trans, root);
2351         return 0;
2352 }
2353
2354 static int repair_inode_backrefs(struct btrfs_root *root,
2355                                  struct inode_record *rec,
2356                                  struct cache_tree *inode_cache,
2357                                  int delete)
2358 {
2359         struct inode_backref *tmp, *backref;
2360         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2361         int ret = 0;
2362         int repaired = 0;
2363
2364         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2365                 if (!delete && rec->ino == root_dirid) {
2366                         if (!rec->found_inode_item) {
2367                                 ret = create_inode_item(root, rec, backref, 1);
2368                                 if (ret)
2369                                         break;
2370                                 repaired++;
2371                         }
2372                 }
2373
2374                 /* Index 0 for root dir's are special, don't mess with it */
2375                 if (rec->ino == root_dirid && backref->index == 0)
2376                         continue;
2377
2378                 if (delete &&
2379                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2380                      (backref->found_dir_index && backref->found_inode_ref &&
2381                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2382                         ret = delete_dir_index(root, inode_cache, rec, backref);
2383                         if (ret)
2384                                 break;
2385                         repaired++;
2386                         list_del(&backref->list);
2387                         free(backref);
2388                 }
2389
2390                 if (!delete && !backref->found_dir_index &&
2391                     backref->found_dir_item && backref->found_inode_ref) {
2392                         ret = add_missing_dir_index(root, inode_cache, rec,
2393                                                     backref);
2394                         if (ret)
2395                                 break;
2396                         repaired++;
2397                         if (backref->found_dir_item &&
2398                             backref->found_dir_index &&
2399                             backref->found_dir_index) {
2400                                 if (!backref->errors &&
2401                                     backref->found_inode_ref) {
2402                                         list_del(&backref->list);
2403                                         free(backref);
2404                                 }
2405                         }
2406                 }
2407
2408                 if (!delete && (!backref->found_dir_index &&
2409                                 !backref->found_dir_item &&
2410                                 backref->found_inode_ref)) {
2411                         struct btrfs_trans_handle *trans;
2412                         struct btrfs_key location;
2413
2414                         ret = check_dir_conflict(root, backref->name,
2415                                                  backref->namelen,
2416                                                  backref->dir,
2417                                                  backref->index);
2418                         if (ret) {
2419                                 /*
2420                                  * let nlink fixing routine to handle it,
2421                                  * which can do it better.
2422                                  */
2423                                 ret = 0;
2424                                 break;
2425                         }
2426                         location.objectid = rec->ino;
2427                         location.type = BTRFS_INODE_ITEM_KEY;
2428                         location.offset = 0;
2429
2430                         trans = btrfs_start_transaction(root, 1);
2431                         if (IS_ERR(trans)) {
2432                                 ret = PTR_ERR(trans);
2433                                 break;
2434                         }
2435                         fprintf(stderr, "adding missing dir index/item pair "
2436                                 "for inode %llu\n",
2437                                 (unsigned long long)rec->ino);
2438                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2439                                                     backref->namelen,
2440                                                     backref->dir, &location,
2441                                                     imode_to_type(rec->imode),
2442                                                     backref->index);
2443                         BUG_ON(ret);
2444                         btrfs_commit_transaction(trans, root);
2445                         repaired++;
2446                 }
2447
2448                 if (!delete && (backref->found_inode_ref &&
2449                                 backref->found_dir_index &&
2450                                 backref->found_dir_item &&
2451                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2452                                 !rec->found_inode_item)) {
2453                         ret = create_inode_item(root, rec, backref, 0);
2454                         if (ret)
2455                                 break;
2456                         repaired++;
2457                 }
2458
2459         }
2460         return ret ? ret : repaired;
2461 }
2462
2463 /*
2464  * To determine the file type for nlink/inode_item repair
2465  *
2466  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2467  * Return -ENOENT if file type is not found.
2468  */
2469 static int find_file_type(struct inode_record *rec, u8 *type)
2470 {
2471         struct inode_backref *backref;
2472
2473         /* For inode item recovered case */
2474         if (rec->found_inode_item) {
2475                 *type = imode_to_type(rec->imode);
2476                 return 0;
2477         }
2478
2479         list_for_each_entry(backref, &rec->backrefs, list) {
2480                 if (backref->found_dir_index || backref->found_dir_item) {
2481                         *type = backref->filetype;
2482                         return 0;
2483                 }
2484         }
2485         return -ENOENT;
2486 }
2487
2488 /*
2489  * To determine the file name for nlink repair
2490  *
2491  * Return 0 if file name is found, set name and namelen.
2492  * Return -ENOENT if file name is not found.
2493  */
2494 static int find_file_name(struct inode_record *rec,
2495                           char *name, int *namelen)
2496 {
2497         struct inode_backref *backref;
2498
2499         list_for_each_entry(backref, &rec->backrefs, list) {
2500                 if (backref->found_dir_index || backref->found_dir_item ||
2501                     backref->found_inode_ref) {
2502                         memcpy(name, backref->name, backref->namelen);
2503                         *namelen = backref->namelen;
2504                         return 0;
2505                 }
2506         }
2507         return -ENOENT;
2508 }
2509
2510 /* Reset the nlink of the inode to the correct one */
2511 static int reset_nlink(struct btrfs_trans_handle *trans,
2512                        struct btrfs_root *root,
2513                        struct btrfs_path *path,
2514                        struct inode_record *rec)
2515 {
2516         struct inode_backref *backref;
2517         struct inode_backref *tmp;
2518         struct btrfs_key key;
2519         struct btrfs_inode_item *inode_item;
2520         int ret = 0;
2521
2522         /* We don't believe this either, reset it and iterate backref */
2523         rec->found_link = 0;
2524
2525         /* Remove all backref including the valid ones */
2526         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2527                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2528                                    backref->index, backref->name,
2529                                    backref->namelen, 0);
2530                 if (ret < 0)
2531                         goto out;
2532
2533                 /* remove invalid backref, so it won't be added back */
2534                 if (!(backref->found_dir_index &&
2535                       backref->found_dir_item &&
2536                       backref->found_inode_ref)) {
2537                         list_del(&backref->list);
2538                         free(backref);
2539                 } else {
2540                         rec->found_link++;
2541                 }
2542         }
2543
2544         /* Set nlink to 0 */
2545         key.objectid = rec->ino;
2546         key.type = BTRFS_INODE_ITEM_KEY;
2547         key.offset = 0;
2548         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2549         if (ret < 0)
2550                 goto out;
2551         if (ret > 0) {
2552                 ret = -ENOENT;
2553                 goto out;
2554         }
2555         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2556                                     struct btrfs_inode_item);
2557         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2558         btrfs_mark_buffer_dirty(path->nodes[0]);
2559         btrfs_release_path(path);
2560
2561         /*
2562          * Add back valid inode_ref/dir_item/dir_index,
2563          * add_link() will handle the nlink inc, so new nlink must be correct
2564          */
2565         list_for_each_entry(backref, &rec->backrefs, list) {
2566                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2567                                      backref->name, backref->namelen,
2568                                      backref->filetype, &backref->index, 1);
2569                 if (ret < 0)
2570                         goto out;
2571         }
2572 out:
2573         btrfs_release_path(path);
2574         return ret;
2575 }
2576
2577 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2578                                struct btrfs_root *root,
2579                                struct btrfs_path *path,
2580                                struct inode_record *rec)
2581 {
2582         char *dir_name = "lost+found";
2583         char namebuf[BTRFS_NAME_LEN] = {0};
2584         u64 lost_found_ino;
2585         u32 mode = 0700;
2586         u8 type = 0;
2587         int namelen = 0;
2588         int name_recovered = 0;
2589         int type_recovered = 0;
2590         int ret = 0;
2591
2592         /*
2593          * Get file name and type first before these invalid inode ref
2594          * are deleted by remove_all_invalid_backref()
2595          */
2596         name_recovered = !find_file_name(rec, namebuf, &namelen);
2597         type_recovered = !find_file_type(rec, &type);
2598
2599         if (!name_recovered) {
2600                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2601                        rec->ino, rec->ino);
2602                 namelen = count_digits(rec->ino);
2603                 sprintf(namebuf, "%llu", rec->ino);
2604                 name_recovered = 1;
2605         }
2606         if (!type_recovered) {
2607                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2608                        rec->ino);
2609                 type = BTRFS_FT_REG_FILE;
2610                 type_recovered = 1;
2611         }
2612
2613         ret = reset_nlink(trans, root, path, rec);
2614         if (ret < 0) {
2615                 fprintf(stderr,
2616                         "Failed to reset nlink for inode %llu: %s\n",
2617                         rec->ino, strerror(-ret));
2618                 goto out;
2619         }
2620
2621         if (rec->found_link == 0) {
2622                 lost_found_ino = root->highest_inode;
2623                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2624                         ret = -EOVERFLOW;
2625                         goto out;
2626                 }
2627                 lost_found_ino++;
2628                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2629                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2630                                   mode);
2631                 if (ret < 0) {
2632                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2633                                 dir_name, strerror(-ret));
2634                         goto out;
2635                 }
2636                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2637                                      namebuf, namelen, type, NULL, 1);
2638                 /*
2639                  * Add ".INO" suffix several times to handle case where
2640                  * "FILENAME.INO" is already taken by another file.
2641                  */
2642                 while (ret == -EEXIST) {
2643                         /*
2644                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2645                          */
2646                         if (namelen + count_digits(rec->ino) + 1 >
2647                             BTRFS_NAME_LEN) {
2648                                 ret = -EFBIG;
2649                                 goto out;
2650                         }
2651                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2652                                  ".%llu", rec->ino);
2653                         namelen += count_digits(rec->ino) + 1;
2654                         ret = btrfs_add_link(trans, root, rec->ino,
2655                                              lost_found_ino, namebuf,
2656                                              namelen, type, NULL, 1);
2657                 }
2658                 if (ret < 0) {
2659                         fprintf(stderr,
2660                                 "Failed to link the inode %llu to %s dir: %s\n",
2661                                 rec->ino, dir_name, strerror(-ret));
2662                         goto out;
2663                 }
2664                 /*
2665                  * Just increase the found_link, don't actually add the
2666                  * backref. This will make things easier and this inode
2667                  * record will be freed after the repair is done.
2668                  * So fsck will not report problem about this inode.
2669                  */
2670                 rec->found_link++;
2671                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2672                        namelen, namebuf, dir_name);
2673         }
2674         printf("Fixed the nlink of inode %llu\n", rec->ino);
2675 out:
2676         /*
2677          * Clear the flag anyway, or we will loop forever for the same inode
2678          * as it will not be removed from the bad inode list and the dead loop
2679          * happens.
2680          */
2681         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2682         btrfs_release_path(path);
2683         return ret;
2684 }
2685
2686 /*
2687  * Check if there is any normal(reg or prealloc) file extent for given
2688  * ino.
2689  * This is used to determine the file type when neither its dir_index/item or
2690  * inode_item exists.
2691  *
2692  * This will *NOT* report error, if any error happens, just consider it does
2693  * not have any normal file extent.
2694  */
2695 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2696 {
2697         struct btrfs_path *path;
2698         struct btrfs_key key;
2699         struct btrfs_key found_key;
2700         struct btrfs_file_extent_item *fi;
2701         u8 type;
2702         int ret = 0;
2703
2704         path = btrfs_alloc_path();
2705         if (!path)
2706                 goto out;
2707         key.objectid = ino;
2708         key.type = BTRFS_EXTENT_DATA_KEY;
2709         key.offset = 0;
2710
2711         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2712         if (ret < 0) {
2713                 ret = 0;
2714                 goto out;
2715         }
2716         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2717                 ret = btrfs_next_leaf(root, path);
2718                 if (ret) {
2719                         ret = 0;
2720                         goto out;
2721                 }
2722         }
2723         while (1) {
2724                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2725                                       path->slots[0]);
2726                 if (found_key.objectid != ino ||
2727                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2728                         break;
2729                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2730                                     struct btrfs_file_extent_item);
2731                 type = btrfs_file_extent_type(path->nodes[0], fi);
2732                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2733                         ret = 1;
2734                         goto out;
2735                 }
2736         }
2737 out:
2738         btrfs_free_path(path);
2739         return ret;
2740 }
2741
2742 static u32 btrfs_type_to_imode(u8 type)
2743 {
2744         static u32 imode_by_btrfs_type[] = {
2745                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2746                 [BTRFS_FT_DIR]          = S_IFDIR,
2747                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2748                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2749                 [BTRFS_FT_FIFO]         = S_IFIFO,
2750                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2751                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2752         };
2753
2754         return imode_by_btrfs_type[(type)];
2755 }
2756
2757 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2758                                 struct btrfs_root *root,
2759                                 struct btrfs_path *path,
2760                                 struct inode_record *rec)
2761 {
2762         u8 filetype;
2763         u32 mode = 0700;
2764         int type_recovered = 0;
2765         int ret = 0;
2766
2767         printf("Trying to rebuild inode:%llu\n", rec->ino);
2768
2769         type_recovered = !find_file_type(rec, &filetype);
2770
2771         /*
2772          * Try to determine inode type if type not found.
2773          *
2774          * For found regular file extent, it must be FILE.
2775          * For found dir_item/index, it must be DIR.
2776          *
2777          * For undetermined one, use FILE as fallback.
2778          *
2779          * TODO:
2780          * 1. If found backref(inode_index/item is already handled) to it,
2781          *    it must be DIR.
2782          *    Need new inode-inode ref structure to allow search for that.
2783          */
2784         if (!type_recovered) {
2785                 if (rec->found_file_extent &&
2786                     find_normal_file_extent(root, rec->ino)) {
2787                         type_recovered = 1;
2788                         filetype = BTRFS_FT_REG_FILE;
2789                 } else if (rec->found_dir_item) {
2790                         type_recovered = 1;
2791                         filetype = BTRFS_FT_DIR;
2792                 } else if (!list_empty(&rec->orphan_extents)) {
2793                         type_recovered = 1;
2794                         filetype = BTRFS_FT_REG_FILE;
2795                 } else{
2796                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2797                                rec->ino);
2798                         type_recovered = 1;
2799                         filetype = BTRFS_FT_REG_FILE;
2800                 }
2801         }
2802
2803         ret = btrfs_new_inode(trans, root, rec->ino,
2804                               mode | btrfs_type_to_imode(filetype));
2805         if (ret < 0)
2806                 goto out;
2807
2808         /*
2809          * Here inode rebuild is done, we only rebuild the inode item,
2810          * don't repair the nlink(like move to lost+found).
2811          * That is the job of nlink repair.
2812          *
2813          * We just fill the record and return
2814          */
2815         rec->found_dir_item = 1;
2816         rec->imode = mode | btrfs_type_to_imode(filetype);
2817         rec->nlink = 0;
2818         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2819         /* Ensure the inode_nlinks repair function will be called */
2820         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2821 out:
2822         return ret;
2823 }
2824
2825 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2826                                       struct btrfs_root *root,
2827                                       struct btrfs_path *path,
2828                                       struct inode_record *rec)
2829 {
2830         struct orphan_data_extent *orphan;
2831         struct orphan_data_extent *tmp;
2832         int ret = 0;
2833
2834         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2835                 /*
2836                  * Check for conflicting file extents
2837                  *
2838                  * Here we don't know whether the extents is compressed or not,
2839                  * so we can only assume it not compressed nor data offset,
2840                  * and use its disk_len as extent length.
2841                  */
2842                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2843                                        orphan->offset, orphan->disk_len, 0);
2844                 btrfs_release_path(path);
2845                 if (ret < 0)
2846                         goto out;
2847                 if (!ret) {
2848                         fprintf(stderr,
2849                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2850                                 orphan->disk_bytenr, orphan->disk_len);
2851                         ret = btrfs_free_extent(trans,
2852                                         root->fs_info->extent_root,
2853                                         orphan->disk_bytenr, orphan->disk_len,
2854                                         0, root->objectid, orphan->objectid,
2855                                         orphan->offset);
2856                         if (ret < 0)
2857                                 goto out;
2858                 }
2859                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2860                                 orphan->offset, orphan->disk_bytenr,
2861                                 orphan->disk_len, orphan->disk_len);
2862                 if (ret < 0)
2863                         goto out;
2864
2865                 /* Update file size info */
2866                 rec->found_size += orphan->disk_len;
2867                 if (rec->found_size == rec->nbytes)
2868                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2869
2870                 /* Update the file extent hole info too */
2871                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2872                                            orphan->disk_len);
2873                 if (ret < 0)
2874                         goto out;
2875                 if (RB_EMPTY_ROOT(&rec->holes))
2876                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2877
2878                 list_del(&orphan->list);
2879                 free(orphan);
2880         }
2881         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2882 out:
2883         return ret;
2884 }
2885
2886 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2887                                         struct btrfs_root *root,
2888                                         struct btrfs_path *path,
2889                                         struct inode_record *rec)
2890 {
2891         struct rb_node *node;
2892         struct file_extent_hole *hole;
2893         int found = 0;
2894         int ret = 0;
2895
2896         node = rb_first(&rec->holes);
2897
2898         while (node) {
2899                 found = 1;
2900                 hole = rb_entry(node, struct file_extent_hole, node);
2901                 ret = btrfs_punch_hole(trans, root, rec->ino,
2902                                        hole->start, hole->len);
2903                 if (ret < 0)
2904                         goto out;
2905                 ret = del_file_extent_hole(&rec->holes, hole->start,
2906                                            hole->len);
2907                 if (ret < 0)
2908                         goto out;
2909                 if (RB_EMPTY_ROOT(&rec->holes))
2910                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2911                 node = rb_first(&rec->holes);
2912         }
2913         /* special case for a file losing all its file extent */
2914         if (!found) {
2915                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2916                                        round_up(rec->isize, root->sectorsize));
2917                 if (ret < 0)
2918                         goto out;
2919         }
2920         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2921                rec->ino, root->objectid);
2922 out:
2923         return ret;
2924 }
2925
2926 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2927 {
2928         struct btrfs_trans_handle *trans;
2929         struct btrfs_path *path;
2930         int ret = 0;
2931
2932         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2933                              I_ERR_NO_ORPHAN_ITEM |
2934                              I_ERR_LINK_COUNT_WRONG |
2935                              I_ERR_NO_INODE_ITEM |
2936                              I_ERR_FILE_EXTENT_ORPHAN |
2937                              I_ERR_FILE_EXTENT_DISCOUNT|
2938                              I_ERR_FILE_NBYTES_WRONG)))
2939                 return rec->errors;
2940
2941         path = btrfs_alloc_path();
2942         if (!path)
2943                 return -ENOMEM;
2944
2945         /*
2946          * For nlink repair, it may create a dir and add link, so
2947          * 2 for parent(256)'s dir_index and dir_item
2948          * 2 for lost+found dir's inode_item and inode_ref
2949          * 1 for the new inode_ref of the file
2950          * 2 for lost+found dir's dir_index and dir_item for the file
2951          */
2952         trans = btrfs_start_transaction(root, 7);
2953         if (IS_ERR(trans)) {
2954                 btrfs_free_path(path);
2955                 return PTR_ERR(trans);
2956         }
2957
2958         if (rec->errors & I_ERR_NO_INODE_ITEM)
2959                 ret = repair_inode_no_item(trans, root, path, rec);
2960         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2961                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2962         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2963                 ret = repair_inode_discount_extent(trans, root, path, rec);
2964         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2965                 ret = repair_inode_isize(trans, root, path, rec);
2966         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2967                 ret = repair_inode_orphan_item(trans, root, path, rec);
2968         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2969                 ret = repair_inode_nlinks(trans, root, path, rec);
2970         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2971                 ret = repair_inode_nbytes(trans, root, path, rec);
2972         btrfs_commit_transaction(trans, root);
2973         btrfs_free_path(path);
2974         return ret;
2975 }
2976
2977 static int check_inode_recs(struct btrfs_root *root,
2978                             struct cache_tree *inode_cache)
2979 {
2980         struct cache_extent *cache;
2981         struct ptr_node *node;
2982         struct inode_record *rec;
2983         struct inode_backref *backref;
2984         int stage = 0;
2985         int ret = 0;
2986         int err = 0;
2987         u64 error = 0;
2988         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2989
2990         if (btrfs_root_refs(&root->root_item) == 0) {
2991                 if (!cache_tree_empty(inode_cache))
2992                         fprintf(stderr, "warning line %d\n", __LINE__);
2993                 return 0;
2994         }
2995
2996         /*
2997          * We need to record the highest inode number for later 'lost+found'
2998          * dir creation.
2999          * We must select an ino not used/referred by any existing inode, or
3000          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3001          * this may cause 'lost+found' dir has wrong nlinks.
3002          */
3003         cache = last_cache_extent(inode_cache);
3004         if (cache) {
3005                 node = container_of(cache, struct ptr_node, cache);
3006                 rec = node->data;
3007                 if (rec->ino > root->highest_inode)
3008                         root->highest_inode = rec->ino;
3009         }
3010
3011         /*
3012          * We need to repair backrefs first because we could change some of the
3013          * errors in the inode recs.
3014          *
3015          * We also need to go through and delete invalid backrefs first and then
3016          * add the correct ones second.  We do this because we may get EEXIST
3017          * when adding back the correct index because we hadn't yet deleted the
3018          * invalid index.
3019          *
3020          * For example, if we were missing a dir index then the directories
3021          * isize would be wrong, so if we fixed the isize to what we thought it
3022          * would be and then fixed the backref we'd still have a invalid fs, so
3023          * we need to add back the dir index and then check to see if the isize
3024          * is still wrong.
3025          */
3026         while (stage < 3) {
3027                 stage++;
3028                 if (stage == 3 && !err)
3029                         break;
3030
3031                 cache = search_cache_extent(inode_cache, 0);
3032                 while (repair && cache) {
3033                         node = container_of(cache, struct ptr_node, cache);
3034                         rec = node->data;
3035                         cache = next_cache_extent(cache);
3036
3037                         /* Need to free everything up and rescan */
3038                         if (stage == 3) {
3039                                 remove_cache_extent(inode_cache, &node->cache);
3040                                 free(node);
3041                                 free_inode_rec(rec);
3042                                 continue;
3043                         }
3044
3045                         if (list_empty(&rec->backrefs))
3046                                 continue;
3047
3048                         ret = repair_inode_backrefs(root, rec, inode_cache,
3049                                                     stage == 1);
3050                         if (ret < 0) {
3051                                 err = ret;
3052                                 stage = 2;
3053                                 break;
3054                         } if (ret > 0) {
3055                                 err = -EAGAIN;
3056                         }
3057                 }
3058         }
3059         if (err)
3060                 return err;
3061
3062         rec = get_inode_rec(inode_cache, root_dirid, 0);
3063         BUG_ON(IS_ERR(rec));
3064         if (rec) {
3065                 ret = check_root_dir(rec);
3066                 if (ret) {
3067                         fprintf(stderr, "root %llu root dir %llu error\n",
3068                                 (unsigned long long)root->root_key.objectid,
3069                                 (unsigned long long)root_dirid);
3070                         print_inode_error(root, rec);
3071                         error++;
3072                 }
3073         } else {
3074                 if (repair) {
3075                         struct btrfs_trans_handle *trans;
3076
3077                         trans = btrfs_start_transaction(root, 1);
3078                         if (IS_ERR(trans)) {
3079                                 err = PTR_ERR(trans);
3080                                 return err;
3081                         }
3082
3083                         fprintf(stderr,
3084                                 "root %llu missing its root dir, recreating\n",
3085                                 (unsigned long long)root->objectid);
3086
3087                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3088                         BUG_ON(ret);
3089
3090                         btrfs_commit_transaction(trans, root);
3091                         return -EAGAIN;
3092                 }
3093
3094                 fprintf(stderr, "root %llu root dir %llu not found\n",
3095                         (unsigned long long)root->root_key.objectid,
3096                         (unsigned long long)root_dirid);
3097         }
3098
3099         while (1) {
3100                 cache = search_cache_extent(inode_cache, 0);
3101                 if (!cache)
3102                         break;
3103                 node = container_of(cache, struct ptr_node, cache);
3104                 rec = node->data;
3105                 remove_cache_extent(inode_cache, &node->cache);
3106                 free(node);
3107                 if (rec->ino == root_dirid ||
3108                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3109                         free_inode_rec(rec);
3110                         continue;
3111                 }
3112
3113                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3114                         ret = check_orphan_item(root, rec->ino);
3115                         if (ret == 0)
3116                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3117                         if (can_free_inode_rec(rec)) {
3118                                 free_inode_rec(rec);
3119                                 continue;
3120                         }
3121                 }
3122
3123                 if (!rec->found_inode_item)
3124                         rec->errors |= I_ERR_NO_INODE_ITEM;
3125                 if (rec->found_link != rec->nlink)
3126                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3127                 if (repair) {
3128                         ret = try_repair_inode(root, rec);
3129                         if (ret == 0 && can_free_inode_rec(rec)) {
3130                                 free_inode_rec(rec);
3131                                 continue;
3132                         }
3133                         ret = 0;
3134                 }
3135
3136                 if (!(repair && ret == 0))
3137                         error++;
3138                 print_inode_error(root, rec);
3139                 list_for_each_entry(backref, &rec->backrefs, list) {
3140                         if (!backref->found_dir_item)
3141                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3142                         if (!backref->found_dir_index)
3143                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3144                         if (!backref->found_inode_ref)
3145                                 backref->errors |= REF_ERR_NO_INODE_REF;
3146                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3147                                 " namelen %u name %s filetype %d errors %x",
3148                                 (unsigned long long)backref->dir,
3149                                 (unsigned long long)backref->index,
3150                                 backref->namelen, backref->name,
3151                                 backref->filetype, backref->errors);
3152                         print_ref_error(backref->errors);
3153                 }
3154                 free_inode_rec(rec);
3155         }
3156         return (error > 0) ? -1 : 0;
3157 }
3158
3159 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3160                                         u64 objectid)
3161 {
3162         struct cache_extent *cache;
3163         struct root_record *rec = NULL;
3164         int ret;
3165
3166         cache = lookup_cache_extent(root_cache, objectid, 1);
3167         if (cache) {
3168                 rec = container_of(cache, struct root_record, cache);
3169         } else {
3170                 rec = calloc(1, sizeof(*rec));
3171                 if (!rec)
3172                         return ERR_PTR(-ENOMEM);
3173                 rec->objectid = objectid;
3174                 INIT_LIST_HEAD(&rec->backrefs);
3175                 rec->cache.start = objectid;
3176                 rec->cache.size = 1;
3177
3178                 ret = insert_cache_extent(root_cache, &rec->cache);
3179                 if (ret)
3180                         return ERR_PTR(-EEXIST);
3181         }
3182         return rec;
3183 }
3184
3185 static struct root_backref *get_root_backref(struct root_record *rec,
3186                                              u64 ref_root, u64 dir, u64 index,
3187                                              const char *name, int namelen)
3188 {
3189         struct root_backref *backref;
3190
3191         list_for_each_entry(backref, &rec->backrefs, list) {
3192                 if (backref->ref_root != ref_root || backref->dir != dir ||
3193                     backref->namelen != namelen)
3194                         continue;
3195                 if (memcmp(name, backref->name, namelen))
3196                         continue;
3197                 return backref;
3198         }
3199
3200         backref = calloc(1, sizeof(*backref) + namelen + 1);
3201         if (!backref)
3202                 return NULL;
3203         backref->ref_root = ref_root;
3204         backref->dir = dir;
3205         backref->index = index;
3206         backref->namelen = namelen;
3207         memcpy(backref->name, name, namelen);
3208         backref->name[namelen] = '\0';
3209         list_add_tail(&backref->list, &rec->backrefs);
3210         return backref;
3211 }
3212
3213 static void free_root_record(struct cache_extent *cache)
3214 {
3215         struct root_record *rec;
3216         struct root_backref *backref;
3217
3218         rec = container_of(cache, struct root_record, cache);
3219         while (!list_empty(&rec->backrefs)) {
3220                 backref = to_root_backref(rec->backrefs.next);
3221                 list_del(&backref->list);
3222                 free(backref);
3223         }
3224
3225         free(rec);
3226 }
3227
3228 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3229
3230 static int add_root_backref(struct cache_tree *root_cache,
3231                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3232                             const char *name, int namelen,
3233                             int item_type, int errors)
3234 {
3235         struct root_record *rec;
3236         struct root_backref *backref;
3237
3238         rec = get_root_rec(root_cache, root_id);
3239         BUG_ON(IS_ERR(rec));
3240         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3241         BUG_ON(!backref);
3242
3243         backref->errors |= errors;
3244
3245         if (item_type != BTRFS_DIR_ITEM_KEY) {
3246                 if (backref->found_dir_index || backref->found_back_ref ||
3247                     backref->found_forward_ref) {
3248                         if (backref->index != index)
3249                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3250                 } else {
3251                         backref->index = index;
3252                 }
3253         }
3254
3255         if (item_type == BTRFS_DIR_ITEM_KEY) {
3256                 if (backref->found_forward_ref)
3257                         rec->found_ref++;
3258                 backref->found_dir_item = 1;
3259         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3260                 backref->found_dir_index = 1;
3261         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3262                 if (backref->found_forward_ref)
3263                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3264                 else if (backref->found_dir_item)
3265                         rec->found_ref++;
3266                 backref->found_forward_ref = 1;
3267         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3268                 if (backref->found_back_ref)
3269                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3270                 backref->found_back_ref = 1;
3271         } else {
3272                 BUG_ON(1);
3273         }
3274
3275         if (backref->found_forward_ref && backref->found_dir_item)
3276                 backref->reachable = 1;
3277         return 0;
3278 }
3279
3280 static int merge_root_recs(struct btrfs_root *root,
3281                            struct cache_tree *src_cache,
3282                            struct cache_tree *dst_cache)
3283 {
3284         struct cache_extent *cache;
3285         struct ptr_node *node;
3286         struct inode_record *rec;
3287         struct inode_backref *backref;
3288         int ret = 0;
3289
3290         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3291                 free_inode_recs_tree(src_cache);
3292                 return 0;
3293         }
3294
3295         while (1) {
3296                 cache = search_cache_extent(src_cache, 0);
3297                 if (!cache)
3298                         break;
3299                 node = container_of(cache, struct ptr_node, cache);
3300                 rec = node->data;
3301                 remove_cache_extent(src_cache, &node->cache);
3302                 free(node);
3303
3304                 ret = is_child_root(root, root->objectid, rec->ino);
3305                 if (ret < 0)
3306                         break;
3307                 else if (ret == 0)
3308                         goto skip;
3309
3310                 list_for_each_entry(backref, &rec->backrefs, list) {
3311                         BUG_ON(backref->found_inode_ref);
3312                         if (backref->found_dir_item)
3313                                 add_root_backref(dst_cache, rec->ino,
3314                                         root->root_key.objectid, backref->dir,
3315                                         backref->index, backref->name,
3316                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3317                                         backref->errors);
3318                         if (backref->found_dir_index)
3319                                 add_root_backref(dst_cache, rec->ino,
3320                                         root->root_key.objectid, backref->dir,
3321                                         backref->index, backref->name,
3322                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3323                                         backref->errors);
3324                 }
3325 skip:
3326                 free_inode_rec(rec);
3327         }
3328         if (ret < 0)
3329                 return ret;
3330         return 0;
3331 }
3332
3333 static int check_root_refs(struct btrfs_root *root,
3334                            struct cache_tree *root_cache)
3335 {
3336         struct root_record *rec;
3337         struct root_record *ref_root;
3338         struct root_backref *backref;
3339         struct cache_extent *cache;
3340         int loop = 1;
3341         int ret;
3342         int error;
3343         int errors = 0;
3344
3345         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3346         BUG_ON(IS_ERR(rec));
3347         rec->found_ref = 1;
3348
3349         /* fixme: this can not detect circular references */
3350         while (loop) {
3351                 loop = 0;
3352                 cache = search_cache_extent(root_cache, 0);
3353                 while (1) {
3354                         if (!cache)
3355                                 break;
3356                         rec = container_of(cache, struct root_record, cache);
3357                         cache = next_cache_extent(cache);
3358
3359                         if (rec->found_ref == 0)
3360                                 continue;
3361
3362                         list_for_each_entry(backref, &rec->backrefs, list) {
3363                                 if (!backref->reachable)
3364                                         continue;
3365
3366                                 ref_root = get_root_rec(root_cache,
3367                                                         backref->ref_root);
3368                                 BUG_ON(IS_ERR(ref_root));
3369                                 if (ref_root->found_ref > 0)
3370                                         continue;
3371
3372                                 backref->reachable = 0;
3373                                 rec->found_ref--;
3374                                 if (rec->found_ref == 0)
3375                                         loop = 1;
3376                         }
3377                 }
3378         }
3379
3380         cache = search_cache_extent(root_cache, 0);
3381         while (1) {
3382                 if (!cache)
3383                         break;
3384                 rec = container_of(cache, struct root_record, cache);
3385                 cache = next_cache_extent(cache);
3386
3387                 if (rec->found_ref == 0 &&
3388                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3389                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3390                         ret = check_orphan_item(root->fs_info->tree_root,
3391                                                 rec->objectid);
3392                         if (ret == 0)
3393                                 continue;
3394
3395                         /*
3396                          * If we don't have a root item then we likely just have
3397                          * a dir item in a snapshot for this root but no actual
3398                          * ref key or anything so it's meaningless.
3399                          */
3400                         if (!rec->found_root_item)
3401                                 continue;
3402                         errors++;
3403                         fprintf(stderr, "fs tree %llu not referenced\n",
3404                                 (unsigned long long)rec->objectid);
3405                 }
3406
3407                 error = 0;
3408                 if (rec->found_ref > 0 && !rec->found_root_item)
3409                         error = 1;
3410                 list_for_each_entry(backref, &rec->backrefs, list) {
3411                         if (!backref->found_dir_item)
3412                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3413                         if (!backref->found_dir_index)
3414                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3415                         if (!backref->found_back_ref)
3416                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3417                         if (!backref->found_forward_ref)
3418                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3419                         if (backref->reachable && backref->errors)
3420                                 error = 1;
3421                 }
3422                 if (!error)
3423                         continue;
3424
3425                 errors++;
3426                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3427                         (unsigned long long)rec->objectid, rec->found_ref,
3428                          rec->found_root_item ? "" : "not found");
3429
3430                 list_for_each_entry(backref, &rec->backrefs, list) {
3431                         if (!backref->reachable)
3432                                 continue;
3433                         if (!backref->errors && rec->found_root_item)
3434                                 continue;
3435                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3436                                 " index %llu namelen %u name %s errors %x\n",
3437                                 (unsigned long long)backref->ref_root,
3438                                 (unsigned long long)backref->dir,
3439                                 (unsigned long long)backref->index,
3440                                 backref->namelen, backref->name,
3441                                 backref->errors);
3442                         print_ref_error(backref->errors);
3443                 }
3444         }
3445         return errors > 0 ? 1 : 0;
3446 }
3447
3448 static int process_root_ref(struct extent_buffer *eb, int slot,
3449                             struct btrfs_key *key,
3450                             struct cache_tree *root_cache)
3451 {
3452         u64 dirid;
3453         u64 index;
3454         u32 len;
3455         u32 name_len;
3456         struct btrfs_root_ref *ref;
3457         char namebuf[BTRFS_NAME_LEN];
3458         int error;
3459
3460         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3461
3462         dirid = btrfs_root_ref_dirid(eb, ref);
3463         index = btrfs_root_ref_sequence(eb, ref);
3464         name_len = btrfs_root_ref_name_len(eb, ref);
3465
3466         if (name_len <= BTRFS_NAME_LEN) {
3467                 len = name_len;
3468                 error = 0;
3469         } else {
3470                 len = BTRFS_NAME_LEN;
3471                 error = REF_ERR_NAME_TOO_LONG;
3472         }
3473         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3474
3475         if (key->type == BTRFS_ROOT_REF_KEY) {
3476                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3477                                  index, namebuf, len, key->type, error);
3478         } else {
3479                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3480                                  index, namebuf, len, key->type, error);
3481         }
3482         return 0;
3483 }
3484
3485 static void free_corrupt_block(struct cache_extent *cache)
3486 {
3487         struct btrfs_corrupt_block *corrupt;
3488
3489         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3490         free(corrupt);
3491 }
3492
3493 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3494
3495 /*
3496  * Repair the btree of the given root.
3497  *
3498  * The fix is to remove the node key in corrupt_blocks cache_tree.
3499  * and rebalance the tree.
3500  * After the fix, the btree should be writeable.
3501  */
3502 static int repair_btree(struct btrfs_root *root,
3503                         struct cache_tree *corrupt_blocks)
3504 {
3505         struct btrfs_trans_handle *trans;
3506         struct btrfs_path *path;
3507         struct btrfs_corrupt_block *corrupt;
3508         struct cache_extent *cache;
3509         struct btrfs_key key;
3510         u64 offset;
3511         int level;
3512         int ret = 0;
3513
3514         if (cache_tree_empty(corrupt_blocks))
3515                 return 0;
3516
3517         path = btrfs_alloc_path();
3518         if (!path)
3519                 return -ENOMEM;
3520
3521         trans = btrfs_start_transaction(root, 1);
3522         if (IS_ERR(trans)) {
3523                 ret = PTR_ERR(trans);
3524                 fprintf(stderr, "Error starting transaction: %s\n",
3525                         strerror(-ret));
3526                 goto out_free_path;
3527         }
3528         cache = first_cache_extent(corrupt_blocks);
3529         while (cache) {
3530                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3531                                        cache);
3532                 level = corrupt->level;
3533                 path->lowest_level = level;
3534                 key.objectid = corrupt->key.objectid;
3535                 key.type = corrupt->key.type;
3536                 key.offset = corrupt->key.offset;
3537
3538                 /*
3539                  * Here we don't want to do any tree balance, since it may
3540                  * cause a balance with corrupted brother leaf/node,
3541                  * so ins_len set to 0 here.
3542                  * Balance will be done after all corrupt node/leaf is deleted.
3543                  */
3544                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3545                 if (ret < 0)
3546                         goto out;
3547                 offset = btrfs_node_blockptr(path->nodes[level],
3548                                              path->slots[level]);
3549
3550                 /* Remove the ptr */
3551                 ret = btrfs_del_ptr(trans, root, path, level,
3552                                     path->slots[level]);
3553                 if (ret < 0)
3554                         goto out;
3555                 /*
3556                  * Remove the corresponding extent
3557                  * return value is not concerned.
3558                  */
3559                 btrfs_release_path(path);
3560                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3561                                         0, root->root_key.objectid,
3562                                         level - 1, 0);
3563                 cache = next_cache_extent(cache);
3564         }
3565
3566         /* Balance the btree using btrfs_search_slot() */
3567         cache = first_cache_extent(corrupt_blocks);
3568         while (cache) {
3569                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3570                                        cache);
3571                 memcpy(&key, &corrupt->key, sizeof(key));
3572                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3573                 if (ret < 0)
3574                         goto out;
3575                 /* return will always >0 since it won't find the item */
3576                 ret = 0;
3577                 btrfs_release_path(path);
3578                 cache = next_cache_extent(cache);
3579         }
3580 out:
3581         btrfs_commit_transaction(trans, root);
3582 out_free_path:
3583         btrfs_free_path(path);
3584         return ret;
3585 }
3586
3587 static int check_fs_root(struct btrfs_root *root,
3588                          struct cache_tree *root_cache,
3589                          struct walk_control *wc)
3590 {
3591         int ret = 0;
3592         int err = 0;
3593         int wret;
3594         int level;
3595         struct btrfs_path path;
3596         struct shared_node root_node;
3597         struct root_record *rec;
3598         struct btrfs_root_item *root_item = &root->root_item;
3599         struct cache_tree corrupt_blocks;
3600         struct orphan_data_extent *orphan;
3601         struct orphan_data_extent *tmp;
3602         enum btrfs_tree_block_status status;
3603         struct node_refs nrefs;
3604
3605         /*
3606          * Reuse the corrupt_block cache tree to record corrupted tree block
3607          *
3608          * Unlike the usage in extent tree check, here we do it in a per
3609          * fs/subvol tree base.
3610          */
3611         cache_tree_init(&corrupt_blocks);
3612         root->fs_info->corrupt_blocks = &corrupt_blocks;
3613
3614         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3615                 rec = get_root_rec(root_cache, root->root_key.objectid);
3616                 BUG_ON(IS_ERR(rec));
3617                 if (btrfs_root_refs(root_item) > 0)
3618                         rec->found_root_item = 1;
3619         }
3620
3621         btrfs_init_path(&path);
3622         memset(&root_node, 0, sizeof(root_node));
3623         cache_tree_init(&root_node.root_cache);
3624         cache_tree_init(&root_node.inode_cache);
3625         memset(&nrefs, 0, sizeof(nrefs));
3626
3627         /* Move the orphan extent record to corresponding inode_record */
3628         list_for_each_entry_safe(orphan, tmp,
3629                                  &root->orphan_data_extents, list) {
3630                 struct inode_record *inode;
3631
3632                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3633                                       1);
3634                 BUG_ON(IS_ERR(inode));
3635                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3636                 list_move(&orphan->list, &inode->orphan_extents);
3637         }
3638
3639         level = btrfs_header_level(root->node);
3640         memset(wc->nodes, 0, sizeof(wc->nodes));
3641         wc->nodes[level] = &root_node;
3642         wc->active_node = level;
3643         wc->root_level = level;
3644
3645         /* We may not have checked the root block, lets do that now */
3646         if (btrfs_is_leaf(root->node))
3647                 status = btrfs_check_leaf(root, NULL, root->node);
3648         else
3649                 status = btrfs_check_node(root, NULL, root->node);
3650         if (status != BTRFS_TREE_BLOCK_CLEAN)
3651                 return -EIO;
3652
3653         if (btrfs_root_refs(root_item) > 0 ||
3654             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3655                 path.nodes[level] = root->node;
3656                 extent_buffer_get(root->node);
3657                 path.slots[level] = 0;
3658         } else {
3659                 struct btrfs_key key;
3660                 struct btrfs_disk_key found_key;
3661
3662                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3663                 level = root_item->drop_level;
3664                 path.lowest_level = level;
3665                 if (level > btrfs_header_level(root->node) ||
3666                     level >= BTRFS_MAX_LEVEL) {
3667                         error("ignoring invalid drop level: %u", level);
3668                         goto skip_walking;
3669                 }
3670                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3671                 if (wret < 0)
3672                         goto skip_walking;
3673                 btrfs_node_key(path.nodes[level], &found_key,
3674                                 path.slots[level]);
3675                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3676                                         sizeof(found_key)));
3677         }
3678
3679         while (1) {
3680                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3681                 if (wret < 0)
3682                         ret = wret;
3683                 if (wret != 0)
3684                         break;
3685
3686                 wret = walk_up_tree(root, &path, wc, &level);
3687                 if (wret < 0)
3688                         ret = wret;
3689                 if (wret != 0)
3690                         break;
3691         }
3692 skip_walking:
3693         btrfs_release_path(&path);
3694
3695         if (!cache_tree_empty(&corrupt_blocks)) {
3696                 struct cache_extent *cache;
3697                 struct btrfs_corrupt_block *corrupt;
3698
3699                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3700                        root->root_key.objectid);
3701                 cache = first_cache_extent(&corrupt_blocks);
3702                 while (cache) {
3703                         corrupt = container_of(cache,
3704                                                struct btrfs_corrupt_block,
3705                                                cache);
3706                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3707                                cache->start, corrupt->level,
3708                                corrupt->key.objectid, corrupt->key.type,
3709                                corrupt->key.offset);
3710                         cache = next_cache_extent(cache);
3711                 }
3712                 if (repair) {
3713                         printf("Try to repair the btree for root %llu\n",
3714                                root->root_key.objectid);
3715                         ret = repair_btree(root, &corrupt_blocks);
3716                         if (ret < 0)
3717                                 fprintf(stderr, "Failed to repair btree: %s\n",
3718                                         strerror(-ret));
3719                         if (!ret)
3720                                 printf("Btree for root %llu is fixed\n",
3721                                        root->root_key.objectid);
3722                 }
3723         }
3724
3725         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3726         if (err < 0)
3727                 ret = err;
3728
3729         if (root_node.current) {
3730                 root_node.current->checked = 1;
3731                 maybe_free_inode_rec(&root_node.inode_cache,
3732                                 root_node.current);
3733         }
3734
3735         err = check_inode_recs(root, &root_node.inode_cache);
3736         if (!ret)
3737                 ret = err;
3738
3739         free_corrupt_blocks_tree(&corrupt_blocks);
3740         root->fs_info->corrupt_blocks = NULL;
3741         free_orphan_data_extents(&root->orphan_data_extents);
3742         return ret;
3743 }
3744
3745 static int fs_root_objectid(u64 objectid)
3746 {
3747         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3748             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3749                 return 1;
3750         return is_fstree(objectid);
3751 }
3752
3753 static int check_fs_roots(struct btrfs_root *root,
3754                           struct cache_tree *root_cache)
3755 {
3756         struct btrfs_path path;
3757         struct btrfs_key key;
3758         struct walk_control wc;
3759         struct extent_buffer *leaf, *tree_node;
3760         struct btrfs_root *tmp_root;
3761         struct btrfs_root *tree_root = root->fs_info->tree_root;
3762         int ret;
3763         int err = 0;
3764
3765         if (ctx.progress_enabled) {
3766                 ctx.tp = TASK_FS_ROOTS;
3767                 task_start(ctx.info);
3768         }
3769
3770         /*
3771          * Just in case we made any changes to the extent tree that weren't
3772          * reflected into the free space cache yet.
3773          */
3774         if (repair)
3775                 reset_cached_block_groups(root->fs_info);
3776         memset(&wc, 0, sizeof(wc));
3777         cache_tree_init(&wc.shared);
3778         btrfs_init_path(&path);
3779
3780 again:
3781         key.offset = 0;
3782         key.objectid = 0;
3783         key.type = BTRFS_ROOT_ITEM_KEY;
3784         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3785         if (ret < 0) {
3786                 err = 1;
3787                 goto out;
3788         }
3789         tree_node = tree_root->node;
3790         while (1) {
3791                 if (tree_node != tree_root->node) {
3792                         free_root_recs_tree(root_cache);
3793                         btrfs_release_path(&path);
3794                         goto again;
3795                 }
3796                 leaf = path.nodes[0];
3797                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3798                         ret = btrfs_next_leaf(tree_root, &path);
3799                         if (ret) {
3800                                 if (ret < 0)
3801                                         err = 1;
3802                                 break;
3803                         }
3804                         leaf = path.nodes[0];
3805                 }
3806                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3807                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3808                     fs_root_objectid(key.objectid)) {
3809                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3810                                 tmp_root = btrfs_read_fs_root_no_cache(
3811                                                 root->fs_info, &key);
3812                         } else {
3813                                 key.offset = (u64)-1;
3814                                 tmp_root = btrfs_read_fs_root(
3815                                                 root->fs_info, &key);
3816                         }
3817                         if (IS_ERR(tmp_root)) {
3818                                 err = 1;
3819                                 goto next;
3820                         }
3821                         ret = check_fs_root(tmp_root, root_cache, &wc);
3822                         if (ret == -EAGAIN) {
3823                                 free_root_recs_tree(root_cache);
3824                                 btrfs_release_path(&path);
3825                                 goto again;
3826                         }
3827                         if (ret)
3828                                 err = 1;
3829                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3830                                 btrfs_free_fs_root(tmp_root);
3831                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3832                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3833                         process_root_ref(leaf, path.slots[0], &key,
3834                                          root_cache);
3835                 }
3836 next:
3837                 path.slots[0]++;
3838         }
3839 out:
3840         btrfs_release_path(&path);
3841         if (err)
3842                 free_extent_cache_tree(&wc.shared);
3843         if (!cache_tree_empty(&wc.shared))
3844                 fprintf(stderr, "warning line %d\n", __LINE__);
3845
3846         task_stop(ctx.info);
3847
3848         return err;
3849 }
3850
3851 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3852 {
3853         struct list_head *cur = rec->backrefs.next;
3854         struct extent_backref *back;
3855         struct tree_backref *tback;
3856         struct data_backref *dback;
3857         u64 found = 0;
3858         int err = 0;
3859
3860         while(cur != &rec->backrefs) {
3861                 back = to_extent_backref(cur);
3862                 cur = cur->next;
3863                 if (!back->found_extent_tree) {
3864                         err = 1;
3865                         if (!print_errs)
3866                                 goto out;
3867                         if (back->is_data) {
3868                                 dback = to_data_backref(back);
3869                                 fprintf(stderr, "Backref %llu %s %llu"
3870                                         " owner %llu offset %llu num_refs %lu"
3871                                         " not found in extent tree\n",
3872                                         (unsigned long long)rec->start,
3873                                         back->full_backref ?
3874                                         "parent" : "root",
3875                                         back->full_backref ?
3876                                         (unsigned long long)dback->parent:
3877                                         (unsigned long long)dback->root,
3878                                         (unsigned long long)dback->owner,
3879                                         (unsigned long long)dback->offset,
3880                                         (unsigned long)dback->num_refs);
3881                         } else {
3882                                 tback = to_tree_backref(back);
3883                                 fprintf(stderr, "Backref %llu parent %llu"
3884                                         " root %llu not found in extent tree\n",
3885                                         (unsigned long long)rec->start,
3886                                         (unsigned long long)tback->parent,
3887                                         (unsigned long long)tback->root);
3888                         }
3889                 }
3890                 if (!back->is_data && !back->found_ref) {
3891                         err = 1;
3892                         if (!print_errs)
3893                                 goto out;
3894                         tback = to_tree_backref(back);
3895                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3896                                 (unsigned long long)rec->start,
3897                                 back->full_backref ? "parent" : "root",
3898                                 back->full_backref ?
3899                                 (unsigned long long)tback->parent :
3900                                 (unsigned long long)tback->root, back);
3901                 }
3902                 if (back->is_data) {
3903                         dback = to_data_backref(back);
3904                         if (dback->found_ref != dback->num_refs) {
3905                                 err = 1;
3906                                 if (!print_errs)
3907                                         goto out;
3908                                 fprintf(stderr, "Incorrect local backref count"
3909                                         " on %llu %s %llu owner %llu"
3910                                         " offset %llu found %u wanted %u back %p\n",
3911                                         (unsigned long long)rec->start,
3912                                         back->full_backref ?
3913                                         "parent" : "root",
3914                                         back->full_backref ?
3915                                         (unsigned long long)dback->parent:
3916                                         (unsigned long long)dback->root,
3917                                         (unsigned long long)dback->owner,
3918                                         (unsigned long long)dback->offset,
3919                                         dback->found_ref, dback->num_refs, back);
3920                         }
3921                         if (dback->disk_bytenr != rec->start) {
3922                                 err = 1;
3923                                 if (!print_errs)
3924                                         goto out;
3925                                 fprintf(stderr, "Backref disk bytenr does not"
3926                                         " match extent record, bytenr=%llu, "
3927                                         "ref bytenr=%llu\n",
3928                                         (unsigned long long)rec->start,
3929                                         (unsigned long long)dback->disk_bytenr);
3930                         }
3931
3932                         if (dback->bytes != rec->nr) {
3933                                 err = 1;
3934                                 if (!print_errs)
3935                                         goto out;
3936                                 fprintf(stderr, "Backref bytes do not match "
3937                                         "extent backref, bytenr=%llu, ref "
3938                                         "bytes=%llu, backref bytes=%llu\n",
3939                                         (unsigned long long)rec->start,
3940                                         (unsigned long long)rec->nr,
3941                                         (unsigned long long)dback->bytes);
3942                         }
3943                 }
3944                 if (!back->is_data) {
3945                         found += 1;
3946                 } else {
3947                         dback = to_data_backref(back);
3948                         found += dback->found_ref;
3949                 }
3950         }
3951         if (found != rec->refs) {
3952                 err = 1;
3953                 if (!print_errs)
3954                         goto out;
3955                 fprintf(stderr, "Incorrect global backref count "
3956                         "on %llu found %llu wanted %llu\n",
3957                         (unsigned long long)rec->start,
3958                         (unsigned long long)found,
3959                         (unsigned long long)rec->refs);
3960         }
3961 out:
3962         return err;
3963 }
3964
3965 static int free_all_extent_backrefs(struct extent_record *rec)
3966 {
3967         struct extent_backref *back;
3968         struct list_head *cur;
3969         while (!list_empty(&rec->backrefs)) {
3970                 cur = rec->backrefs.next;
3971                 back = to_extent_backref(cur);
3972                 list_del(cur);
3973                 free(back);
3974         }
3975         return 0;
3976 }
3977
3978 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3979                                      struct cache_tree *extent_cache)
3980 {
3981         struct cache_extent *cache;
3982         struct extent_record *rec;
3983
3984         while (1) {
3985                 cache = first_cache_extent(extent_cache);
3986                 if (!cache)
3987                         break;
3988                 rec = container_of(cache, struct extent_record, cache);
3989                 remove_cache_extent(extent_cache, cache);
3990                 free_all_extent_backrefs(rec);
3991                 free(rec);
3992         }
3993 }
3994
3995 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3996                                  struct extent_record *rec)
3997 {
3998         if (rec->content_checked && rec->owner_ref_checked &&
3999             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4000             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4001             !rec->bad_full_backref && !rec->crossing_stripes &&
4002             !rec->wrong_chunk_type) {
4003                 remove_cache_extent(extent_cache, &rec->cache);
4004                 free_all_extent_backrefs(rec);
4005                 list_del_init(&rec->list);
4006                 free(rec);
4007         }
4008         return 0;
4009 }
4010
4011 static int check_owner_ref(struct btrfs_root *root,
4012                             struct extent_record *rec,
4013                             struct extent_buffer *buf)
4014 {
4015         struct extent_backref *node;
4016         struct tree_backref *back;
4017         struct btrfs_root *ref_root;
4018         struct btrfs_key key;
4019         struct btrfs_path path;
4020         struct extent_buffer *parent;
4021         int level;
4022         int found = 0;
4023         int ret;
4024
4025         list_for_each_entry(node, &rec->backrefs, list) {
4026                 if (node->is_data)
4027                         continue;
4028                 if (!node->found_ref)
4029                         continue;
4030                 if (node->full_backref)
4031                         continue;
4032                 back = to_tree_backref(node);
4033                 if (btrfs_header_owner(buf) == back->root)
4034                         return 0;
4035         }
4036         BUG_ON(rec->is_root);
4037
4038         /* try to find the block by search corresponding fs tree */
4039         key.objectid = btrfs_header_owner(buf);
4040         key.type = BTRFS_ROOT_ITEM_KEY;
4041         key.offset = (u64)-1;
4042
4043         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4044         if (IS_ERR(ref_root))
4045                 return 1;
4046
4047         level = btrfs_header_level(buf);
4048         if (level == 0)
4049                 btrfs_item_key_to_cpu(buf, &key, 0);
4050         else
4051                 btrfs_node_key_to_cpu(buf, &key, 0);
4052
4053         btrfs_init_path(&path);
4054         path.lowest_level = level + 1;
4055         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4056         if (ret < 0)
4057                 return 0;
4058
4059         parent = path.nodes[level + 1];
4060         if (parent && buf->start == btrfs_node_blockptr(parent,
4061                                                         path.slots[level + 1]))
4062                 found = 1;
4063
4064         btrfs_release_path(&path);
4065         return found ? 0 : 1;
4066 }
4067
4068 static int is_extent_tree_record(struct extent_record *rec)
4069 {
4070         struct list_head *cur = rec->backrefs.next;
4071         struct extent_backref *node;
4072         struct tree_backref *back;
4073         int is_extent = 0;
4074
4075         while(cur != &rec->backrefs) {
4076                 node = to_extent_backref(cur);
4077                 cur = cur->next;
4078                 if (node->is_data)
4079                         return 0;
4080                 back = to_tree_backref(node);
4081                 if (node->full_backref)
4082                         return 0;
4083                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4084                         is_extent = 1;
4085         }
4086         return is_extent;
4087 }
4088
4089
4090 static int record_bad_block_io(struct btrfs_fs_info *info,
4091                                struct cache_tree *extent_cache,
4092                                u64 start, u64 len)
4093 {
4094         struct extent_record *rec;
4095         struct cache_extent *cache;
4096         struct btrfs_key key;
4097
4098         cache = lookup_cache_extent(extent_cache, start, len);
4099         if (!cache)
4100                 return 0;
4101
4102         rec = container_of(cache, struct extent_record, cache);
4103         if (!is_extent_tree_record(rec))
4104                 return 0;
4105
4106         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4107         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4108 }
4109
4110 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4111                        struct extent_buffer *buf, int slot)
4112 {
4113         if (btrfs_header_level(buf)) {
4114                 struct btrfs_key_ptr ptr1, ptr2;
4115
4116                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4117                                    sizeof(struct btrfs_key_ptr));
4118                 read_extent_buffer(buf, &ptr2,
4119                                    btrfs_node_key_ptr_offset(slot + 1),
4120                                    sizeof(struct btrfs_key_ptr));
4121                 write_extent_buffer(buf, &ptr1,
4122                                     btrfs_node_key_ptr_offset(slot + 1),
4123                                     sizeof(struct btrfs_key_ptr));
4124                 write_extent_buffer(buf, &ptr2,
4125                                     btrfs_node_key_ptr_offset(slot),
4126                                     sizeof(struct btrfs_key_ptr));
4127                 if (slot == 0) {
4128                         struct btrfs_disk_key key;
4129                         btrfs_node_key(buf, &key, 0);
4130                         btrfs_fixup_low_keys(root, path, &key,
4131                                              btrfs_header_level(buf) + 1);
4132                 }
4133         } else {
4134                 struct btrfs_item *item1, *item2;
4135                 struct btrfs_key k1, k2;
4136                 char *item1_data, *item2_data;
4137                 u32 item1_offset, item2_offset, item1_size, item2_size;
4138
4139                 item1 = btrfs_item_nr(slot);
4140                 item2 = btrfs_item_nr(slot + 1);
4141                 btrfs_item_key_to_cpu(buf, &k1, slot);
4142                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4143                 item1_offset = btrfs_item_offset(buf, item1);
4144                 item2_offset = btrfs_item_offset(buf, item2);
4145                 item1_size = btrfs_item_size(buf, item1);
4146                 item2_size = btrfs_item_size(buf, item2);
4147
4148                 item1_data = malloc(item1_size);
4149                 if (!item1_data)
4150                         return -ENOMEM;
4151                 item2_data = malloc(item2_size);
4152                 if (!item2_data) {
4153                         free(item1_data);
4154                         return -ENOMEM;
4155                 }
4156
4157                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4158                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4159
4160                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4161                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4162                 free(item1_data);
4163                 free(item2_data);
4164
4165                 btrfs_set_item_offset(buf, item1, item2_offset);
4166                 btrfs_set_item_offset(buf, item2, item1_offset);
4167                 btrfs_set_item_size(buf, item1, item2_size);
4168                 btrfs_set_item_size(buf, item2, item1_size);
4169
4170                 path->slots[0] = slot;
4171                 btrfs_set_item_key_unsafe(root, path, &k2);
4172                 path->slots[0] = slot + 1;
4173                 btrfs_set_item_key_unsafe(root, path, &k1);
4174         }
4175         return 0;
4176 }
4177
4178 static int fix_key_order(struct btrfs_trans_handle *trans,
4179                          struct btrfs_root *root,
4180                          struct btrfs_path *path)
4181 {
4182         struct extent_buffer *buf;
4183         struct btrfs_key k1, k2;
4184         int i;
4185         int level = path->lowest_level;
4186         int ret = -EIO;
4187
4188         buf = path->nodes[level];
4189         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4190                 if (level) {
4191                         btrfs_node_key_to_cpu(buf, &k1, i);
4192                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4193                 } else {
4194                         btrfs_item_key_to_cpu(buf, &k1, i);
4195                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4196                 }
4197                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4198                         continue;
4199                 ret = swap_values(root, path, buf, i);
4200                 if (ret)
4201                         break;
4202                 btrfs_mark_buffer_dirty(buf);
4203                 i = 0;
4204         }
4205         return ret;
4206 }
4207
4208 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4209                              struct btrfs_root *root,
4210                              struct btrfs_path *path,
4211                              struct extent_buffer *buf, int slot)
4212 {
4213         struct btrfs_key key;
4214         int nritems = btrfs_header_nritems(buf);
4215
4216         btrfs_item_key_to_cpu(buf, &key, slot);
4217
4218         /* These are all the keys we can deal with missing. */
4219         if (key.type != BTRFS_DIR_INDEX_KEY &&
4220             key.type != BTRFS_EXTENT_ITEM_KEY &&
4221             key.type != BTRFS_METADATA_ITEM_KEY &&
4222             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4223             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4224                 return -1;
4225
4226         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4227                (unsigned long long)key.objectid, key.type,
4228                (unsigned long long)key.offset, slot, buf->start);
4229         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4230                               btrfs_item_nr_offset(slot + 1),
4231                               sizeof(struct btrfs_item) *
4232                               (nritems - slot - 1));
4233         btrfs_set_header_nritems(buf, nritems - 1);
4234         if (slot == 0) {
4235                 struct btrfs_disk_key disk_key;
4236
4237                 btrfs_item_key(buf, &disk_key, 0);
4238                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4239         }
4240         btrfs_mark_buffer_dirty(buf);
4241         return 0;
4242 }
4243
4244 static int fix_item_offset(struct btrfs_trans_handle *trans,
4245                            struct btrfs_root *root,
4246                            struct btrfs_path *path)
4247 {
4248         struct extent_buffer *buf;
4249         int i;
4250         int ret = 0;
4251
4252         /* We should only get this for leaves */
4253         BUG_ON(path->lowest_level);
4254         buf = path->nodes[0];
4255 again:
4256         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4257                 unsigned int shift = 0, offset;
4258
4259                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4260                     BTRFS_LEAF_DATA_SIZE(root)) {
4261                         if (btrfs_item_end_nr(buf, i) >
4262                             BTRFS_LEAF_DATA_SIZE(root)) {
4263                                 ret = delete_bogus_item(trans, root, path,
4264                                                         buf, i);
4265                                 if (!ret)
4266                                         goto again;
4267                                 fprintf(stderr, "item is off the end of the "
4268                                         "leaf, can't fix\n");
4269                                 ret = -EIO;
4270                                 break;
4271                         }
4272                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4273                                 btrfs_item_end_nr(buf, i);
4274                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4275                            btrfs_item_offset_nr(buf, i - 1)) {
4276                         if (btrfs_item_end_nr(buf, i) >
4277                             btrfs_item_offset_nr(buf, i - 1)) {
4278                                 ret = delete_bogus_item(trans, root, path,
4279                                                         buf, i);
4280                                 if (!ret)
4281                                         goto again;
4282                                 fprintf(stderr, "items overlap, can't fix\n");
4283                                 ret = -EIO;
4284                                 break;
4285                         }
4286                         shift = btrfs_item_offset_nr(buf, i - 1) -
4287                                 btrfs_item_end_nr(buf, i);
4288                 }
4289                 if (!shift)
4290                         continue;
4291
4292                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4293                        i, shift, (unsigned long long)buf->start);
4294                 offset = btrfs_item_offset_nr(buf, i);
4295                 memmove_extent_buffer(buf,
4296                                       btrfs_leaf_data(buf) + offset + shift,
4297                                       btrfs_leaf_data(buf) + offset,
4298                                       btrfs_item_size_nr(buf, i));
4299                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4300                                       offset + shift);
4301                 btrfs_mark_buffer_dirty(buf);
4302         }
4303
4304         /*
4305          * We may have moved things, in which case we want to exit so we don't
4306          * write those changes out.  Once we have proper abort functionality in
4307          * progs this can be changed to something nicer.
4308          */
4309         BUG_ON(ret);
4310         return ret;
4311 }
4312
4313 /*
4314  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4315  * then just return -EIO.
4316  */
4317 static int try_to_fix_bad_block(struct btrfs_root *root,
4318                                 struct extent_buffer *buf,
4319                                 enum btrfs_tree_block_status status)
4320 {
4321         struct btrfs_trans_handle *trans;
4322         struct ulist *roots;
4323         struct ulist_node *node;
4324         struct btrfs_root *search_root;
4325         struct btrfs_path *path;
4326         struct ulist_iterator iter;
4327         struct btrfs_key root_key, key;
4328         int ret;
4329
4330         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4331             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4332                 return -EIO;
4333
4334         path = btrfs_alloc_path();
4335         if (!path)
4336                 return -EIO;
4337
4338         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4339                                    0, &roots);
4340         if (ret) {
4341                 btrfs_free_path(path);
4342                 return -EIO;
4343         }
4344
4345         ULIST_ITER_INIT(&iter);
4346         while ((node = ulist_next(roots, &iter))) {
4347                 root_key.objectid = node->val;
4348                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4349                 root_key.offset = (u64)-1;
4350
4351                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4352                 if (IS_ERR(root)) {
4353                         ret = -EIO;
4354                         break;
4355                 }
4356
4357
4358                 trans = btrfs_start_transaction(search_root, 0);
4359                 if (IS_ERR(trans)) {
4360                         ret = PTR_ERR(trans);
4361                         break;
4362                 }
4363
4364                 path->lowest_level = btrfs_header_level(buf);
4365                 path->skip_check_block = 1;
4366                 if (path->lowest_level)
4367                         btrfs_node_key_to_cpu(buf, &key, 0);
4368                 else
4369                         btrfs_item_key_to_cpu(buf, &key, 0);
4370                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4371                 if (ret) {
4372                         ret = -EIO;
4373                         btrfs_commit_transaction(trans, search_root);
4374                         break;
4375                 }
4376                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4377                         ret = fix_key_order(trans, search_root, path);
4378                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4379                         ret = fix_item_offset(trans, search_root, path);
4380                 if (ret) {
4381                         btrfs_commit_transaction(trans, search_root);
4382                         break;
4383                 }
4384                 btrfs_release_path(path);
4385                 btrfs_commit_transaction(trans, search_root);
4386         }
4387         ulist_free(roots);
4388         btrfs_free_path(path);
4389         return ret;
4390 }
4391
4392 static int check_block(struct btrfs_root *root,
4393                        struct cache_tree *extent_cache,
4394                        struct extent_buffer *buf, u64 flags)
4395 {
4396         struct extent_record *rec;
4397         struct cache_extent *cache;
4398         struct btrfs_key key;
4399         enum btrfs_tree_block_status status;
4400         int ret = 0;
4401         int level;
4402
4403         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4404         if (!cache)
4405                 return 1;
4406         rec = container_of(cache, struct extent_record, cache);
4407         rec->generation = btrfs_header_generation(buf);
4408
4409         level = btrfs_header_level(buf);
4410         if (btrfs_header_nritems(buf) > 0) {
4411
4412                 if (level == 0)
4413                         btrfs_item_key_to_cpu(buf, &key, 0);
4414                 else
4415                         btrfs_node_key_to_cpu(buf, &key, 0);
4416
4417                 rec->info_objectid = key.objectid;
4418         }
4419         rec->info_level = level;
4420
4421         if (btrfs_is_leaf(buf))
4422                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4423         else
4424                 status = btrfs_check_node(root, &rec->parent_key, buf);
4425
4426         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4427                 if (repair)
4428                         status = try_to_fix_bad_block(root, buf, status);
4429                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4430                         ret = -EIO;
4431                         fprintf(stderr, "bad block %llu\n",
4432                                 (unsigned long long)buf->start);
4433                 } else {
4434                         /*
4435                          * Signal to callers we need to start the scan over
4436                          * again since we'll have cowed blocks.
4437                          */
4438                         ret = -EAGAIN;
4439                 }
4440         } else {
4441                 rec->content_checked = 1;
4442                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4443                         rec->owner_ref_checked = 1;
4444                 else {
4445                         ret = check_owner_ref(root, rec, buf);
4446                         if (!ret)
4447                                 rec->owner_ref_checked = 1;
4448                 }
4449         }
4450         if (!ret)
4451                 maybe_free_extent_rec(extent_cache, rec);
4452         return ret;
4453 }
4454
4455 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4456                                                 u64 parent, u64 root)
4457 {
4458         struct list_head *cur = rec->backrefs.next;
4459         struct extent_backref *node;
4460         struct tree_backref *back;
4461
4462         while(cur != &rec->backrefs) {
4463                 node = to_extent_backref(cur);
4464                 cur = cur->next;
4465                 if (node->is_data)
4466                         continue;
4467                 back = to_tree_backref(node);
4468                 if (parent > 0) {
4469                         if (!node->full_backref)
4470                                 continue;
4471                         if (parent == back->parent)
4472                                 return back;
4473                 } else {
4474                         if (node->full_backref)
4475                                 continue;
4476                         if (back->root == root)
4477                                 return back;
4478                 }
4479         }
4480         return NULL;
4481 }
4482
4483 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4484                                                 u64 parent, u64 root)
4485 {
4486         struct tree_backref *ref = malloc(sizeof(*ref));
4487
4488         if (!ref)
4489                 return NULL;
4490         memset(&ref->node, 0, sizeof(ref->node));
4491         if (parent > 0) {
4492                 ref->parent = parent;
4493                 ref->node.full_backref = 1;
4494         } else {
4495                 ref->root = root;
4496                 ref->node.full_backref = 0;
4497         }
4498         list_add_tail(&ref->node.list, &rec->backrefs);
4499
4500         return ref;
4501 }
4502
4503 static struct data_backref *find_data_backref(struct extent_record *rec,
4504                                                 u64 parent, u64 root,
4505                                                 u64 owner, u64 offset,
4506                                                 int found_ref,
4507                                                 u64 disk_bytenr, u64 bytes)
4508 {
4509         struct list_head *cur = rec->backrefs.next;
4510         struct extent_backref *node;
4511         struct data_backref *back;
4512
4513         while(cur != &rec->backrefs) {
4514                 node = to_extent_backref(cur);
4515                 cur = cur->next;
4516                 if (!node->is_data)
4517                         continue;
4518                 back = to_data_backref(node);
4519                 if (parent > 0) {
4520                         if (!node->full_backref)
4521                                 continue;
4522                         if (parent == back->parent)
4523                                 return back;
4524                 } else {
4525                         if (node->full_backref)
4526                                 continue;
4527                         if (back->root == root && back->owner == owner &&
4528                             back->offset == offset) {
4529                                 if (found_ref && node->found_ref &&
4530                                     (back->bytes != bytes ||
4531                                     back->disk_bytenr != disk_bytenr))
4532                                         continue;
4533                                 return back;
4534                         }
4535                 }
4536         }
4537         return NULL;
4538 }
4539
4540 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4541                                                 u64 parent, u64 root,
4542                                                 u64 owner, u64 offset,
4543                                                 u64 max_size)
4544 {
4545         struct data_backref *ref = malloc(sizeof(*ref));
4546
4547         if (!ref)
4548                 return NULL;
4549         memset(&ref->node, 0, sizeof(ref->node));
4550         ref->node.is_data = 1;
4551
4552         if (parent > 0) {
4553                 ref->parent = parent;
4554                 ref->owner = 0;
4555                 ref->offset = 0;
4556                 ref->node.full_backref = 1;
4557         } else {
4558                 ref->root = root;
4559                 ref->owner = owner;
4560                 ref->offset = offset;
4561                 ref->node.full_backref = 0;
4562         }
4563         ref->bytes = max_size;
4564         ref->found_ref = 0;
4565         ref->num_refs = 0;
4566         list_add_tail(&ref->node.list, &rec->backrefs);
4567         if (max_size > rec->max_size)
4568                 rec->max_size = max_size;
4569         return ref;
4570 }
4571
4572 /* Check if the type of extent matches with its chunk */
4573 static void check_extent_type(struct extent_record *rec)
4574 {
4575         struct btrfs_block_group_cache *bg_cache;
4576
4577         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4578         if (!bg_cache)
4579                 return;
4580
4581         /* data extent, check chunk directly*/
4582         if (!rec->metadata) {
4583                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4584                         rec->wrong_chunk_type = 1;
4585                 return;
4586         }
4587
4588         /* metadata extent, check the obvious case first */
4589         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4590                                  BTRFS_BLOCK_GROUP_METADATA))) {
4591                 rec->wrong_chunk_type = 1;
4592                 return;
4593         }
4594
4595         /*
4596          * Check SYSTEM extent, as it's also marked as metadata, we can only
4597          * make sure it's a SYSTEM extent by its backref
4598          */
4599         if (!list_empty(&rec->backrefs)) {
4600                 struct extent_backref *node;
4601                 struct tree_backref *tback;
4602                 u64 bg_type;
4603
4604                 node = to_extent_backref(rec->backrefs.next);
4605                 if (node->is_data) {
4606                         /* tree block shouldn't have data backref */
4607                         rec->wrong_chunk_type = 1;
4608                         return;
4609                 }
4610                 tback = container_of(node, struct tree_backref, node);
4611
4612                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4613                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4614                 else
4615                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4616                 if (!(bg_cache->flags & bg_type))
4617                         rec->wrong_chunk_type = 1;
4618         }
4619 }
4620
4621 /*
4622  * Allocate a new extent record, fill default values from @tmpl and insert int
4623  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4624  * the cache, otherwise it fails.
4625  */
4626 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4627                 struct extent_record *tmpl)
4628 {
4629         struct extent_record *rec;
4630         int ret = 0;
4631
4632         rec = malloc(sizeof(*rec));
4633         if (!rec)
4634                 return -ENOMEM;
4635         rec->start = tmpl->start;
4636         rec->max_size = tmpl->max_size;
4637         rec->nr = max(tmpl->nr, tmpl->max_size);
4638         rec->found_rec = tmpl->found_rec;
4639         rec->content_checked = tmpl->content_checked;
4640         rec->owner_ref_checked = tmpl->owner_ref_checked;
4641         rec->num_duplicates = 0;
4642         rec->metadata = tmpl->metadata;
4643         rec->flag_block_full_backref = FLAG_UNSET;
4644         rec->bad_full_backref = 0;
4645         rec->crossing_stripes = 0;
4646         rec->wrong_chunk_type = 0;
4647         rec->is_root = tmpl->is_root;
4648         rec->refs = tmpl->refs;
4649         rec->extent_item_refs = tmpl->extent_item_refs;
4650         rec->parent_generation = tmpl->parent_generation;
4651         INIT_LIST_HEAD(&rec->backrefs);
4652         INIT_LIST_HEAD(&rec->dups);
4653         INIT_LIST_HEAD(&rec->list);
4654         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4655         rec->cache.start = tmpl->start;
4656         rec->cache.size = tmpl->nr;
4657         ret = insert_cache_extent(extent_cache, &rec->cache);
4658         if (ret) {
4659                 free(rec);
4660                 return ret;
4661         }
4662         bytes_used += rec->nr;
4663
4664         if (tmpl->metadata)
4665                 rec->crossing_stripes = check_crossing_stripes(global_info,
4666                                 rec->start, global_info->tree_root->nodesize);
4667         check_extent_type(rec);
4668         return ret;
4669 }
4670
4671 /*
4672  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4673  * some are hints:
4674  * - refs              - if found, increase refs
4675  * - is_root           - if found, set
4676  * - content_checked   - if found, set
4677  * - owner_ref_checked - if found, set
4678  *
4679  * If not found, create a new one, initialize and insert.
4680  */
4681 static int add_extent_rec(struct cache_tree *extent_cache,
4682                 struct extent_record *tmpl)
4683 {
4684         struct extent_record *rec;
4685         struct cache_extent *cache;
4686         int ret = 0;
4687         int dup = 0;
4688
4689         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4690         if (cache) {
4691                 rec = container_of(cache, struct extent_record, cache);
4692                 if (tmpl->refs)
4693                         rec->refs++;
4694                 if (rec->nr == 1)
4695                         rec->nr = max(tmpl->nr, tmpl->max_size);
4696
4697                 /*
4698                  * We need to make sure to reset nr to whatever the extent
4699                  * record says was the real size, this way we can compare it to
4700                  * the backrefs.
4701                  */
4702                 if (tmpl->found_rec) {
4703                         if (tmpl->start != rec->start || rec->found_rec) {
4704                                 struct extent_record *tmp;
4705
4706                                 dup = 1;
4707                                 if (list_empty(&rec->list))
4708                                         list_add_tail(&rec->list,
4709                                                       &duplicate_extents);
4710
4711                                 /*
4712                                  * We have to do this song and dance in case we
4713                                  * find an extent record that falls inside of
4714                                  * our current extent record but does not have
4715                                  * the same objectid.
4716                                  */
4717                                 tmp = malloc(sizeof(*tmp));
4718                                 if (!tmp)
4719                                         return -ENOMEM;
4720                                 tmp->start = tmpl->start;
4721                                 tmp->max_size = tmpl->max_size;
4722                                 tmp->nr = tmpl->nr;
4723                                 tmp->found_rec = 1;
4724                                 tmp->metadata = tmpl->metadata;
4725                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4726                                 INIT_LIST_HEAD(&tmp->list);
4727                                 list_add_tail(&tmp->list, &rec->dups);
4728                                 rec->num_duplicates++;
4729                         } else {
4730                                 rec->nr = tmpl->nr;
4731                                 rec->found_rec = 1;
4732                         }
4733                 }
4734
4735                 if (tmpl->extent_item_refs && !dup) {
4736                         if (rec->extent_item_refs) {
4737                                 fprintf(stderr, "block %llu rec "
4738                                         "extent_item_refs %llu, passed %llu\n",
4739                                         (unsigned long long)tmpl->start,
4740                                         (unsigned long long)
4741                                                         rec->extent_item_refs,
4742                                         (unsigned long long)tmpl->extent_item_refs);
4743                         }
4744                         rec->extent_item_refs = tmpl->extent_item_refs;
4745                 }
4746                 if (tmpl->is_root)
4747                         rec->is_root = 1;
4748                 if (tmpl->content_checked)
4749                         rec->content_checked = 1;
4750                 if (tmpl->owner_ref_checked)
4751                         rec->owner_ref_checked = 1;
4752                 memcpy(&rec->parent_key, &tmpl->parent_key,
4753                                 sizeof(tmpl->parent_key));
4754                 if (tmpl->parent_generation)
4755                         rec->parent_generation = tmpl->parent_generation;
4756                 if (rec->max_size < tmpl->max_size)
4757                         rec->max_size = tmpl->max_size;
4758
4759                 /*
4760                  * A metadata extent can't cross stripe_len boundary, otherwise
4761                  * kernel scrub won't be able to handle it.
4762                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4763                  * it.
4764                  */
4765                 if (tmpl->metadata)
4766                         rec->crossing_stripes = check_crossing_stripes(
4767                                         global_info, rec->start,
4768                                         global_info->tree_root->nodesize);
4769                 check_extent_type(rec);
4770                 maybe_free_extent_rec(extent_cache, rec);
4771                 return ret;
4772         }
4773
4774         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4775
4776         return ret;
4777 }
4778
4779 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4780                             u64 parent, u64 root, int found_ref)
4781 {
4782         struct extent_record *rec;
4783         struct tree_backref *back;
4784         struct cache_extent *cache;
4785         int ret;
4786
4787         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4788         if (!cache) {
4789                 struct extent_record tmpl;
4790
4791                 memset(&tmpl, 0, sizeof(tmpl));
4792                 tmpl.start = bytenr;
4793                 tmpl.nr = 1;
4794                 tmpl.metadata = 1;
4795
4796                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4797                 if (ret)
4798                         return ret;
4799
4800                 /* really a bug in cache_extent implement now */
4801                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4802                 if (!cache)
4803                         return -ENOENT;
4804         }
4805
4806         rec = container_of(cache, struct extent_record, cache);
4807         if (rec->start != bytenr) {
4808                 /*
4809                  * Several cause, from unaligned bytenr to over lapping extents
4810                  */
4811                 return -EEXIST;
4812         }
4813
4814         back = find_tree_backref(rec, parent, root);
4815         if (!back) {
4816                 back = alloc_tree_backref(rec, parent, root);
4817                 if (!back)
4818                         return -ENOMEM;
4819         }
4820
4821         if (found_ref) {
4822                 if (back->node.found_ref) {
4823                         fprintf(stderr, "Extent back ref already exists "
4824                                 "for %llu parent %llu root %llu \n",
4825                                 (unsigned long long)bytenr,
4826                                 (unsigned long long)parent,
4827                                 (unsigned long long)root);
4828                 }
4829                 back->node.found_ref = 1;
4830         } else {
4831                 if (back->node.found_extent_tree) {
4832                         fprintf(stderr, "Extent back ref already exists "
4833                                 "for %llu parent %llu root %llu \n",
4834                                 (unsigned long long)bytenr,
4835                                 (unsigned long long)parent,
4836                                 (unsigned long long)root);
4837                 }
4838                 back->node.found_extent_tree = 1;
4839         }
4840         check_extent_type(rec);
4841         maybe_free_extent_rec(extent_cache, rec);
4842         return 0;
4843 }
4844
4845 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4846                             u64 parent, u64 root, u64 owner, u64 offset,
4847                             u32 num_refs, int found_ref, u64 max_size)
4848 {
4849         struct extent_record *rec;
4850         struct data_backref *back;
4851         struct cache_extent *cache;
4852         int ret;
4853
4854         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4855         if (!cache) {
4856                 struct extent_record tmpl;
4857
4858                 memset(&tmpl, 0, sizeof(tmpl));
4859                 tmpl.start = bytenr;
4860                 tmpl.nr = 1;
4861                 tmpl.max_size = max_size;
4862
4863                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4864                 if (ret)
4865                         return ret;
4866
4867                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4868                 if (!cache)
4869                         abort();
4870         }
4871
4872         rec = container_of(cache, struct extent_record, cache);
4873         if (rec->max_size < max_size)
4874                 rec->max_size = max_size;
4875
4876         /*
4877          * If found_ref is set then max_size is the real size and must match the
4878          * existing refs.  So if we have already found a ref then we need to
4879          * make sure that this ref matches the existing one, otherwise we need
4880          * to add a new backref so we can notice that the backrefs don't match
4881          * and we need to figure out who is telling the truth.  This is to
4882          * account for that awful fsync bug I introduced where we'd end up with
4883          * a btrfs_file_extent_item that would have its length include multiple
4884          * prealloc extents or point inside of a prealloc extent.
4885          */
4886         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4887                                  bytenr, max_size);
4888         if (!back) {
4889                 back = alloc_data_backref(rec, parent, root, owner, offset,
4890                                           max_size);
4891                 BUG_ON(!back);
4892         }
4893
4894         if (found_ref) {
4895                 BUG_ON(num_refs != 1);
4896                 if (back->node.found_ref)
4897                         BUG_ON(back->bytes != max_size);
4898                 back->node.found_ref = 1;
4899                 back->found_ref += 1;
4900                 back->bytes = max_size;
4901                 back->disk_bytenr = bytenr;
4902                 rec->refs += 1;
4903                 rec->content_checked = 1;
4904                 rec->owner_ref_checked = 1;
4905         } else {
4906                 if (back->node.found_extent_tree) {
4907                         fprintf(stderr, "Extent back ref already exists "
4908                                 "for %llu parent %llu root %llu "
4909                                 "owner %llu offset %llu num_refs %lu\n",
4910                                 (unsigned long long)bytenr,
4911                                 (unsigned long long)parent,
4912                                 (unsigned long long)root,
4913                                 (unsigned long long)owner,
4914                                 (unsigned long long)offset,
4915                                 (unsigned long)num_refs);
4916                 }
4917                 back->num_refs = num_refs;
4918                 back->node.found_extent_tree = 1;
4919         }
4920         maybe_free_extent_rec(extent_cache, rec);
4921         return 0;
4922 }
4923
4924 static int add_pending(struct cache_tree *pending,
4925                        struct cache_tree *seen, u64 bytenr, u32 size)
4926 {
4927         int ret;
4928         ret = add_cache_extent(seen, bytenr, size);
4929         if (ret)
4930                 return ret;
4931         add_cache_extent(pending, bytenr, size);
4932         return 0;
4933 }
4934
4935 static int pick_next_pending(struct cache_tree *pending,
4936                         struct cache_tree *reada,
4937                         struct cache_tree *nodes,
4938                         u64 last, struct block_info *bits, int bits_nr,
4939                         int *reada_bits)
4940 {
4941         unsigned long node_start = last;
4942         struct cache_extent *cache;
4943         int ret;
4944
4945         cache = search_cache_extent(reada, 0);
4946         if (cache) {
4947                 bits[0].start = cache->start;
4948                 bits[0].size = cache->size;
4949                 *reada_bits = 1;
4950                 return 1;
4951         }
4952         *reada_bits = 0;
4953         if (node_start > 32768)
4954                 node_start -= 32768;
4955
4956         cache = search_cache_extent(nodes, node_start);
4957         if (!cache)
4958                 cache = search_cache_extent(nodes, 0);
4959
4960         if (!cache) {
4961                  cache = search_cache_extent(pending, 0);
4962                  if (!cache)
4963                          return 0;
4964                  ret = 0;
4965                  do {
4966                          bits[ret].start = cache->start;
4967                          bits[ret].size = cache->size;
4968                          cache = next_cache_extent(cache);
4969                          ret++;
4970                  } while (cache && ret < bits_nr);
4971                  return ret;
4972         }
4973
4974         ret = 0;
4975         do {
4976                 bits[ret].start = cache->start;
4977                 bits[ret].size = cache->size;
4978                 cache = next_cache_extent(cache);
4979                 ret++;
4980         } while (cache && ret < bits_nr);
4981
4982         if (bits_nr - ret > 8) {
4983                 u64 lookup = bits[0].start + bits[0].size;
4984                 struct cache_extent *next;
4985                 next = search_cache_extent(pending, lookup);
4986                 while(next) {
4987                         if (next->start - lookup > 32768)
4988                                 break;
4989                         bits[ret].start = next->start;
4990                         bits[ret].size = next->size;
4991                         lookup = next->start + next->size;
4992                         ret++;
4993                         if (ret == bits_nr)
4994                                 break;
4995                         next = next_cache_extent(next);
4996                         if (!next)
4997                                 break;
4998                 }
4999         }
5000         return ret;
5001 }
5002
5003 static void free_chunk_record(struct cache_extent *cache)
5004 {
5005         struct chunk_record *rec;
5006
5007         rec = container_of(cache, struct chunk_record, cache);
5008         list_del_init(&rec->list);
5009         list_del_init(&rec->dextents);
5010         free(rec);
5011 }
5012
5013 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5014 {
5015         cache_tree_free_extents(chunk_cache, free_chunk_record);
5016 }
5017
5018 static void free_device_record(struct rb_node *node)
5019 {
5020         struct device_record *rec;
5021
5022         rec = container_of(node, struct device_record, node);
5023         free(rec);
5024 }
5025
5026 FREE_RB_BASED_TREE(device_cache, free_device_record);
5027
5028 int insert_block_group_record(struct block_group_tree *tree,
5029                               struct block_group_record *bg_rec)
5030 {
5031         int ret;
5032
5033         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5034         if (ret)
5035                 return ret;
5036
5037         list_add_tail(&bg_rec->list, &tree->block_groups);
5038         return 0;
5039 }
5040
5041 static void free_block_group_record(struct cache_extent *cache)
5042 {
5043         struct block_group_record *rec;
5044
5045         rec = container_of(cache, struct block_group_record, cache);
5046         list_del_init(&rec->list);
5047         free(rec);
5048 }
5049
5050 void free_block_group_tree(struct block_group_tree *tree)
5051 {
5052         cache_tree_free_extents(&tree->tree, free_block_group_record);
5053 }
5054
5055 int insert_device_extent_record(struct device_extent_tree *tree,
5056                                 struct device_extent_record *de_rec)
5057 {
5058         int ret;
5059
5060         /*
5061          * Device extent is a bit different from the other extents, because
5062          * the extents which belong to the different devices may have the
5063          * same start and size, so we need use the special extent cache
5064          * search/insert functions.
5065          */
5066         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5067         if (ret)
5068                 return ret;
5069
5070         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5071         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5072         return 0;
5073 }
5074
5075 static void free_device_extent_record(struct cache_extent *cache)
5076 {
5077         struct device_extent_record *rec;
5078
5079         rec = container_of(cache, struct device_extent_record, cache);
5080         if (!list_empty(&rec->chunk_list))
5081                 list_del_init(&rec->chunk_list);
5082         if (!list_empty(&rec->device_list))
5083                 list_del_init(&rec->device_list);
5084         free(rec);
5085 }
5086
5087 void free_device_extent_tree(struct device_extent_tree *tree)
5088 {
5089         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5090 }
5091
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item found in @leaf at @slot.
 *
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset). A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the v0 ref count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid,
				key.offset, 0, 0, 0,
				btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
5112
5113 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5114                                             struct btrfs_key *key,
5115                                             int slot)
5116 {
5117         struct btrfs_chunk *ptr;
5118         struct chunk_record *rec;
5119         int num_stripes, i;
5120
5121         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5122         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5123
5124         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5125         if (!rec) {
5126                 fprintf(stderr, "memory allocation failed\n");
5127                 exit(-1);
5128         }
5129
5130         INIT_LIST_HEAD(&rec->list);
5131         INIT_LIST_HEAD(&rec->dextents);
5132         rec->bg_rec = NULL;
5133
5134         rec->cache.start = key->offset;
5135         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5136
5137         rec->generation = btrfs_header_generation(leaf);
5138
5139         rec->objectid = key->objectid;
5140         rec->type = key->type;
5141         rec->offset = key->offset;
5142
5143         rec->length = rec->cache.size;
5144         rec->owner = btrfs_chunk_owner(leaf, ptr);
5145         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5146         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5147         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5148         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5149         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5150         rec->num_stripes = num_stripes;
5151         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5152
5153         for (i = 0; i < rec->num_stripes; ++i) {
5154                 rec->stripes[i].devid =
5155                         btrfs_stripe_devid_nr(leaf, ptr, i);
5156                 rec->stripes[i].offset =
5157                         btrfs_stripe_offset_nr(leaf, ptr, i);
5158                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5159                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5160                                 BTRFS_UUID_SIZE);
5161         }
5162
5163         return rec;
5164 }
5165
5166 static int process_chunk_item(struct cache_tree *chunk_cache,
5167                               struct btrfs_key *key, struct extent_buffer *eb,
5168                               int slot)
5169 {
5170         struct chunk_record *rec;
5171         struct btrfs_chunk *chunk;
5172         int ret = 0;
5173
5174         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5175         /*
5176          * Do extra check for this chunk item,
5177          *
5178          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5179          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5180          * and owner<->key_type check.
5181          */
5182         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5183                                       key->offset);
5184         if (ret < 0) {
5185                 error("chunk(%llu, %llu) is not valid, ignore it",
5186                       key->offset, btrfs_chunk_length(eb, chunk));
5187                 return 0;
5188         }
5189         rec = btrfs_new_chunk_record(eb, key, slot);
5190         ret = insert_cache_extent(chunk_cache, &rec->cache);
5191         if (ret) {
5192                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5193                         rec->offset, rec->length);
5194                 free(rec);
5195         }
5196
5197         return ret;
5198 }
5199
5200 static int process_device_item(struct rb_root *dev_cache,
5201                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5202 {
5203         struct btrfs_dev_item *ptr;
5204         struct device_record *rec;
5205         int ret = 0;
5206
5207         ptr = btrfs_item_ptr(eb,
5208                 slot, struct btrfs_dev_item);
5209
5210         rec = malloc(sizeof(*rec));
5211         if (!rec) {
5212                 fprintf(stderr, "memory allocation failed\n");
5213                 return -ENOMEM;
5214         }
5215
5216         rec->devid = key->offset;
5217         rec->generation = btrfs_header_generation(eb);
5218
5219         rec->objectid = key->objectid;
5220         rec->type = key->type;
5221         rec->offset = key->offset;
5222
5223         rec->devid = btrfs_device_id(eb, ptr);
5224         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5225         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5226
5227         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5228         if (ret) {
5229                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5230                 free(rec);
5231         }
5232
5233         return ret;
5234 }
5235
5236 struct block_group_record *
5237 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5238                              int slot)
5239 {
5240         struct btrfs_block_group_item *ptr;
5241         struct block_group_record *rec;
5242
5243         rec = calloc(1, sizeof(*rec));
5244         if (!rec) {
5245                 fprintf(stderr, "memory allocation failed\n");
5246                 exit(-1);
5247         }
5248
5249         rec->cache.start = key->objectid;
5250         rec->cache.size = key->offset;
5251
5252         rec->generation = btrfs_header_generation(leaf);
5253
5254         rec->objectid = key->objectid;
5255         rec->type = key->type;
5256         rec->offset = key->offset;
5257
5258         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5259         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5260
5261         INIT_LIST_HEAD(&rec->list);
5262
5263         return rec;
5264 }
5265
5266 static int process_block_group_item(struct block_group_tree *block_group_cache,
5267                                     struct btrfs_key *key,
5268                                     struct extent_buffer *eb, int slot)
5269 {
5270         struct block_group_record *rec;
5271         int ret = 0;
5272
5273         rec = btrfs_new_block_group_record(eb, key, slot);
5274         ret = insert_block_group_record(block_group_cache, rec);
5275         if (ret) {
5276                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5277                         rec->objectid, rec->offset);
5278                 free(rec);
5279         }
5280
5281         return ret;
5282 }
5283
5284 struct device_extent_record *
5285 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5286                                struct btrfs_key *key, int slot)
5287 {
5288         struct device_extent_record *rec;
5289         struct btrfs_dev_extent *ptr;
5290
5291         rec = calloc(1, sizeof(*rec));
5292         if (!rec) {
5293                 fprintf(stderr, "memory allocation failed\n");
5294                 exit(-1);
5295         }
5296
5297         rec->cache.objectid = key->objectid;
5298         rec->cache.start = key->offset;
5299
5300         rec->generation = btrfs_header_generation(leaf);
5301
5302         rec->objectid = key->objectid;
5303         rec->type = key->type;
5304         rec->offset = key->offset;
5305
5306         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5307         rec->chunk_objecteid =
5308                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5309         rec->chunk_offset =
5310                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5311         rec->length = btrfs_dev_extent_length(leaf, ptr);
5312         rec->cache.size = rec->length;
5313
5314         INIT_LIST_HEAD(&rec->chunk_list);
5315         INIT_LIST_HEAD(&rec->device_list);
5316
5317         return rec;
5318 }
5319
5320 static int
5321 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5322                            struct btrfs_key *key, struct extent_buffer *eb,
5323                            int slot)
5324 {
5325         struct device_extent_record *rec;
5326         int ret;
5327
5328         rec = btrfs_new_device_extent_record(eb, key, slot);
5329         ret = insert_device_extent_record(dev_extent_cache, rec);
5330         if (ret) {
5331                 fprintf(stderr,
5332                         "Device extent[%llu, %llu, %llu] existed.\n",
5333                         rec->objectid, rec->offset, rec->length);
5334                 free(rec);
5335         }
5336
5337         return ret;
5338 }
5339
5340 static int process_extent_item(struct btrfs_root *root,
5341                                struct cache_tree *extent_cache,
5342                                struct extent_buffer *eb, int slot)
5343 {
5344         struct btrfs_extent_item *ei;
5345         struct btrfs_extent_inline_ref *iref;
5346         struct btrfs_extent_data_ref *dref;
5347         struct btrfs_shared_data_ref *sref;
5348         struct btrfs_key key;
5349         struct extent_record tmpl;
5350         unsigned long end;
5351         unsigned long ptr;
5352         int ret;
5353         int type;
5354         u32 item_size = btrfs_item_size_nr(eb, slot);
5355         u64 refs = 0;
5356         u64 offset;
5357         u64 num_bytes;
5358         int metadata = 0;
5359
5360         btrfs_item_key_to_cpu(eb, &key, slot);
5361
5362         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5363                 metadata = 1;
5364                 num_bytes = root->nodesize;
5365         } else {
5366                 num_bytes = key.offset;
5367         }
5368
5369         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5370                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5371                       key.objectid, root->sectorsize);
5372                 return -EIO;
5373         }
5374         if (item_size < sizeof(*ei)) {
5375 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5376                 struct btrfs_extent_item_v0 *ei0;
5377                 BUG_ON(item_size != sizeof(*ei0));
5378                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5379                 refs = btrfs_extent_refs_v0(eb, ei0);
5380 #else
5381                 BUG();
5382 #endif
5383                 memset(&tmpl, 0, sizeof(tmpl));
5384                 tmpl.start = key.objectid;
5385                 tmpl.nr = num_bytes;
5386                 tmpl.extent_item_refs = refs;
5387                 tmpl.metadata = metadata;
5388                 tmpl.found_rec = 1;
5389                 tmpl.max_size = num_bytes;
5390
5391                 return add_extent_rec(extent_cache, &tmpl);
5392         }
5393
5394         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5395         refs = btrfs_extent_refs(eb, ei);
5396         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5397                 metadata = 1;
5398         else
5399                 metadata = 0;
5400         if (metadata && num_bytes != root->nodesize) {
5401                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5402                       num_bytes, root->nodesize);
5403                 return -EIO;
5404         }
5405         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5406                 error("ignore invalid data extent, length %llu is not aligned to %u",
5407                       num_bytes, root->sectorsize);
5408                 return -EIO;
5409         }
5410
5411         memset(&tmpl, 0, sizeof(tmpl));
5412         tmpl.start = key.objectid;
5413         tmpl.nr = num_bytes;
5414         tmpl.extent_item_refs = refs;
5415         tmpl.metadata = metadata;
5416         tmpl.found_rec = 1;
5417         tmpl.max_size = num_bytes;
5418         add_extent_rec(extent_cache, &tmpl);
5419
5420         ptr = (unsigned long)(ei + 1);
5421         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5422             key.type == BTRFS_EXTENT_ITEM_KEY)
5423                 ptr += sizeof(struct btrfs_tree_block_info);
5424
5425         end = (unsigned long)ei + item_size;
5426         while (ptr < end) {
5427                 iref = (struct btrfs_extent_inline_ref *)ptr;
5428                 type = btrfs_extent_inline_ref_type(eb, iref);
5429                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5430                 switch (type) {
5431                 case BTRFS_TREE_BLOCK_REF_KEY:
5432                         ret = add_tree_backref(extent_cache, key.objectid,
5433                                         0, offset, 0);
5434                         if (ret < 0)
5435                                 error("add_tree_backref failed: %s",
5436                                       strerror(-ret));
5437                         break;
5438                 case BTRFS_SHARED_BLOCK_REF_KEY:
5439                         ret = add_tree_backref(extent_cache, key.objectid,
5440                                         offset, 0, 0);
5441                         if (ret < 0)
5442                                 error("add_tree_backref failed: %s",
5443                                       strerror(-ret));
5444                         break;
5445                 case BTRFS_EXTENT_DATA_REF_KEY:
5446                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5447                         add_data_backref(extent_cache, key.objectid, 0,
5448                                         btrfs_extent_data_ref_root(eb, dref),
5449                                         btrfs_extent_data_ref_objectid(eb,
5450                                                                        dref),
5451                                         btrfs_extent_data_ref_offset(eb, dref),
5452                                         btrfs_extent_data_ref_count(eb, dref),
5453                                         0, num_bytes);
5454                         break;
5455                 case BTRFS_SHARED_DATA_REF_KEY:
5456                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5457                         add_data_backref(extent_cache, key.objectid, offset,
5458                                         0, 0, 0,
5459                                         btrfs_shared_data_ref_count(eb, sref),
5460                                         0, num_bytes);
5461                         break;
5462                 default:
5463                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5464                                 key.objectid, key.type, num_bytes);
5465                         goto out;
5466                 }
5467                 ptr += btrfs_extent_inline_ref_size(type);
5468         }
5469         WARN_ON(ptr > end);
5470 out:
5471         return 0;
5472 }
5473
5474 static int check_cache_range(struct btrfs_root *root,
5475                              struct btrfs_block_group_cache *cache,
5476                              u64 offset, u64 bytes)
5477 {
5478         struct btrfs_free_space *entry;
5479         u64 *logical;
5480         u64 bytenr;
5481         int stripe_len;
5482         int i, nr, ret;
5483
5484         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5485                 bytenr = btrfs_sb_offset(i);
5486                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5487                                        cache->key.objectid, bytenr, 0,
5488                                        &logical, &nr, &stripe_len);
5489                 if (ret)
5490                         return ret;
5491
5492                 while (nr--) {
5493                         if (logical[nr] + stripe_len <= offset)
5494                                 continue;
5495                         if (offset + bytes <= logical[nr])
5496                                 continue;
5497                         if (logical[nr] == offset) {
5498                                 if (stripe_len >= bytes) {
5499                                         free(logical);
5500                                         return 0;
5501                                 }
5502                                 bytes -= stripe_len;
5503                                 offset += stripe_len;
5504                         } else if (logical[nr] < offset) {
5505                                 if (logical[nr] + stripe_len >=
5506                                     offset + bytes) {
5507                                         free(logical);
5508                                         return 0;
5509                                 }
5510                                 bytes = (offset + bytes) -
5511                                         (logical[nr] + stripe_len);
5512                                 offset = logical[nr] + stripe_len;
5513                         } else {
5514                                 /*
5515                                  * Could be tricky, the super may land in the
5516                                  * middle of the area we're checking.  First
5517                                  * check the easiest case, it's at the end.
5518                                  */
5519                                 if (logical[nr] + stripe_len >=
5520                                     bytes + offset) {
5521                                         bytes = logical[nr] - offset;
5522                                         continue;
5523                                 }
5524
5525                                 /* Check the left side */
5526                                 ret = check_cache_range(root, cache,
5527                                                         offset,
5528                                                         logical[nr] - offset);
5529                                 if (ret) {
5530                                         free(logical);
5531                                         return ret;
5532                                 }
5533
5534                                 /* Now we continue with the right side */
5535                                 bytes = (offset + bytes) -
5536                                         (logical[nr] + stripe_len);
5537                                 offset = logical[nr] + stripe_len;
5538                         }
5539                 }
5540
5541                 free(logical);
5542         }
5543
5544         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5545         if (!entry) {
5546                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5547                         offset, offset+bytes);
5548                 return -EINVAL;
5549         }
5550
5551         if (entry->offset != offset) {
5552                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5553                         entry->offset);
5554                 return -EINVAL;
5555         }
5556
5557         if (entry->bytes != bytes) {
5558                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5559                         bytes, entry->bytes, offset);
5560                 return -EINVAL;
5561         }
5562
5563         unlink_free_space(cache->free_space_ctl, entry);
5564         free(entry);
5565         return 0;
5566 }
5567
5568 static int verify_space_cache(struct btrfs_root *root,
5569                               struct btrfs_block_group_cache *cache)
5570 {
5571         struct btrfs_path *path;
5572         struct extent_buffer *leaf;
5573         struct btrfs_key key;
5574         u64 last;
5575         int ret = 0;
5576
5577         path = btrfs_alloc_path();
5578         if (!path)
5579                 return -ENOMEM;
5580
5581         root = root->fs_info->extent_root;
5582
5583         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5584
5585         key.objectid = last;
5586         key.offset = 0;
5587         key.type = BTRFS_EXTENT_ITEM_KEY;
5588
5589         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5590         if (ret < 0)
5591                 goto out;
5592         ret = 0;
5593         while (1) {
5594                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5595                         ret = btrfs_next_leaf(root, path);
5596                         if (ret < 0)
5597                                 goto out;
5598                         if (ret > 0) {
5599                                 ret = 0;
5600                                 break;
5601                         }
5602                 }
5603                 leaf = path->nodes[0];
5604                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5605                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5606                         break;
5607                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5608                     key.type != BTRFS_METADATA_ITEM_KEY) {
5609                         path->slots[0]++;
5610                         continue;
5611                 }
5612
5613                 if (last == key.objectid) {
5614                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5615                                 last = key.objectid + key.offset;
5616                         else
5617                                 last = key.objectid + root->nodesize;
5618                         path->slots[0]++;
5619                         continue;
5620                 }
5621
5622                 ret = check_cache_range(root, cache, last,
5623                                         key.objectid - last);
5624                 if (ret)
5625                         break;
5626                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5627                         last = key.objectid + key.offset;
5628                 else
5629                         last = key.objectid + root->nodesize;
5630                 path->slots[0]++;
5631         }
5632
5633         if (last < cache->key.objectid + cache->key.offset)
5634                 ret = check_cache_range(root, cache, last,
5635                                         cache->key.objectid +
5636                                         cache->key.offset - last);
5637
5638 out:
5639         btrfs_free_path(path);
5640
5641         if (!ret &&
5642             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5643                 fprintf(stderr, "There are still entries left in the space "
5644                         "cache\n");
5645                 ret = -EINVAL;
5646         }
5647
5648         return ret;
5649 }
5650
5651 static int check_space_cache(struct btrfs_root *root)
5652 {
5653         struct btrfs_block_group_cache *cache;
5654         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5655         int ret;
5656         int error = 0;
5657
5658         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5659             btrfs_super_generation(root->fs_info->super_copy) !=
5660             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5661                 printf("cache and super generation don't match, space cache "
5662                        "will be invalidated\n");
5663                 return 0;
5664         }
5665
5666         if (ctx.progress_enabled) {
5667                 ctx.tp = TASK_FREE_SPACE;
5668                 task_start(ctx.info);
5669         }
5670
5671         while (1) {
5672                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5673                 if (!cache)
5674                         break;
5675
5676                 start = cache->key.objectid + cache->key.offset;
5677                 if (!cache->free_space_ctl) {
5678                         if (btrfs_init_free_space_ctl(cache,
5679                                                       root->sectorsize)) {
5680                                 ret = -ENOMEM;
5681                                 break;
5682                         }
5683                 } else {
5684                         btrfs_remove_free_space_cache(cache);
5685                 }
5686
5687                 if (btrfs_fs_compat_ro(root->fs_info,
5688                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5689                         ret = exclude_super_stripes(root, cache);
5690                         if (ret) {
5691                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5692                                         strerror(-ret));
5693                                 error++;
5694                                 continue;
5695                         }
5696                         ret = load_free_space_tree(root->fs_info, cache);
5697                         free_excluded_extents(root, cache);
5698                         if (ret < 0) {
5699                                 fprintf(stderr, "could not load free space tree: %s\n",
5700                                         strerror(-ret));
5701                                 error++;
5702                                 continue;
5703                         }
5704                         error += ret;
5705                 } else {
5706                         ret = load_free_space_cache(root->fs_info, cache);
5707                         if (!ret)
5708                                 continue;
5709                 }
5710
5711                 ret = verify_space_cache(root, cache);
5712                 if (ret) {
5713                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5714                                 cache->key.objectid);
5715                         error++;
5716                 }
5717         }
5718
5719         task_stop(ctx.info);
5720
5721         return error ? -EINVAL : 0;
5722 }
5723
5724 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5725                         u64 num_bytes, unsigned long leaf_offset,
5726                         struct extent_buffer *eb) {
5727
5728         u64 offset = 0;
5729         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5730         char *data;
5731         unsigned long csum_offset;
5732         u32 csum;
5733         u32 csum_expected;
5734         u64 read_len;
5735         u64 data_checked = 0;
5736         u64 tmp;
5737         int ret = 0;
5738         int mirror;
5739         int num_copies;
5740
5741         if (num_bytes % root->sectorsize)
5742                 return -EINVAL;
5743
5744         data = malloc(num_bytes);
5745         if (!data)
5746                 return -ENOMEM;
5747
5748         while (offset < num_bytes) {
5749                 mirror = 0;
5750 again:
5751                 read_len = num_bytes - offset;
5752                 /* read as much space once a time */
5753                 ret = read_extent_data(root, data + offset,
5754                                 bytenr + offset, &read_len, mirror);
5755                 if (ret)
5756                         goto out;
5757                 data_checked = 0;
5758                 /* verify every 4k data's checksum */
5759                 while (data_checked < read_len) {
5760                         csum = ~(u32)0;
5761                         tmp = offset + data_checked;
5762
5763                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5764                                                csum, root->sectorsize);
5765                         btrfs_csum_final(csum, (u8 *)&csum);
5766
5767                         csum_offset = leaf_offset +
5768                                  tmp / root->sectorsize * csum_size;
5769                         read_extent_buffer(eb, (char *)&csum_expected,
5770                                            csum_offset, csum_size);
5771                         /* try another mirror */
5772                         if (csum != csum_expected) {
5773                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5774                                                 mirror, bytenr + tmp,
5775                                                 csum, csum_expected);
5776                                 num_copies = btrfs_num_copies(
5777                                                 &root->fs_info->mapping_tree,
5778                                                 bytenr, num_bytes);
5779                                 if (mirror < num_copies - 1) {
5780                                         mirror += 1;
5781                                         goto again;
5782                                 }
5783                         }
5784                         data_checked += root->sectorsize;
5785                 }
5786                 offset += read_len;
5787         }
5788 out:
5789         free(data);
5790         return ret;
5791 }
5792
5793 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5794                                u64 num_bytes)
5795 {
5796         struct btrfs_path *path;
5797         struct extent_buffer *leaf;
5798         struct btrfs_key key;
5799         int ret;
5800
5801         path = btrfs_alloc_path();
5802         if (!path) {
5803                 fprintf(stderr, "Error allocating path\n");
5804                 return -ENOMEM;
5805         }
5806
5807         key.objectid = bytenr;
5808         key.type = BTRFS_EXTENT_ITEM_KEY;
5809         key.offset = (u64)-1;
5810
5811 again:
5812         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5813                                 0, 0);
5814         if (ret < 0) {
5815                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5816                 btrfs_free_path(path);
5817                 return ret;
5818         } else if (ret) {
5819                 if (path->slots[0] > 0) {
5820                         path->slots[0]--;
5821                 } else {
5822                         ret = btrfs_prev_leaf(root, path);
5823                         if (ret < 0) {
5824                                 goto out;
5825                         } else if (ret > 0) {
5826                                 ret = 0;
5827                                 goto out;
5828                         }
5829                 }
5830         }
5831
5832         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5833
5834         /*
5835          * Block group items come before extent items if they have the same
5836          * bytenr, so walk back one more just in case.  Dear future traveller,
5837          * first congrats on mastering time travel.  Now if it's not too much
5838          * trouble could you go back to 2006 and tell Chris to make the
5839          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5840          * EXTENT_ITEM_KEY please?
5841          */
5842         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5843                 if (path->slots[0] > 0) {
5844                         path->slots[0]--;
5845                 } else {
5846                         ret = btrfs_prev_leaf(root, path);
5847                         if (ret < 0) {
5848                                 goto out;
5849                         } else if (ret > 0) {
5850                                 ret = 0;
5851                                 goto out;
5852                         }
5853                 }
5854                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5855         }
5856
5857         while (num_bytes) {
5858                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5859                         ret = btrfs_next_leaf(root, path);
5860                         if (ret < 0) {
5861                                 fprintf(stderr, "Error going to next leaf "
5862                                         "%d\n", ret);
5863                                 btrfs_free_path(path);
5864                                 return ret;
5865                         } else if (ret) {
5866                                 break;
5867                         }
5868                 }
5869                 leaf = path->nodes[0];
5870                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5871                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5872                         path->slots[0]++;
5873                         continue;
5874                 }
5875                 if (key.objectid + key.offset < bytenr) {
5876                         path->slots[0]++;
5877                         continue;
5878                 }
5879                 if (key.objectid > bytenr + num_bytes)
5880                         break;
5881
5882                 if (key.objectid == bytenr) {
5883                         if (key.offset >= num_bytes) {
5884                                 num_bytes = 0;
5885                                 break;
5886                         }
5887                         num_bytes -= key.offset;
5888                         bytenr += key.offset;
5889                 } else if (key.objectid < bytenr) {
5890                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5891                                 num_bytes = 0;
5892                                 break;
5893                         }
5894                         num_bytes = (bytenr + num_bytes) -
5895                                 (key.objectid + key.offset);
5896                         bytenr = key.objectid + key.offset;
5897                 } else {
5898                         if (key.objectid + key.offset < bytenr + num_bytes) {
5899                                 u64 new_start = key.objectid + key.offset;
5900                                 u64 new_bytes = bytenr + num_bytes - new_start;
5901
5902                                 /*
5903                                  * Weird case, the extent is in the middle of
5904                                  * our range, we'll have to search one side
5905                                  * and then the other.  Not sure if this happens
5906                                  * in real life, but no harm in coding it up
5907                                  * anyway just in case.
5908                                  */
5909                                 btrfs_release_path(path);
5910                                 ret = check_extent_exists(root, new_start,
5911                                                           new_bytes);
5912                                 if (ret) {
5913                                         fprintf(stderr, "Right section didn't "
5914                                                 "have a record\n");
5915                                         break;
5916                                 }
5917                                 num_bytes = key.objectid - bytenr;
5918                                 goto again;
5919                         }
5920                         num_bytes = key.objectid - bytenr;
5921                 }
5922                 path->slots[0]++;
5923         }
5924         ret = 0;
5925
5926 out:
5927         if (num_bytes && !ret) {
5928                 fprintf(stderr, "There are no extents for csum range "
5929                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5930                 ret = 1;
5931         }
5932
5933         btrfs_free_path(path);
5934         return ret;
5935 }
5936
5937 static int check_csums(struct btrfs_root *root)
5938 {
5939         struct btrfs_path *path;
5940         struct extent_buffer *leaf;
5941         struct btrfs_key key;
5942         u64 offset = 0, num_bytes = 0;
5943         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5944         int errors = 0;
5945         int ret;
5946         u64 data_len;
5947         unsigned long leaf_offset;
5948
5949         root = root->fs_info->csum_root;
5950         if (!extent_buffer_uptodate(root->node)) {
5951                 fprintf(stderr, "No valid csum tree found\n");
5952                 return -ENOENT;
5953         }
5954
5955         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5956         key.type = BTRFS_EXTENT_CSUM_KEY;
5957         key.offset = 0;
5958
5959         path = btrfs_alloc_path();
5960         if (!path)
5961                 return -ENOMEM;
5962
5963         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5964         if (ret < 0) {
5965                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5966                 btrfs_free_path(path);
5967                 return ret;
5968         }
5969
5970         if (ret > 0 && path->slots[0])
5971                 path->slots[0]--;
5972         ret = 0;
5973
5974         while (1) {
5975                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5976                         ret = btrfs_next_leaf(root, path);
5977                         if (ret < 0) {
5978                                 fprintf(stderr, "Error going to next leaf "
5979                                         "%d\n", ret);
5980                                 break;
5981                         }
5982                         if (ret)
5983                                 break;
5984                 }
5985                 leaf = path->nodes[0];
5986
5987                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5988                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5989                         path->slots[0]++;
5990                         continue;
5991                 }
5992
5993                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5994                               csum_size) * root->sectorsize;
5995                 if (!check_data_csum)
5996                         goto skip_csum_check;
5997                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5998                 ret = check_extent_csums(root, key.offset, data_len,
5999                                          leaf_offset, leaf);
6000                 if (ret)
6001                         break;
6002 skip_csum_check:
6003                 if (!num_bytes) {
6004                         offset = key.offset;
6005                 } else if (key.offset != offset + num_bytes) {
6006                         ret = check_extent_exists(root, offset, num_bytes);
6007                         if (ret) {
6008                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6009                                         "there is no extent record\n",
6010                                         offset, offset+num_bytes);
6011                                 errors++;
6012                         }
6013                         offset = key.offset;
6014                         num_bytes = 0;
6015                 }
6016                 num_bytes += data_len;
6017                 path->slots[0]++;
6018         }
6019
6020         btrfs_free_path(path);
6021         return errors;
6022 }
6023
6024 static int is_dropped_key(struct btrfs_key *key,
6025                           struct btrfs_key *drop_key) {
6026         if (key->objectid < drop_key->objectid)
6027                 return 1;
6028         else if (key->objectid == drop_key->objectid) {
6029                 if (key->type < drop_key->type)
6030                         return 1;
6031                 else if (key->type == drop_key->type) {
6032                         if (key->offset < drop_key->offset)
6033                                 return 1;
6034                 }
6035         }
6036         return 0;
6037 }
6038
6039 /*
6040  * Here are the rules for FULL_BACKREF.
6041  *
6042  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6043  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6044  *      FULL_BACKREF set.
6045  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6046  *    if it happened after the relocation occurred since we'll have dropped the
6047  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6048  *    have no real way to know for sure.
6049  *
6050  * We process the blocks one root at a time, and we start from the lowest root
6051  * objectid and go to the highest.  So we can just lookup the owner backref for
6052  * the record and if we don't find it then we know it doesn't exist and we have
6053  * a FULL BACKREF.
6054  *
6055  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6056  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6057  * be set or not and then we can check later once we've gathered all the refs.
6058  */
6059 static int calc_extent_flag(struct btrfs_root *root,
6060                            struct cache_tree *extent_cache,
6061                            struct extent_buffer *buf,
6062                            struct root_item_record *ri,
6063                            u64 *flags)
6064 {
6065         struct extent_record *rec;
6066         struct cache_extent *cache;
6067         struct tree_backref *tback;
6068         u64 owner = 0;
6069
6070         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6071         /* we have added this extent before */
6072         if (!cache)
6073                 return -ENOENT;
6074
6075         rec = container_of(cache, struct extent_record, cache);
6076
6077         /*
6078          * Except file/reloc tree, we can not have
6079          * FULL BACKREF MODE
6080          */
6081         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6082                 goto normal;
6083         /*
6084          * root node
6085          */
6086         if (buf->start == ri->bytenr)
6087                 goto normal;
6088
6089         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6090                 goto full_backref;
6091
6092         owner = btrfs_header_owner(buf);
6093         if (owner == ri->objectid)
6094                 goto normal;
6095
6096         tback = find_tree_backref(rec, 0, owner);
6097         if (!tback)
6098                 goto full_backref;
6099 normal:
6100         *flags = 0;
6101         if (rec->flag_block_full_backref != FLAG_UNSET &&
6102             rec->flag_block_full_backref != 0)
6103                 rec->bad_full_backref = 1;
6104         return 0;
6105 full_backref:
6106         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6107         if (rec->flag_block_full_backref != FLAG_UNSET &&
6108             rec->flag_block_full_backref != 1)
6109                 rec->bad_full_backref = 1;
6110         return 0;
6111 }
6112
6113 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6114 {
6115         fprintf(stderr, "Invalid key type(");
6116         print_key_type(stderr, 0, key_type);
6117         fprintf(stderr, ") found in root(");
6118         print_objectid(stderr, rootid, 0);
6119         fprintf(stderr, ")\n");
6120 }
6121
6122 /*
6123  * Check if the key is valid with its extent buffer.
6124  *
6125  * This is a early check in case invalid key exists in a extent buffer
6126  * This is not comprehensive yet, but should prevent wrong key/item passed
6127  * further
6128  */
6129 static int check_type_with_root(u64 rootid, u8 key_type)
6130 {
6131         switch (key_type) {
6132         /* Only valid in chunk tree */
6133         case BTRFS_DEV_ITEM_KEY:
6134         case BTRFS_CHUNK_ITEM_KEY:
6135                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6136                         goto err;
6137                 break;
6138         /* valid in csum and log tree */
6139         case BTRFS_CSUM_TREE_OBJECTID:
6140                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6141                       is_fstree(rootid)))
6142                         goto err;
6143                 break;
6144         case BTRFS_EXTENT_ITEM_KEY:
6145         case BTRFS_METADATA_ITEM_KEY:
6146         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6147                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6148                         goto err;
6149                 break;
6150         case BTRFS_ROOT_ITEM_KEY:
6151                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6152                         goto err;
6153                 break;
6154         case BTRFS_DEV_EXTENT_KEY:
6155                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6156                         goto err;
6157                 break;
6158         }
6159         return 0;
6160 err:
6161         report_mismatch_key_root(key_type, rootid);
6162         return -EINVAL;
6163 }
6164
6165 static int run_next_block(struct btrfs_root *root,
6166                           struct block_info *bits,
6167                           int bits_nr,
6168                           u64 *last,
6169                           struct cache_tree *pending,
6170                           struct cache_tree *seen,
6171                           struct cache_tree *reada,
6172                           struct cache_tree *nodes,
6173                           struct cache_tree *extent_cache,
6174                           struct cache_tree *chunk_cache,
6175                           struct rb_root *dev_cache,
6176                           struct block_group_tree *block_group_cache,
6177                           struct device_extent_tree *dev_extent_cache,
6178                           struct root_item_record *ri)
6179 {
6180         struct extent_buffer *buf;
6181         struct extent_record *rec = NULL;
6182         u64 bytenr;
6183         u32 size;
6184         u64 parent;
6185         u64 owner;
6186         u64 flags;
6187         u64 ptr;
6188         u64 gen = 0;
6189         int ret = 0;
6190         int i;
6191         int nritems;
6192         struct btrfs_key key;
6193         struct cache_extent *cache;
6194         int reada_bits;
6195
6196         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6197                                     bits_nr, &reada_bits);
6198         if (nritems == 0)
6199                 return 1;
6200
6201         if (!reada_bits) {
6202                 for(i = 0; i < nritems; i++) {
6203                         ret = add_cache_extent(reada, bits[i].start,
6204                                                bits[i].size);
6205                         if (ret == -EEXIST)
6206                                 continue;
6207
6208                         /* fixme, get the parent transid */
6209                         readahead_tree_block(root, bits[i].start,
6210                                              bits[i].size, 0);
6211                 }
6212         }
6213         *last = bits[0].start;
6214         bytenr = bits[0].start;
6215         size = bits[0].size;
6216
6217         cache = lookup_cache_extent(pending, bytenr, size);
6218         if (cache) {
6219                 remove_cache_extent(pending, cache);
6220                 free(cache);
6221         }
6222         cache = lookup_cache_extent(reada, bytenr, size);
6223         if (cache) {
6224                 remove_cache_extent(reada, cache);
6225                 free(cache);
6226         }
6227         cache = lookup_cache_extent(nodes, bytenr, size);
6228         if (cache) {
6229                 remove_cache_extent(nodes, cache);
6230                 free(cache);
6231         }
6232         cache = lookup_cache_extent(extent_cache, bytenr, size);
6233         if (cache) {
6234                 rec = container_of(cache, struct extent_record, cache);
6235                 gen = rec->parent_generation;
6236         }
6237
6238         /* fixme, get the real parent transid */
6239         buf = read_tree_block(root, bytenr, size, gen);
6240         if (!extent_buffer_uptodate(buf)) {
6241                 record_bad_block_io(root->fs_info,
6242                                     extent_cache, bytenr, size);
6243                 goto out;
6244         }
6245
6246         nritems = btrfs_header_nritems(buf);
6247
6248         flags = 0;
6249         if (!init_extent_tree) {
6250                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6251                                        btrfs_header_level(buf), 1, NULL,
6252                                        &flags);
6253                 if (ret < 0) {
6254                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6255                         if (ret < 0) {
6256                                 fprintf(stderr, "Couldn't calc extent flags\n");
6257                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6258                         }
6259                 }
6260         } else {
6261                 flags = 0;
6262                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6263                 if (ret < 0) {
6264                         fprintf(stderr, "Couldn't calc extent flags\n");
6265                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6266                 }
6267         }
6268
6269         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6270                 if (ri != NULL &&
6271                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6272                     ri->objectid == btrfs_header_owner(buf)) {
6273                         /*
6274                          * Ok we got to this block from it's original owner and
6275                          * we have FULL_BACKREF set.  Relocation can leave
6276                          * converted blocks over so this is altogether possible,
6277                          * however it's not possible if the generation > the
6278                          * last snapshot, so check for this case.
6279                          */
6280                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6281                             btrfs_header_generation(buf) > ri->last_snapshot) {
6282                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6283                                 rec->bad_full_backref = 1;
6284                         }
6285                 }
6286         } else {
6287                 if (ri != NULL &&
6288                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6289                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6290                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6291                         rec->bad_full_backref = 1;
6292                 }
6293         }
6294
6295         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6296                 rec->flag_block_full_backref = 1;
6297                 parent = bytenr;
6298                 owner = 0;
6299         } else {
6300                 rec->flag_block_full_backref = 0;
6301                 parent = 0;
6302                 owner = btrfs_header_owner(buf);
6303         }
6304
6305         ret = check_block(root, extent_cache, buf, flags);
6306         if (ret)
6307                 goto out;
6308
6309         if (btrfs_is_leaf(buf)) {
6310                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6311                 for (i = 0; i < nritems; i++) {
6312                         struct btrfs_file_extent_item *fi;
6313                         btrfs_item_key_to_cpu(buf, &key, i);
6314                         /*
6315                          * Check key type against the leaf owner.
6316                          * Could filter quite a lot of early error if
6317                          * owner is correct
6318                          */
6319                         if (check_type_with_root(btrfs_header_owner(buf),
6320                                                  key.type)) {
6321                                 fprintf(stderr, "ignoring invalid key\n");
6322                                 continue;
6323                         }
6324                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6325                                 process_extent_item(root, extent_cache, buf,
6326                                                     i);
6327                                 continue;
6328                         }
6329                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6330                                 process_extent_item(root, extent_cache, buf,
6331                                                     i);
6332                                 continue;
6333                         }
6334                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6335                                 total_csum_bytes +=
6336                                         btrfs_item_size_nr(buf, i);
6337                                 continue;
6338                         }
6339                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6340                                 process_chunk_item(chunk_cache, &key, buf, i);
6341                                 continue;
6342                         }
6343                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6344                                 process_device_item(dev_cache, &key, buf, i);
6345                                 continue;
6346                         }
6347                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6348                                 process_block_group_item(block_group_cache,
6349                                         &key, buf, i);
6350                                 continue;
6351                         }
6352                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6353                                 process_device_extent_item(dev_extent_cache,
6354                                         &key, buf, i);
6355                                 continue;
6356
6357                         }
6358                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6359 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6360                                 process_extent_ref_v0(extent_cache, buf, i);
6361 #else
6362                                 BUG();
6363 #endif
6364                                 continue;
6365                         }
6366
6367                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6368                                 ret = add_tree_backref(extent_cache,
6369                                                 key.objectid, 0, key.offset, 0);
6370                                 if (ret < 0)
6371                                         error("add_tree_backref failed: %s",
6372                                               strerror(-ret));
6373                                 continue;
6374                         }
6375                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6376                                 ret = add_tree_backref(extent_cache,
6377                                                 key.objectid, key.offset, 0, 0);
6378                                 if (ret < 0)
6379                                         error("add_tree_backref failed: %s",
6380                                               strerror(-ret));
6381                                 continue;
6382                         }
6383                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6384                                 struct btrfs_extent_data_ref *ref;
6385                                 ref = btrfs_item_ptr(buf, i,
6386                                                 struct btrfs_extent_data_ref);
6387                                 add_data_backref(extent_cache,
6388                                         key.objectid, 0,
6389                                         btrfs_extent_data_ref_root(buf, ref),
6390                                         btrfs_extent_data_ref_objectid(buf,
6391                                                                        ref),
6392                                         btrfs_extent_data_ref_offset(buf, ref),
6393                                         btrfs_extent_data_ref_count(buf, ref),
6394                                         0, root->sectorsize);
6395                                 continue;
6396                         }
6397                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6398                                 struct btrfs_shared_data_ref *ref;
6399                                 ref = btrfs_item_ptr(buf, i,
6400                                                 struct btrfs_shared_data_ref);
6401                                 add_data_backref(extent_cache,
6402                                         key.objectid, key.offset, 0, 0, 0,
6403                                         btrfs_shared_data_ref_count(buf, ref),
6404                                         0, root->sectorsize);
6405                                 continue;
6406                         }
6407                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6408                                 struct bad_item *bad;
6409
6410                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6411                                         continue;
6412                                 if (!owner)
6413                                         continue;
6414                                 bad = malloc(sizeof(struct bad_item));
6415                                 if (!bad)
6416                                         continue;
6417                                 INIT_LIST_HEAD(&bad->list);
6418                                 memcpy(&bad->key, &key,
6419                                        sizeof(struct btrfs_key));
6420                                 bad->root_id = owner;
6421                                 list_add_tail(&bad->list, &delete_items);
6422                                 continue;
6423                         }
6424                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6425                                 continue;
6426                         fi = btrfs_item_ptr(buf, i,
6427                                             struct btrfs_file_extent_item);
6428                         if (btrfs_file_extent_type(buf, fi) ==
6429                             BTRFS_FILE_EXTENT_INLINE)
6430                                 continue;
6431                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6432                                 continue;
6433
6434                         data_bytes_allocated +=
6435                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6436                         if (data_bytes_allocated < root->sectorsize) {
6437                                 abort();
6438                         }
6439                         data_bytes_referenced +=
6440                                 btrfs_file_extent_num_bytes(buf, fi);
6441                         add_data_backref(extent_cache,
6442                                 btrfs_file_extent_disk_bytenr(buf, fi),
6443                                 parent, owner, key.objectid, key.offset -
6444                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6445                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6446                 }
6447         } else {
6448                 int level;
6449                 struct btrfs_key first_key;
6450
6451                 first_key.objectid = 0;
6452
6453                 if (nritems > 0)
6454                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6455                 level = btrfs_header_level(buf);
6456                 for (i = 0; i < nritems; i++) {
6457                         struct extent_record tmpl;
6458
6459                         ptr = btrfs_node_blockptr(buf, i);
6460                         size = root->nodesize;
6461                         btrfs_node_key_to_cpu(buf, &key, i);
6462                         if (ri != NULL) {
6463                                 if ((level == ri->drop_level)
6464                                     && is_dropped_key(&key, &ri->drop_key)) {
6465                                         continue;
6466                                 }
6467                         }
6468
6469                         memset(&tmpl, 0, sizeof(tmpl));
6470                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6471                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6472                         tmpl.start = ptr;
6473                         tmpl.nr = size;
6474                         tmpl.refs = 1;
6475                         tmpl.metadata = 1;
6476                         tmpl.max_size = size;
6477                         ret = add_extent_rec(extent_cache, &tmpl);
6478                         if (ret < 0)
6479                                 goto out;
6480
6481                         ret = add_tree_backref(extent_cache, ptr, parent,
6482                                         owner, 1);
6483                         if (ret < 0) {
6484                                 error("add_tree_backref failed: %s",
6485                                       strerror(-ret));
6486                                 continue;
6487                         }
6488
6489                         if (level > 1) {
6490                                 add_pending(nodes, seen, ptr, size);
6491                         } else {
6492                                 add_pending(pending, seen, ptr, size);
6493                         }
6494                 }
6495                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6496                                       nritems) * sizeof(struct btrfs_key_ptr);
6497         }
6498         total_btree_bytes += buf->len;
6499         if (fs_root_objectid(btrfs_header_owner(buf)))
6500                 total_fs_tree_bytes += buf->len;
6501         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6502                 total_extent_tree_bytes += buf->len;
6503         if (!found_old_backref &&
6504             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6505             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6506             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6507                 found_old_backref = 1;
6508 out:
6509         free_extent_buffer(buf);
6510         return ret;
6511 }
6512
6513 static int add_root_to_pending(struct extent_buffer *buf,
6514                                struct cache_tree *extent_cache,
6515                                struct cache_tree *pending,
6516                                struct cache_tree *seen,
6517                                struct cache_tree *nodes,
6518                                u64 objectid)
6519 {
6520         struct extent_record tmpl;
6521         int ret;
6522
6523         if (btrfs_header_level(buf) > 0)
6524                 add_pending(nodes, seen, buf->start, buf->len);
6525         else
6526                 add_pending(pending, seen, buf->start, buf->len);
6527
6528         memset(&tmpl, 0, sizeof(tmpl));
6529         tmpl.start = buf->start;
6530         tmpl.nr = buf->len;
6531         tmpl.is_root = 1;
6532         tmpl.refs = 1;
6533         tmpl.metadata = 1;
6534         tmpl.max_size = buf->len;
6535         add_extent_rec(extent_cache, &tmpl);
6536
6537         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6538             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6539                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6540                                 0, 1);
6541         else
6542                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6543                                 1);
6544         return ret;
6545 }
6546
6547 /* as we fix the tree, we might be deleting blocks that
6548  * we're tracking for repair.  This hook makes sure we
6549  * remove any backrefs for blocks as we are fixing them.
6550  */
6551 static int free_extent_hook(struct btrfs_trans_handle *trans,
6552                             struct btrfs_root *root,
6553                             u64 bytenr, u64 num_bytes, u64 parent,
6554                             u64 root_objectid, u64 owner, u64 offset,
6555                             int refs_to_drop)
6556 {
6557         struct extent_record *rec;
6558         struct cache_extent *cache;
6559         int is_data;
6560         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6561
6562         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6563         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6564         if (!cache)
6565                 return 0;
6566
6567         rec = container_of(cache, struct extent_record, cache);
6568         if (is_data) {
6569                 struct data_backref *back;
6570                 back = find_data_backref(rec, parent, root_objectid, owner,
6571                                          offset, 1, bytenr, num_bytes);
6572                 if (!back)
6573                         goto out;
6574                 if (back->node.found_ref) {
6575                         back->found_ref -= refs_to_drop;
6576                         if (rec->refs)
6577                                 rec->refs -= refs_to_drop;
6578                 }
6579                 if (back->node.found_extent_tree) {
6580                         back->num_refs -= refs_to_drop;
6581                         if (rec->extent_item_refs)
6582                                 rec->extent_item_refs -= refs_to_drop;
6583                 }
6584                 if (back->found_ref == 0)
6585                         back->node.found_ref = 0;
6586                 if (back->num_refs == 0)
6587                         back->node.found_extent_tree = 0;
6588
6589                 if (!back->node.found_extent_tree && back->node.found_ref) {
6590                         list_del(&back->node.list);
6591                         free(back);
6592                 }
6593         } else {
6594                 struct tree_backref *back;
6595                 back = find_tree_backref(rec, parent, root_objectid);
6596                 if (!back)
6597                         goto out;
6598                 if (back->node.found_ref) {
6599                         if (rec->refs)
6600                                 rec->refs--;
6601                         back->node.found_ref = 0;
6602                 }
6603                 if (back->node.found_extent_tree) {
6604                         if (rec->extent_item_refs)
6605                                 rec->extent_item_refs--;
6606                         back->node.found_extent_tree = 0;
6607                 }
6608                 if (!back->node.found_extent_tree && back->node.found_ref) {
6609                         list_del(&back->node.list);
6610                         free(back);
6611                 }
6612         }
6613         maybe_free_extent_rec(extent_cache, rec);
6614 out:
6615         return 0;
6616 }
6617
6618 static int delete_extent_records(struct btrfs_trans_handle *trans,
6619                                  struct btrfs_root *root,
6620                                  struct btrfs_path *path,
6621                                  u64 bytenr, u64 new_len)
6622 {
6623         struct btrfs_key key;
6624         struct btrfs_key found_key;
6625         struct extent_buffer *leaf;
6626         int ret;
6627         int slot;
6628
6629
6630         key.objectid = bytenr;
6631         key.type = (u8)-1;
6632         key.offset = (u64)-1;
6633
6634         while(1) {
6635                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6636                                         &key, path, 0, 1);
6637                 if (ret < 0)
6638                         break;
6639
6640                 if (ret > 0) {
6641                         ret = 0;
6642                         if (path->slots[0] == 0)
6643                                 break;
6644                         path->slots[0]--;
6645                 }
6646                 ret = 0;
6647
6648                 leaf = path->nodes[0];
6649                 slot = path->slots[0];
6650
6651                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6652                 if (found_key.objectid != bytenr)
6653                         break;
6654
6655                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6656                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6657                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6658                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6659                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6660                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6661                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6662                         btrfs_release_path(path);
6663                         if (found_key.type == 0) {
6664                                 if (found_key.offset == 0)
6665                                         break;
6666                                 key.offset = found_key.offset - 1;
6667                                 key.type = found_key.type;
6668                         }
6669                         key.type = found_key.type - 1;
6670                         key.offset = (u64)-1;
6671                         continue;
6672                 }
6673
6674                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6675                         found_key.objectid, found_key.type, found_key.offset);
6676
6677                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6678                 if (ret)
6679                         break;
6680                 btrfs_release_path(path);
6681
6682                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6683                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6684                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6685                                 found_key.offset : root->nodesize;
6686
6687                         ret = btrfs_update_block_group(trans, root, bytenr,
6688                                                        bytes, 0, 0);
6689                         if (ret)
6690                                 break;
6691                 }
6692         }
6693
6694         btrfs_release_path(path);
6695         return ret;
6696 }
6697
6698 /*
6699  * for a single backref, this will allocate a new extent
6700  * and add the backref to it.
6701  */
6702 static int record_extent(struct btrfs_trans_handle *trans,
6703                          struct btrfs_fs_info *info,
6704                          struct btrfs_path *path,
6705                          struct extent_record *rec,
6706                          struct extent_backref *back,
6707                          int allocated, u64 flags)
6708 {
6709         int ret;
6710         struct btrfs_root *extent_root = info->extent_root;
6711         struct extent_buffer *leaf;
6712         struct btrfs_key ins_key;
6713         struct btrfs_extent_item *ei;
6714         struct tree_backref *tback;
6715         struct data_backref *dback;
6716         struct btrfs_tree_block_info *bi;
6717
6718         if (!back->is_data)
6719                 rec->max_size = max_t(u64, rec->max_size,
6720                                     info->extent_root->nodesize);
6721
6722         if (!allocated) {
6723                 u32 item_size = sizeof(*ei);
6724
6725                 if (!back->is_data)
6726                         item_size += sizeof(*bi);
6727
6728                 ins_key.objectid = rec->start;
6729                 ins_key.offset = rec->max_size;
6730                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6731
6732                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6733                                         &ins_key, item_size);
6734                 if (ret)
6735                         goto fail;
6736
6737                 leaf = path->nodes[0];
6738                 ei = btrfs_item_ptr(leaf, path->slots[0],
6739                                     struct btrfs_extent_item);
6740
6741                 btrfs_set_extent_refs(leaf, ei, 0);
6742                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6743
6744                 if (back->is_data) {
6745                         btrfs_set_extent_flags(leaf, ei,
6746                                                BTRFS_EXTENT_FLAG_DATA);
6747                 } else {
6748                         struct btrfs_disk_key copy_key;;
6749
6750                         tback = to_tree_backref(back);
6751                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6752                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6753                                              sizeof(*bi));
6754
6755                         btrfs_set_disk_key_objectid(&copy_key,
6756                                                     rec->info_objectid);
6757                         btrfs_set_disk_key_type(&copy_key, 0);
6758                         btrfs_set_disk_key_offset(&copy_key, 0);
6759
6760                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6761                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6762
6763                         btrfs_set_extent_flags(leaf, ei,
6764                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6765                 }
6766
6767                 btrfs_mark_buffer_dirty(leaf);
6768                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6769                                                rec->max_size, 1, 0);
6770                 if (ret)
6771                         goto fail;
6772                 btrfs_release_path(path);
6773         }
6774
6775         if (back->is_data) {
6776                 u64 parent;
6777                 int i;
6778
6779                 dback = to_data_backref(back);
6780                 if (back->full_backref)
6781                         parent = dback->parent;
6782                 else
6783                         parent = 0;
6784
6785                 for (i = 0; i < dback->found_ref; i++) {
6786                         /* if parent != 0, we're doing a full backref
6787                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6788                          * just makes the backref allocator create a data
6789                          * backref
6790                          */
6791                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6792                                                    rec->start, rec->max_size,
6793                                                    parent,
6794                                                    dback->root,
6795                                                    parent ?
6796                                                    BTRFS_FIRST_FREE_OBJECTID :
6797                                                    dback->owner,
6798                                                    dback->offset);
6799                         if (ret)
6800                                 break;
6801                 }
6802                 fprintf(stderr, "adding new data backref"
6803                                 " on %llu %s %llu owner %llu"
6804                                 " offset %llu found %d\n",
6805                                 (unsigned long long)rec->start,
6806                                 back->full_backref ?
6807                                 "parent" : "root",
6808                                 back->full_backref ?
6809                                 (unsigned long long)parent :
6810                                 (unsigned long long)dback->root,
6811                                 (unsigned long long)dback->owner,
6812                                 (unsigned long long)dback->offset,
6813                                 dback->found_ref);
6814         } else {
6815                 u64 parent;
6816
6817                 tback = to_tree_backref(back);
6818                 if (back->full_backref)
6819                         parent = tback->parent;
6820                 else
6821                         parent = 0;
6822
6823                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6824                                            rec->start, rec->max_size,
6825                                            parent, tback->root, 0, 0);
6826                 fprintf(stderr, "adding new tree backref on "
6827                         "start %llu len %llu parent %llu root %llu\n",
6828                         rec->start, rec->max_size, parent, tback->root);
6829         }
6830 fail:
6831         btrfs_release_path(path);
6832         return ret;
6833 }
6834
6835 static struct extent_entry *find_entry(struct list_head *entries,
6836                                        u64 bytenr, u64 bytes)
6837 {
6838         struct extent_entry *entry = NULL;
6839
6840         list_for_each_entry(entry, entries, list) {
6841                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6842                         return entry;
6843         }
6844
6845         return NULL;
6846 }
6847
6848 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6849 {
6850         struct extent_entry *entry, *best = NULL, *prev = NULL;
6851
6852         list_for_each_entry(entry, entries, list) {
6853                 if (!prev) {
6854                         prev = entry;
6855                         continue;
6856                 }
6857
6858                 /*
6859                  * If there are as many broken entries as entries then we know
6860                  * not to trust this particular entry.
6861                  */
6862                 if (entry->broken == entry->count)
6863                         continue;
6864
6865                 /*
6866                  * If our current entry == best then we can't be sure our best
6867                  * is really the best, so we need to keep searching.
6868                  */
6869                 if (best && best->count == entry->count) {
6870                         prev = entry;
6871                         best = NULL;
6872                         continue;
6873                 }
6874
6875                 /* Prev == entry, not good enough, have to keep searching */
6876                 if (!prev->broken && prev->count == entry->count)
6877                         continue;
6878
6879                 if (!best)
6880                         best = (prev->count > entry->count) ? prev : entry;
6881                 else if (best->count < entry->count)
6882                         best = entry;
6883                 prev = entry;
6884         }
6885
6886         return best;
6887 }
6888
6889 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6890                       struct data_backref *dback, struct extent_entry *entry)
6891 {
6892         struct btrfs_trans_handle *trans;
6893         struct btrfs_root *root;
6894         struct btrfs_file_extent_item *fi;
6895         struct extent_buffer *leaf;
6896         struct btrfs_key key;
6897         u64 bytenr, bytes;
6898         int ret, err;
6899
6900         key.objectid = dback->root;
6901         key.type = BTRFS_ROOT_ITEM_KEY;
6902         key.offset = (u64)-1;
6903         root = btrfs_read_fs_root(info, &key);
6904         if (IS_ERR(root)) {
6905                 fprintf(stderr, "Couldn't find root for our ref\n");
6906                 return -EINVAL;
6907         }
6908
6909         /*
6910          * The backref points to the original offset of the extent if it was
6911          * split, so we need to search down to the offset we have and then walk
6912          * forward until we find the backref we're looking for.
6913          */
6914         key.objectid = dback->owner;
6915         key.type = BTRFS_EXTENT_DATA_KEY;
6916         key.offset = dback->offset;
6917         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6918         if (ret < 0) {
6919                 fprintf(stderr, "Error looking up ref %d\n", ret);
6920                 return ret;
6921         }
6922
6923         while (1) {
6924                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6925                         ret = btrfs_next_leaf(root, path);
6926                         if (ret) {
6927                                 fprintf(stderr, "Couldn't find our ref, next\n");
6928                                 return -EINVAL;
6929                         }
6930                 }
6931                 leaf = path->nodes[0];
6932                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6933                 if (key.objectid != dback->owner ||
6934                     key.type != BTRFS_EXTENT_DATA_KEY) {
6935                         fprintf(stderr, "Couldn't find our ref, search\n");
6936                         return -EINVAL;
6937                 }
6938                 fi = btrfs_item_ptr(leaf, path->slots[0],
6939                                     struct btrfs_file_extent_item);
6940                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6941                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6942
6943                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6944                         break;
6945                 path->slots[0]++;
6946         }
6947
6948         btrfs_release_path(path);
6949
6950         trans = btrfs_start_transaction(root, 1);
6951         if (IS_ERR(trans))
6952                 return PTR_ERR(trans);
6953
6954         /*
6955          * Ok we have the key of the file extent we want to fix, now we can cow
6956          * down to the thing and fix it.
6957          */
6958         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6959         if (ret < 0) {
6960                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6961                         key.objectid, key.type, key.offset, ret);
6962                 goto out;
6963         }
6964         if (ret > 0) {
6965                 fprintf(stderr, "Well that's odd, we just found this key "
6966                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6967                         key.offset);
6968                 ret = -EINVAL;
6969                 goto out;
6970         }
6971         leaf = path->nodes[0];
6972         fi = btrfs_item_ptr(leaf, path->slots[0],
6973                             struct btrfs_file_extent_item);
6974
6975         if (btrfs_file_extent_compression(leaf, fi) &&
6976             dback->disk_bytenr != entry->bytenr) {
6977                 fprintf(stderr, "Ref doesn't match the record start and is "
6978                         "compressed, please take a btrfs-image of this file "
6979                         "system and send it to a btrfs developer so they can "
6980                         "complete this functionality for bytenr %Lu\n",
6981                         dback->disk_bytenr);
6982                 ret = -EINVAL;
6983                 goto out;
6984         }
6985
6986         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6987                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6988         } else if (dback->disk_bytenr > entry->bytenr) {
6989                 u64 off_diff, offset;
6990
6991                 off_diff = dback->disk_bytenr - entry->bytenr;
6992                 offset = btrfs_file_extent_offset(leaf, fi);
6993                 if (dback->disk_bytenr + offset +
6994                     btrfs_file_extent_num_bytes(leaf, fi) >
6995                     entry->bytenr + entry->bytes) {
6996                         fprintf(stderr, "Ref is past the entry end, please "
6997                                 "take a btrfs-image of this file system and "
6998                                 "send it to a btrfs developer, ref %Lu\n",
6999                                 dback->disk_bytenr);
7000                         ret = -EINVAL;
7001                         goto out;
7002                 }
7003                 offset += off_diff;
7004                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7005                 btrfs_set_file_extent_offset(leaf, fi, offset);
7006         } else if (dback->disk_bytenr < entry->bytenr) {
7007                 u64 offset;
7008
7009                 offset = btrfs_file_extent_offset(leaf, fi);
7010                 if (dback->disk_bytenr + offset < entry->bytenr) {
7011                         fprintf(stderr, "Ref is before the entry start, please"
7012                                 " take a btrfs-image of this file system and "
7013                                 "send it to a btrfs developer, ref %Lu\n",
7014                                 dback->disk_bytenr);
7015                         ret = -EINVAL;
7016                         goto out;
7017                 }
7018
7019                 offset += dback->disk_bytenr;
7020                 offset -= entry->bytenr;
7021                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7022                 btrfs_set_file_extent_offset(leaf, fi, offset);
7023         }
7024
7025         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7026
7027         /*
7028          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7029          * only do this if we aren't using compression, otherwise it's a
7030          * trickier case.
7031          */
7032         if (!btrfs_file_extent_compression(leaf, fi))
7033                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7034         else
7035                 printf("ram bytes may be wrong?\n");
7036         btrfs_mark_buffer_dirty(leaf);
7037 out:
7038         err = btrfs_commit_transaction(trans, root);
7039         btrfs_release_path(path);
7040         return ret ? ret : err;
7041 }
7042
7043 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7044                            struct extent_record *rec)
7045 {
7046         struct extent_backref *back;
7047         struct data_backref *dback;
7048         struct extent_entry *entry, *best = NULL;
7049         LIST_HEAD(entries);
7050         int nr_entries = 0;
7051         int broken_entries = 0;
7052         int ret = 0;
7053         short mismatch = 0;
7054
7055         /*
7056          * Metadata is easy and the backrefs should always agree on bytenr and
7057          * size, if not we've got bigger issues.
7058          */
7059         if (rec->metadata)
7060                 return 0;
7061
7062         list_for_each_entry(back, &rec->backrefs, list) {
7063                 if (back->full_backref || !back->is_data)
7064                         continue;
7065
7066                 dback = to_data_backref(back);
7067
7068                 /*
7069                  * We only pay attention to backrefs that we found a real
7070                  * backref for.
7071                  */
7072                 if (dback->found_ref == 0)
7073                         continue;
7074
7075                 /*
7076                  * For now we only catch when the bytes don't match, not the
7077                  * bytenr.  We can easily do this at the same time, but I want
7078                  * to have a fs image to test on before we just add repair
7079                  * functionality willy-nilly so we know we won't screw up the
7080                  * repair.
7081                  */
7082
7083                 entry = find_entry(&entries, dback->disk_bytenr,
7084                                    dback->bytes);
7085                 if (!entry) {
7086                         entry = malloc(sizeof(struct extent_entry));
7087                         if (!entry) {
7088                                 ret = -ENOMEM;
7089                                 goto out;
7090                         }
7091                         memset(entry, 0, sizeof(*entry));
7092                         entry->bytenr = dback->disk_bytenr;
7093                         entry->bytes = dback->bytes;
7094                         list_add_tail(&entry->list, &entries);
7095                         nr_entries++;
7096                 }
7097
7098                 /*
7099                  * If we only have on entry we may think the entries agree when
7100                  * in reality they don't so we have to do some extra checking.
7101                  */
7102                 if (dback->disk_bytenr != rec->start ||
7103                     dback->bytes != rec->nr || back->broken)
7104                         mismatch = 1;
7105
7106                 if (back->broken) {
7107                         entry->broken++;
7108                         broken_entries++;
7109                 }
7110
7111                 entry->count++;
7112         }
7113
7114         /* Yay all the backrefs agree, carry on good sir */
7115         if (nr_entries <= 1 && !mismatch)
7116                 goto out;
7117
7118         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7119                 "%Lu\n", rec->start);
7120
7121         /*
7122          * First we want to see if the backrefs can agree amongst themselves who
7123          * is right, so figure out which one of the entries has the highest
7124          * count.
7125          */
7126         best = find_most_right_entry(&entries);
7127
7128         /*
7129          * Ok so we may have an even split between what the backrefs think, so
7130          * this is where we use the extent ref to see what it thinks.
7131          */
7132         if (!best) {
7133                 entry = find_entry(&entries, rec->start, rec->nr);
7134                 if (!entry && (!broken_entries || !rec->found_rec)) {
7135                         fprintf(stderr, "Backrefs don't agree with each other "
7136                                 "and extent record doesn't agree with anybody,"
7137                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7138                                 rec->start, rec->nr);
7139                         ret = -EINVAL;
7140                         goto out;
7141                 } else if (!entry) {
7142                         /*
7143                          * Ok our backrefs were broken, we'll assume this is the
7144                          * correct value and add an entry for this range.
7145                          */
7146                         entry = malloc(sizeof(struct extent_entry));
7147                         if (!entry) {
7148                                 ret = -ENOMEM;
7149                                 goto out;
7150                         }
7151                         memset(entry, 0, sizeof(*entry));
7152                         entry->bytenr = rec->start;
7153                         entry->bytes = rec->nr;
7154                         list_add_tail(&entry->list, &entries);
7155                         nr_entries++;
7156                 }
7157                 entry->count++;
7158                 best = find_most_right_entry(&entries);
7159                 if (!best) {
7160                         fprintf(stderr, "Backrefs and extent record evenly "
7161                                 "split on who is right, this is going to "
7162                                 "require user input to fix bytenr %Lu bytes "
7163                                 "%Lu\n", rec->start, rec->nr);
7164                         ret = -EINVAL;
7165                         goto out;
7166                 }
7167         }
7168
7169         /*
7170          * I don't think this can happen currently as we'll abort() if we catch
7171          * this case higher up, but in case somebody removes that we still can't
7172          * deal with it properly here yet, so just bail out of that's the case.
7173          */
7174         if (best->bytenr != rec->start) {
7175                 fprintf(stderr, "Extent start and backref starts don't match, "
7176                         "please use btrfs-image on this file system and send "
7177                         "it to a btrfs developer so they can make fsck fix "
7178                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7179                         rec->start, rec->nr);
7180                 ret = -EINVAL;
7181                 goto out;
7182         }
7183
7184         /*
7185          * Ok great we all agreed on an extent record, let's go find the real
7186          * references and fix up the ones that don't match.
7187          */
7188         list_for_each_entry(back, &rec->backrefs, list) {
7189                 if (back->full_backref || !back->is_data)
7190                         continue;
7191
7192                 dback = to_data_backref(back);
7193
7194                 /*
7195                  * Still ignoring backrefs that don't have a real ref attached
7196                  * to them.
7197                  */
7198                 if (dback->found_ref == 0)
7199                         continue;
7200
7201                 if (dback->bytes == best->bytes &&
7202                     dback->disk_bytenr == best->bytenr)
7203                         continue;
7204
7205                 ret = repair_ref(info, path, dback, best);
7206                 if (ret)
7207                         goto out;
7208         }
7209
7210         /*
7211          * Ok we messed with the actual refs, which means we need to drop our
7212          * entire cache and go back and rescan.  I know this is a huge pain and
7213          * adds a lot of extra work, but it's the only way to be safe.  Once all
7214          * the backrefs agree we may not need to do anything to the extent
7215          * record itself.
7216          */
7217         ret = -EAGAIN;
7218 out:
7219         while (!list_empty(&entries)) {
7220                 entry = list_entry(entries.next, struct extent_entry, list);
7221                 list_del_init(&entry->list);
7222                 free(entry);
7223         }
7224         return ret;
7225 }
7226
7227 static int process_duplicates(struct btrfs_root *root,
7228                               struct cache_tree *extent_cache,
7229                               struct extent_record *rec)
7230 {
7231         struct extent_record *good, *tmp;
7232         struct cache_extent *cache;
7233         int ret;
7234
7235         /*
7236          * If we found a extent record for this extent then return, or if we
7237          * have more than one duplicate we are likely going to need to delete
7238          * something.
7239          */
7240         if (rec->found_rec || rec->num_duplicates > 1)
7241                 return 0;
7242
7243         /* Shouldn't happen but just in case */
7244         BUG_ON(!rec->num_duplicates);
7245
7246         /*
7247          * So this happens if we end up with a backref that doesn't match the
7248          * actual extent entry.  So either the backref is bad or the extent
7249          * entry is bad.  Either way we want to have the extent_record actually
7250          * reflect what we found in the extent_tree, so we need to take the
7251          * duplicate out and use that as the extent_record since the only way we
7252          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7253          */
7254         remove_cache_extent(extent_cache, &rec->cache);
7255
7256         good = to_extent_record(rec->dups.next);
7257         list_del_init(&good->list);
7258         INIT_LIST_HEAD(&good->backrefs);
7259         INIT_LIST_HEAD(&good->dups);
7260         good->cache.start = good->start;
7261         good->cache.size = good->nr;
7262         good->content_checked = 0;
7263         good->owner_ref_checked = 0;
7264         good->num_duplicates = 0;
7265         good->refs = rec->refs;
7266         list_splice_init(&rec->backrefs, &good->backrefs);
7267         while (1) {
7268                 cache = lookup_cache_extent(extent_cache, good->start,
7269                                             good->nr);
7270                 if (!cache)
7271                         break;
7272                 tmp = container_of(cache, struct extent_record, cache);
7273
7274                 /*
7275                  * If we find another overlapping extent and it's found_rec is
7276                  * set then it's a duplicate and we need to try and delete
7277                  * something.
7278                  */
7279                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7280                         if (list_empty(&good->list))
7281                                 list_add_tail(&good->list,
7282                                               &duplicate_extents);
7283                         good->num_duplicates += tmp->num_duplicates + 1;
7284                         list_splice_init(&tmp->dups, &good->dups);
7285                         list_del_init(&tmp->list);
7286                         list_add_tail(&tmp->list, &good->dups);
7287                         remove_cache_extent(extent_cache, &tmp->cache);
7288                         continue;
7289                 }
7290
7291                 /*
7292                  * Ok we have another non extent item backed extent rec, so lets
7293                  * just add it to this extent and carry on like we did above.
7294                  */
7295                 good->refs += tmp->refs;
7296                 list_splice_init(&tmp->backrefs, &good->backrefs);
7297                 remove_cache_extent(extent_cache, &tmp->cache);
7298                 free(tmp);
7299         }
7300         ret = insert_cache_extent(extent_cache, &good->cache);
7301         BUG_ON(ret);
7302         free(rec);
7303         return good->num_duplicates ? 0 : 1;
7304 }
7305
7306 static int delete_duplicate_records(struct btrfs_root *root,
7307                                     struct extent_record *rec)
7308 {
7309         struct btrfs_trans_handle *trans;
7310         LIST_HEAD(delete_list);
7311         struct btrfs_path *path;
7312         struct extent_record *tmp, *good, *n;
7313         int nr_del = 0;
7314         int ret = 0, err;
7315         struct btrfs_key key;
7316
7317         path = btrfs_alloc_path();
7318         if (!path) {
7319                 ret = -ENOMEM;
7320                 goto out;
7321         }
7322
7323         good = rec;
7324         /* Find the record that covers all of the duplicates. */
7325         list_for_each_entry(tmp, &rec->dups, list) {
7326                 if (good->start < tmp->start)
7327                         continue;
7328                 if (good->nr > tmp->nr)
7329                         continue;
7330
7331                 if (tmp->start + tmp->nr < good->start + good->nr) {
7332                         fprintf(stderr, "Ok we have overlapping extents that "
7333                                 "aren't completely covered by each other, this "
7334                                 "is going to require more careful thought.  "
7335                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7336                                 tmp->start, tmp->nr, good->start, good->nr);
7337                         abort();
7338                 }
7339                 good = tmp;
7340         }
7341
7342         if (good != rec)
7343                 list_add_tail(&rec->list, &delete_list);
7344
7345         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7346                 if (tmp == good)
7347                         continue;
7348                 list_move_tail(&tmp->list, &delete_list);
7349         }
7350
7351         root = root->fs_info->extent_root;
7352         trans = btrfs_start_transaction(root, 1);
7353         if (IS_ERR(trans)) {
7354                 ret = PTR_ERR(trans);
7355                 goto out;
7356         }
7357
7358         list_for_each_entry(tmp, &delete_list, list) {
7359                 if (tmp->found_rec == 0)
7360                         continue;
7361                 key.objectid = tmp->start;
7362                 key.type = BTRFS_EXTENT_ITEM_KEY;
7363                 key.offset = tmp->nr;
7364
7365                 /* Shouldn't happen but just in case */
7366                 if (tmp->metadata) {
7367                         fprintf(stderr, "Well this shouldn't happen, extent "
7368                                 "record overlaps but is metadata? "
7369                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7370                         abort();
7371                 }
7372
7373                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7374                 if (ret) {
7375                         if (ret > 0)
7376                                 ret = -EINVAL;
7377                         break;
7378                 }
7379                 ret = btrfs_del_item(trans, root, path);
7380                 if (ret)
7381                         break;
7382                 btrfs_release_path(path);
7383                 nr_del++;
7384         }
7385         err = btrfs_commit_transaction(trans, root);
7386         if (err && !ret)
7387                 ret = err;
7388 out:
7389         while (!list_empty(&delete_list)) {
7390                 tmp = to_extent_record(delete_list.next);
7391                 list_del_init(&tmp->list);
7392                 if (tmp == rec)
7393                         continue;
7394                 free(tmp);
7395         }
7396
7397         while (!list_empty(&rec->dups)) {
7398                 tmp = to_extent_record(rec->dups.next);
7399                 list_del_init(&tmp->list);
7400                 free(tmp);
7401         }
7402
7403         btrfs_free_path(path);
7404
7405         if (!ret && !nr_del)
7406                 rec->num_duplicates = 0;
7407
7408         return ret ? ret : nr_del;
7409 }
7410
7411 static int find_possible_backrefs(struct btrfs_fs_info *info,
7412                                   struct btrfs_path *path,
7413                                   struct cache_tree *extent_cache,
7414                                   struct extent_record *rec)
7415 {
7416         struct btrfs_root *root;
7417         struct extent_backref *back;
7418         struct data_backref *dback;
7419         struct cache_extent *cache;
7420         struct btrfs_file_extent_item *fi;
7421         struct btrfs_key key;
7422         u64 bytenr, bytes;
7423         int ret;
7424
7425         list_for_each_entry(back, &rec->backrefs, list) {
7426                 /* Don't care about full backrefs (poor unloved backrefs) */
7427                 if (back->full_backref || !back->is_data)
7428                         continue;
7429
7430                 dback = to_data_backref(back);
7431
7432                 /* We found this one, we don't need to do a lookup */
7433                 if (dback->found_ref)
7434                         continue;
7435
7436                 key.objectid = dback->root;
7437                 key.type = BTRFS_ROOT_ITEM_KEY;
7438                 key.offset = (u64)-1;
7439
7440                 root = btrfs_read_fs_root(info, &key);
7441
7442                 /* No root, definitely a bad ref, skip */
7443                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7444                         continue;
7445                 /* Other err, exit */
7446                 if (IS_ERR(root))
7447                         return PTR_ERR(root);
7448
7449                 key.objectid = dback->owner;
7450                 key.type = BTRFS_EXTENT_DATA_KEY;
7451                 key.offset = dback->offset;
7452                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7453                 if (ret) {
7454                         btrfs_release_path(path);
7455                         if (ret < 0)
7456                                 return ret;
7457                         /* Didn't find it, we can carry on */
7458                         ret = 0;
7459                         continue;
7460                 }
7461
7462                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7463                                     struct btrfs_file_extent_item);
7464                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7465                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7466                 btrfs_release_path(path);
7467                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7468                 if (cache) {
7469                         struct extent_record *tmp;
7470                         tmp = container_of(cache, struct extent_record, cache);
7471
7472                         /*
7473                          * If we found an extent record for the bytenr for this
7474                          * particular backref then we can't add it to our
7475                          * current extent record.  We only want to add backrefs
7476                          * that don't have a corresponding extent item in the
7477                          * extent tree since they likely belong to this record
7478                          * and we need to fix it if it doesn't match bytenrs.
7479                          */
7480                         if  (tmp->found_rec)
7481                                 continue;
7482                 }
7483
7484                 dback->found_ref += 1;
7485                 dback->disk_bytenr = bytenr;
7486                 dback->bytes = bytes;
7487
7488                 /*
7489                  * Set this so the verify backref code knows not to trust the
7490                  * values in this backref.
7491                  */
7492                 back->broken = 1;
7493         }
7494
7495         return 0;
7496 }
7497
7498 /*
7499  * Record orphan data ref into corresponding root.
7500  *
7501  * Return 0 if the extent item contains a data ref and it was recorded.
7502  * Return 1 if the extent item contains no useful data ref.
7503  *   In that case, it may contain only shared_dataref or metadata backrefs,
7504  *   or the file extent exists (this should be handled by the extent bytenr
7505  *   recovery routine).
7506  * Return <0 if something goes wrong.
7507  */
7508 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7509                                       struct extent_record *rec)
7510 {
7511         struct btrfs_key key;
7512         struct btrfs_root *dest_root;
7513         struct extent_backref *back;
7514         struct data_backref *dback;
7515         struct orphan_data_extent *orphan;
7516         struct btrfs_path *path;
7517         int recorded_data_ref = 0;
7518         int ret = 0;
7519
7520         if (rec->metadata)
7521                 return 1;
7522         path = btrfs_alloc_path();
7523         if (!path)
7524                 return -ENOMEM;
7525         list_for_each_entry(back, &rec->backrefs, list) {
7526                 if (back->full_backref || !back->is_data ||
7527                     !back->found_extent_tree)
7528                         continue;
7529                 dback = to_data_backref(back);
7530                 if (dback->found_ref)
7531                         continue;
7532                 key.objectid = dback->root;
7533                 key.type = BTRFS_ROOT_ITEM_KEY;
7534                 key.offset = (u64)-1;
7535
7536                 dest_root = btrfs_read_fs_root(fs_info, &key);
7537
7538                 /* For non-exist root we just skip it */
7539                 if (IS_ERR(dest_root) || !dest_root)
7540                         continue;
7541
7542                 key.objectid = dback->owner;
7543                 key.type = BTRFS_EXTENT_DATA_KEY;
7544                 key.offset = dback->offset;
7545
7546                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7547                 btrfs_release_path(path);
7548                 /*
7549                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7550                  * we need to record it for inode/file extent rebuild.
7551                  * For ret > 0, we record it only for file extent rebuild.
7552                  * For ret == 0, the file extent exists but only bytenr
7553                  * mismatch, let the original bytenr fix routine to handle,
7554                  * don't record it.
7555                  */
7556                 if (ret == 0)
7557                         continue;
7558                 ret = 0;
7559                 orphan = malloc(sizeof(*orphan));
7560                 if (!orphan) {
7561                         ret = -ENOMEM;
7562                         goto out;
7563                 }
7564                 INIT_LIST_HEAD(&orphan->list);
7565                 orphan->root = dback->root;
7566                 orphan->objectid = dback->owner;
7567                 orphan->offset = dback->offset;
7568                 orphan->disk_bytenr = rec->cache.start;
7569                 orphan->disk_len = rec->cache.size;
7570                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7571                 recorded_data_ref = 1;
7572         }
7573 out:
7574         btrfs_free_path(path);
7575         if (!ret)
7576                 return !recorded_data_ref;
7577         else
7578                 return ret;
7579 }
7580
7581 /*
7582  * when an incorrect extent item is found, this will delete
7583  * all of the existing entries for it and recreate them
7584  * based on what the tree scan found.
7585  */
7586 static int fixup_extent_refs(struct btrfs_fs_info *info,
7587                              struct cache_tree *extent_cache,
7588                              struct extent_record *rec)
7589 {
7590         struct btrfs_trans_handle *trans = NULL;
7591         int ret;
7592         struct btrfs_path *path;
7593         struct list_head *cur = rec->backrefs.next;
7594         struct cache_extent *cache;
7595         struct extent_backref *back;
7596         int allocated = 0;
7597         u64 flags = 0;
7598
7599         if (rec->flag_block_full_backref)
7600                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7601
7602         path = btrfs_alloc_path();
7603         if (!path)
7604                 return -ENOMEM;
7605
7606         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7607                 /*
7608                  * Sometimes the backrefs themselves are so broken they don't
7609                  * get attached to any meaningful rec, so first go back and
7610                  * check any of our backrefs that we couldn't find and throw
7611                  * them into the list if we find the backref so that
7612                  * verify_backrefs can figure out what to do.
7613                  */
7614                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7615                 if (ret < 0)
7616                         goto out;
7617         }
7618
7619         /* step one, make sure all of the backrefs agree */
7620         ret = verify_backrefs(info, path, rec);
7621         if (ret < 0)
7622                 goto out;
7623
7624         trans = btrfs_start_transaction(info->extent_root, 1);
7625         if (IS_ERR(trans)) {
7626                 ret = PTR_ERR(trans);
7627                 goto out;
7628         }
7629
7630         /* step two, delete all the existing records */
7631         ret = delete_extent_records(trans, info->extent_root, path,
7632                                     rec->start, rec->max_size);
7633
7634         if (ret < 0)
7635                 goto out;
7636
7637         /* was this block corrupt?  If so, don't add references to it */
7638         cache = lookup_cache_extent(info->corrupt_blocks,
7639                                     rec->start, rec->max_size);
7640         if (cache) {
7641                 ret = 0;
7642                 goto out;
7643         }
7644
7645         /* step three, recreate all the refs we did find */
7646         while(cur != &rec->backrefs) {
7647                 back = to_extent_backref(cur);
7648                 cur = cur->next;
7649
7650                 /*
7651                  * if we didn't find any references, don't create a
7652                  * new extent record
7653                  */
7654                 if (!back->found_ref)
7655                         continue;
7656
7657                 rec->bad_full_backref = 0;
7658                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7659                 allocated = 1;
7660
7661                 if (ret)
7662                         goto out;
7663         }
7664 out:
7665         if (trans) {
7666                 int err = btrfs_commit_transaction(trans, info->extent_root);
7667                 if (!ret)
7668                         ret = err;
7669         }
7670
7671         btrfs_free_path(path);
7672         return ret;
7673 }
7674
7675 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7676                               struct extent_record *rec)
7677 {
7678         struct btrfs_trans_handle *trans;
7679         struct btrfs_root *root = fs_info->extent_root;
7680         struct btrfs_path *path;
7681         struct btrfs_extent_item *ei;
7682         struct btrfs_key key;
7683         u64 flags;
7684         int ret = 0;
7685
7686         key.objectid = rec->start;
7687         if (rec->metadata) {
7688                 key.type = BTRFS_METADATA_ITEM_KEY;
7689                 key.offset = rec->info_level;
7690         } else {
7691                 key.type = BTRFS_EXTENT_ITEM_KEY;
7692                 key.offset = rec->max_size;
7693         }
7694
7695         path = btrfs_alloc_path();
7696         if (!path)
7697                 return -ENOMEM;
7698
7699         trans = btrfs_start_transaction(root, 0);
7700         if (IS_ERR(trans)) {
7701                 btrfs_free_path(path);
7702                 return PTR_ERR(trans);
7703         }
7704
7705         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7706         if (ret < 0) {
7707                 btrfs_free_path(path);
7708                 btrfs_commit_transaction(trans, root);
7709                 return ret;
7710         } else if (ret) {
7711                 fprintf(stderr, "Didn't find extent for %llu\n",
7712                         (unsigned long long)rec->start);
7713                 btrfs_free_path(path);
7714                 btrfs_commit_transaction(trans, root);
7715                 return -ENOENT;
7716         }
7717
7718         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7719                             struct btrfs_extent_item);
7720         flags = btrfs_extent_flags(path->nodes[0], ei);
7721         if (rec->flag_block_full_backref) {
7722                 fprintf(stderr, "setting full backref on %llu\n",
7723                         (unsigned long long)key.objectid);
7724                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7725         } else {
7726                 fprintf(stderr, "clearing full backref on %llu\n",
7727                         (unsigned long long)key.objectid);
7728                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7729         }
7730         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7731         btrfs_mark_buffer_dirty(path->nodes[0]);
7732         btrfs_free_path(path);
7733         return btrfs_commit_transaction(trans, root);
7734 }
7735
7736 /* right now we only prune from the extent allocation tree */
7737 static int prune_one_block(struct btrfs_trans_handle *trans,
7738                            struct btrfs_fs_info *info,
7739                            struct btrfs_corrupt_block *corrupt)
7740 {
7741         int ret;
7742         struct btrfs_path path;
7743         struct extent_buffer *eb;
7744         u64 found;
7745         int slot;
7746         int nritems;
7747         int level = corrupt->level + 1;
7748
7749         btrfs_init_path(&path);
7750 again:
7751         /* we want to stop at the parent to our busted block */
7752         path.lowest_level = level;
7753
7754         ret = btrfs_search_slot(trans, info->extent_root,
7755                                 &corrupt->key, &path, -1, 1);
7756
7757         if (ret < 0)
7758                 goto out;
7759
7760         eb = path.nodes[level];
7761         if (!eb) {
7762                 ret = -ENOENT;
7763                 goto out;
7764         }
7765
7766         /*
7767          * hopefully the search gave us the block we want to prune,
7768          * lets try that first
7769          */
7770         slot = path.slots[level];
7771         found =  btrfs_node_blockptr(eb, slot);
7772         if (found == corrupt->cache.start)
7773                 goto del_ptr;
7774
7775         nritems = btrfs_header_nritems(eb);
7776
7777         /* the search failed, lets scan this node and hope we find it */
7778         for (slot = 0; slot < nritems; slot++) {
7779                 found =  btrfs_node_blockptr(eb, slot);
7780                 if (found == corrupt->cache.start)
7781                         goto del_ptr;
7782         }
7783         /*
7784          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7785          * to this block
7786          */
7787         if (eb == info->extent_root->node) {
7788                 ret = -ENOENT;
7789                 goto out;
7790         } else {
7791                 level++;
7792                 btrfs_release_path(&path);
7793                 goto again;
7794         }
7795
7796 del_ptr:
7797         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7798         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7799
7800 out:
7801         btrfs_release_path(&path);
7802         return ret;
7803 }
7804
7805 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7806 {
7807         struct btrfs_trans_handle *trans = NULL;
7808         struct cache_extent *cache;
7809         struct btrfs_corrupt_block *corrupt;
7810
7811         while (1) {
7812                 cache = search_cache_extent(info->corrupt_blocks, 0);
7813                 if (!cache)
7814                         break;
7815                 if (!trans) {
7816                         trans = btrfs_start_transaction(info->extent_root, 1);
7817                         if (IS_ERR(trans))
7818                                 return PTR_ERR(trans);
7819                 }
7820                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7821                 prune_one_block(trans, info, corrupt);
7822                 remove_cache_extent(info->corrupt_blocks, cache);
7823         }
7824         if (trans)
7825                 return btrfs_commit_transaction(trans, info->extent_root);
7826         return 0;
7827 }
7828
7829 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7830 {
7831         struct btrfs_block_group_cache *cache;
7832         u64 start, end;
7833         int ret;
7834
7835         while (1) {
7836                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7837                                             &start, &end, EXTENT_DIRTY);
7838                 if (ret)
7839                         break;
7840                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7841                                    GFP_NOFS);
7842         }
7843
7844         start = 0;
7845         while (1) {
7846                 cache = btrfs_lookup_first_block_group(fs_info, start);
7847                 if (!cache)
7848                         break;
7849                 if (cache->cached)
7850                         cache->cached = 0;
7851                 start = cache->key.objectid + cache->key.offset;
7852         }
7853 }
7854
7855 static int check_extent_refs(struct btrfs_root *root,
7856                              struct cache_tree *extent_cache)
7857 {
7858         struct extent_record *rec;
7859         struct cache_extent *cache;
7860         int err = 0;
7861         int ret = 0;
7862         int fixed = 0;
7863         int had_dups = 0;
7864         int recorded = 0;
7865
7866         if (repair) {
7867                 /*
7868                  * if we're doing a repair, we have to make sure
7869                  * we don't allocate from the problem extents.
7870                  * In the worst case, this will be all the
7871                  * extents in the FS
7872                  */
7873                 cache = search_cache_extent(extent_cache, 0);
7874                 while(cache) {
7875                         rec = container_of(cache, struct extent_record, cache);
7876                         set_extent_dirty(root->fs_info->excluded_extents,
7877                                          rec->start,
7878                                          rec->start + rec->max_size - 1,
7879                                          GFP_NOFS);
7880                         cache = next_cache_extent(cache);
7881                 }
7882
7883                 /* pin down all the corrupted blocks too */
7884                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7885                 while(cache) {
7886                         set_extent_dirty(root->fs_info->excluded_extents,
7887                                          cache->start,
7888                                          cache->start + cache->size - 1,
7889                                          GFP_NOFS);
7890                         cache = next_cache_extent(cache);
7891                 }
7892                 prune_corrupt_blocks(root->fs_info);
7893                 reset_cached_block_groups(root->fs_info);
7894         }
7895
7896         reset_cached_block_groups(root->fs_info);
7897
7898         /*
7899          * We need to delete any duplicate entries we find first otherwise we
7900          * could mess up the extent tree when we have backrefs that actually
7901          * belong to a different extent item and not the weird duplicate one.
7902          */
7903         while (repair && !list_empty(&duplicate_extents)) {
7904                 rec = to_extent_record(duplicate_extents.next);
7905                 list_del_init(&rec->list);
7906
7907                 /* Sometimes we can find a backref before we find an actual
7908                  * extent, so we need to process it a little bit to see if there
7909                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7910                  * if this is a backref screwup.  If we need to delete stuff
7911                  * process_duplicates() will return 0, otherwise it will return
7912                  * 1 and we
7913                  */
7914                 if (process_duplicates(root, extent_cache, rec))
7915                         continue;
7916                 ret = delete_duplicate_records(root, rec);
7917                 if (ret < 0)
7918                         return ret;
7919                 /*
7920                  * delete_duplicate_records will return the number of entries
7921                  * deleted, so if it's greater than 0 then we know we actually
7922                  * did something and we need to remove.
7923                  */
7924                 if (ret)
7925                         had_dups = 1;
7926         }
7927
7928         if (had_dups)
7929                 return -EAGAIN;
7930
7931         while(1) {
7932                 int cur_err = 0;
7933
7934                 fixed = 0;
7935                 recorded = 0;
7936                 cache = search_cache_extent(extent_cache, 0);
7937                 if (!cache)
7938                         break;
7939                 rec = container_of(cache, struct extent_record, cache);
7940                 if (rec->num_duplicates) {
7941                         fprintf(stderr, "extent item %llu has multiple extent "
7942                                 "items\n", (unsigned long long)rec->start);
7943                         err = 1;
7944                         cur_err = 1;
7945                 }
7946
7947                 if (rec->refs != rec->extent_item_refs) {
7948                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7949                                 (unsigned long long)rec->start,
7950                                 (unsigned long long)rec->nr);
7951                         fprintf(stderr, "extent item %llu, found %llu\n",
7952                                 (unsigned long long)rec->extent_item_refs,
7953                                 (unsigned long long)rec->refs);
7954                         ret = record_orphan_data_extents(root->fs_info, rec);
7955                         if (ret < 0)
7956                                 goto repair_abort;
7957                         if (ret == 0) {
7958                                 recorded = 1;
7959                         } else {
7960                                 /*
7961                                  * we can't use the extent to repair file
7962                                  * extent, let the fallback method handle it.
7963                                  */
7964                                 if (!fixed && repair) {
7965                                         ret = fixup_extent_refs(
7966                                                         root->fs_info,
7967                                                         extent_cache, rec);
7968                                         if (ret)
7969                                                 goto repair_abort;
7970                                         fixed = 1;
7971                                 }
7972                         }
7973                         err = 1;
7974                         cur_err = 1;
7975                 }
7976                 if (all_backpointers_checked(rec, 1)) {
7977                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7978                                 (unsigned long long)rec->start,
7979                                 (unsigned long long)rec->nr);
7980
7981                         if (!fixed && !recorded && repair) {
7982                                 ret = fixup_extent_refs(root->fs_info,
7983                                                         extent_cache, rec);
7984                                 if (ret)
7985                                         goto repair_abort;
7986                                 fixed = 1;
7987                         }
7988                         cur_err = 1;
7989                         err = 1;
7990                 }
7991                 if (!rec->owner_ref_checked) {
7992                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7993                                 (unsigned long long)rec->start,
7994                                 (unsigned long long)rec->nr);
7995                         if (!fixed && !recorded && repair) {
7996                                 ret = fixup_extent_refs(root->fs_info,
7997                                                         extent_cache, rec);
7998                                 if (ret)
7999                                         goto repair_abort;
8000                                 fixed = 1;
8001                         }
8002                         err = 1;
8003                         cur_err = 1;
8004                 }
8005                 if (rec->bad_full_backref) {
8006                         fprintf(stderr, "bad full backref, on [%llu]\n",
8007                                 (unsigned long long)rec->start);
8008                         if (repair) {
8009                                 ret = fixup_extent_flags(root->fs_info, rec);
8010                                 if (ret)
8011                                         goto repair_abort;
8012                                 fixed = 1;
8013                         }
8014                         err = 1;
8015                         cur_err = 1;
8016                 }
8017                 /*
8018                  * Although it's not a extent ref's problem, we reuse this
8019                  * routine for error reporting.
8020                  * No repair function yet.
8021                  */
8022                 if (rec->crossing_stripes) {
8023                         fprintf(stderr,
8024                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8025                                 rec->start, rec->start + rec->max_size);
8026                         err = 1;
8027                         cur_err = 1;
8028                 }
8029
8030                 if (rec->wrong_chunk_type) {
8031                         fprintf(stderr,
8032                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8033                                 rec->start, rec->start + rec->max_size);
8034                         err = 1;
8035                         cur_err = 1;
8036                 }
8037
8038                 remove_cache_extent(extent_cache, cache);
8039                 free_all_extent_backrefs(rec);
8040                 if (!init_extent_tree && repair && (!cur_err || fixed))
8041                         clear_extent_dirty(root->fs_info->excluded_extents,
8042                                            rec->start,
8043                                            rec->start + rec->max_size - 1,
8044                                            GFP_NOFS);
8045                 free(rec);
8046         }
8047 repair_abort:
8048         if (repair) {
8049                 if (ret && ret != -EAGAIN) {
8050                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8051                         exit(1);
8052                 } else if (!ret) {
8053                         struct btrfs_trans_handle *trans;
8054
8055                         root = root->fs_info->extent_root;
8056                         trans = btrfs_start_transaction(root, 1);
8057                         if (IS_ERR(trans)) {
8058                                 ret = PTR_ERR(trans);
8059                                 goto repair_abort;
8060                         }
8061
8062                         btrfs_fix_block_accounting(trans, root);
8063                         ret = btrfs_commit_transaction(trans, root);
8064                         if (ret)
8065                                 goto repair_abort;
8066                 }
8067                 if (err)
8068                         fprintf(stderr, "repaired damaged extent references\n");
8069                 return ret;
8070         }
8071         return err;
8072 }
8073
8074 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8075 {
8076         u64 stripe_size;
8077
8078         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8079                 stripe_size = length;
8080                 stripe_size /= num_stripes;
8081         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8082                 stripe_size = length * 2;
8083                 stripe_size /= num_stripes;
8084         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8085                 stripe_size = length;
8086                 stripe_size /= (num_stripes - 1);
8087         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8088                 stripe_size = length;
8089                 stripe_size /= (num_stripes - 2);
8090         } else {
8091                 stripe_size = length;
8092         }
8093         return stripe_size;
8094 }
8095
8096 /*
8097  * Check the chunk with its block group/dev list ref:
8098  * Return 0 if all refs seems valid.
8099  * Return 1 if part of refs seems valid, need later check for rebuild ref
8100  * like missing block group and needs to search extent tree to rebuild them.
8101  * Return -1 if essential refs are missing and unable to rebuild.
8102  */
8103 static int check_chunk_refs(struct chunk_record *chunk_rec,
8104                             struct block_group_tree *block_group_cache,
8105                             struct device_extent_tree *dev_extent_cache,
8106                             int silent)
8107 {
8108         struct cache_extent *block_group_item;
8109         struct block_group_record *block_group_rec;
8110         struct cache_extent *dev_extent_item;
8111         struct device_extent_record *dev_extent_rec;
8112         u64 devid;
8113         u64 offset;
8114         u64 length;
8115         int metadump_v2 = 0;
8116         int i;
8117         int ret = 0;
8118
8119         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8120                                                chunk_rec->offset,
8121                                                chunk_rec->length);
8122         if (block_group_item) {
8123                 block_group_rec = container_of(block_group_item,
8124                                                struct block_group_record,
8125                                                cache);
8126                 if (chunk_rec->length != block_group_rec->offset ||
8127                     chunk_rec->offset != block_group_rec->objectid ||
8128                     (!metadump_v2 &&
8129                      chunk_rec->type_flags != block_group_rec->flags)) {
8130                         if (!silent)
8131                                 fprintf(stderr,
8132                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8133                                         chunk_rec->objectid,
8134                                         chunk_rec->type,
8135                                         chunk_rec->offset,
8136                                         chunk_rec->length,
8137                                         chunk_rec->offset,
8138                                         chunk_rec->type_flags,
8139                                         block_group_rec->objectid,
8140                                         block_group_rec->type,
8141                                         block_group_rec->offset,
8142                                         block_group_rec->offset,
8143                                         block_group_rec->objectid,
8144                                         block_group_rec->flags);
8145                         ret = -1;
8146                 } else {
8147                         list_del_init(&block_group_rec->list);
8148                         chunk_rec->bg_rec = block_group_rec;
8149                 }
8150         } else {
8151                 if (!silent)
8152                         fprintf(stderr,
8153                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8154                                 chunk_rec->objectid,
8155                                 chunk_rec->type,
8156                                 chunk_rec->offset,
8157                                 chunk_rec->length,
8158                                 chunk_rec->offset,
8159                                 chunk_rec->type_flags);
8160                 ret = 1;
8161         }
8162
8163         if (metadump_v2)
8164                 return ret;
8165
8166         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8167                                     chunk_rec->num_stripes);
8168         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8169                 devid = chunk_rec->stripes[i].devid;
8170                 offset = chunk_rec->stripes[i].offset;
8171                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8172                                                        devid, offset, length);
8173                 if (dev_extent_item) {
8174                         dev_extent_rec = container_of(dev_extent_item,
8175                                                 struct device_extent_record,
8176                                                 cache);
8177                         if (dev_extent_rec->objectid != devid ||
8178                             dev_extent_rec->offset != offset ||
8179                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8180                             dev_extent_rec->length != length) {
8181                                 if (!silent)
8182                                         fprintf(stderr,
8183                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8184                                                 chunk_rec->objectid,
8185                                                 chunk_rec->type,
8186                                                 chunk_rec->offset,
8187                                                 chunk_rec->stripes[i].devid,
8188                                                 chunk_rec->stripes[i].offset,
8189                                                 dev_extent_rec->objectid,
8190                                                 dev_extent_rec->offset,
8191                                                 dev_extent_rec->length);
8192                                 ret = -1;
8193                         } else {
8194                                 list_move(&dev_extent_rec->chunk_list,
8195                                           &chunk_rec->dextents);
8196                         }
8197                 } else {
8198                         if (!silent)
8199                                 fprintf(stderr,
8200                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8201                                         chunk_rec->objectid,
8202                                         chunk_rec->type,
8203                                         chunk_rec->offset,
8204                                         chunk_rec->stripes[i].devid,
8205                                         chunk_rec->stripes[i].offset);
8206                         ret = -1;
8207                 }
8208         }
8209         return ret;
8210 }
8211
8212 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8213 int check_chunks(struct cache_tree *chunk_cache,
8214                  struct block_group_tree *block_group_cache,
8215                  struct device_extent_tree *dev_extent_cache,
8216                  struct list_head *good, struct list_head *bad,
8217                  struct list_head *rebuild, int silent)
8218 {
8219         struct cache_extent *chunk_item;
8220         struct chunk_record *chunk_rec;
8221         struct block_group_record *bg_rec;
8222         struct device_extent_record *dext_rec;
8223         int err;
8224         int ret = 0;
8225
8226         chunk_item = first_cache_extent(chunk_cache);
8227         while (chunk_item) {
8228                 chunk_rec = container_of(chunk_item, struct chunk_record,
8229                                          cache);
8230                 err = check_chunk_refs(chunk_rec, block_group_cache,
8231                                        dev_extent_cache, silent);
8232                 if (err < 0)
8233                         ret = err;
8234                 if (err == 0 && good)
8235                         list_add_tail(&chunk_rec->list, good);
8236                 if (err > 0 && rebuild)
8237                         list_add_tail(&chunk_rec->list, rebuild);
8238                 if (err < 0 && bad)
8239                         list_add_tail(&chunk_rec->list, bad);
8240                 chunk_item = next_cache_extent(chunk_item);
8241         }
8242
8243         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8244                 if (!silent)
8245                         fprintf(stderr,
8246                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8247                                 bg_rec->objectid,
8248                                 bg_rec->offset,
8249                                 bg_rec->flags);
8250                 if (!ret)
8251                         ret = 1;
8252         }
8253
8254         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8255                             chunk_list) {
8256                 if (!silent)
8257                         fprintf(stderr,
8258                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8259                                 dext_rec->objectid,
8260                                 dext_rec->offset,
8261                                 dext_rec->length);
8262                 if (!ret)
8263                         ret = 1;
8264         }
8265         return ret;
8266 }
8267
8268
8269 static int check_device_used(struct device_record *dev_rec,
8270                              struct device_extent_tree *dext_cache)
8271 {
8272         struct cache_extent *cache;
8273         struct device_extent_record *dev_extent_rec;
8274         u64 total_byte = 0;
8275
8276         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8277         while (cache) {
8278                 dev_extent_rec = container_of(cache,
8279                                               struct device_extent_record,
8280                                               cache);
8281                 if (dev_extent_rec->objectid != dev_rec->devid)
8282                         break;
8283
8284                 list_del_init(&dev_extent_rec->device_list);
8285                 total_byte += dev_extent_rec->length;
8286                 cache = next_cache_extent(cache);
8287         }
8288
8289         if (total_byte != dev_rec->byte_used) {
8290                 fprintf(stderr,
8291                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8292                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8293                         dev_rec->type, dev_rec->offset);
8294                 return -1;
8295         } else {
8296                 return 0;
8297         }
8298 }
8299
8300 /* check btrfs_dev_item -> btrfs_dev_extent */
8301 static int check_devices(struct rb_root *dev_cache,
8302                          struct device_extent_tree *dev_extent_cache)
8303 {
8304         struct rb_node *dev_node;
8305         struct device_record *dev_rec;
8306         struct device_extent_record *dext_rec;
8307         int err;
8308         int ret = 0;
8309
8310         dev_node = rb_first(dev_cache);
8311         while (dev_node) {
8312                 dev_rec = container_of(dev_node, struct device_record, node);
8313                 err = check_device_used(dev_rec, dev_extent_cache);
8314                 if (err)
8315                         ret = err;
8316
8317                 dev_node = rb_next(dev_node);
8318         }
8319         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8320                             device_list) {
8321                 fprintf(stderr,
8322                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8323                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8324                 if (!ret)
8325                         ret = 1;
8326         }
8327         return ret;
8328 }
8329
8330 static int add_root_item_to_list(struct list_head *head,
8331                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8332                                   u8 level, u8 drop_level,
8333                                   int level_size, struct btrfs_key *drop_key)
8334 {
8335
8336         struct root_item_record *ri_rec;
8337         ri_rec = malloc(sizeof(*ri_rec));
8338         if (!ri_rec)
8339                 return -ENOMEM;
8340         ri_rec->bytenr = bytenr;
8341         ri_rec->objectid = objectid;
8342         ri_rec->level = level;
8343         ri_rec->level_size = level_size;
8344         ri_rec->drop_level = drop_level;
8345         ri_rec->last_snapshot = last_snapshot;
8346         if (drop_key)
8347                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8348         list_add_tail(&ri_rec->list, head);
8349
8350         return 0;
8351 }
8352
8353 static void free_root_item_list(struct list_head *list)
8354 {
8355         struct root_item_record *ri_rec;
8356
8357         while (!list_empty(list)) {
8358                 ri_rec = list_first_entry(list, struct root_item_record,
8359                                           list);
8360                 list_del_init(&ri_rec->list);
8361                 free(ri_rec);
8362         }
8363 }
8364
8365 static int deal_root_from_list(struct list_head *list,
8366                                struct btrfs_root *root,
8367                                struct block_info *bits,
8368                                int bits_nr,
8369                                struct cache_tree *pending,
8370                                struct cache_tree *seen,
8371                                struct cache_tree *reada,
8372                                struct cache_tree *nodes,
8373                                struct cache_tree *extent_cache,
8374                                struct cache_tree *chunk_cache,
8375                                struct rb_root *dev_cache,
8376                                struct block_group_tree *block_group_cache,
8377                                struct device_extent_tree *dev_extent_cache)
8378 {
8379         int ret = 0;
8380         u64 last;
8381
8382         while (!list_empty(list)) {
8383                 struct root_item_record *rec;
8384                 struct extent_buffer *buf;
8385                 rec = list_entry(list->next,
8386                                  struct root_item_record, list);
8387                 last = 0;
8388                 buf = read_tree_block(root->fs_info->tree_root,
8389                                       rec->bytenr, rec->level_size, 0);
8390                 if (!extent_buffer_uptodate(buf)) {
8391                         free_extent_buffer(buf);
8392                         ret = -EIO;
8393                         break;
8394                 }
8395                 ret = add_root_to_pending(buf, extent_cache, pending,
8396                                     seen, nodes, rec->objectid);
8397                 if (ret < 0)
8398                         break;
8399                 /*
8400                  * To rebuild extent tree, we need deal with snapshot
8401                  * one by one, otherwise we deal with node firstly which
8402                  * can maximize readahead.
8403                  */
8404                 while (1) {
8405                         ret = run_next_block(root, bits, bits_nr, &last,
8406                                              pending, seen, reada, nodes,
8407                                              extent_cache, chunk_cache,
8408                                              dev_cache, block_group_cache,
8409                                              dev_extent_cache, rec);
8410                         if (ret != 0)
8411                                 break;
8412                 }
8413                 free_extent_buffer(buf);
8414                 list_del(&rec->list);
8415                 free(rec);
8416                 if (ret < 0)
8417                         break;
8418         }
8419         while (ret >= 0) {
8420                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8421                                      reada, nodes, extent_cache, chunk_cache,
8422                                      dev_cache, block_group_cache,
8423                                      dev_extent_cache, NULL);
8424                 if (ret != 0) {
8425                         if (ret > 0)
8426                                 ret = 0;
8427                         break;
8428                 }
8429         }
8430         return ret;
8431 }
8432
8433 static int check_chunks_and_extents(struct btrfs_root *root)
8434 {
8435         struct rb_root dev_cache;
8436         struct cache_tree chunk_cache;
8437         struct block_group_tree block_group_cache;
8438         struct device_extent_tree dev_extent_cache;
8439         struct cache_tree extent_cache;
8440         struct cache_tree seen;
8441         struct cache_tree pending;
8442         struct cache_tree reada;
8443         struct cache_tree nodes;
8444         struct extent_io_tree excluded_extents;
8445         struct cache_tree corrupt_blocks;
8446         struct btrfs_path path;
8447         struct btrfs_key key;
8448         struct btrfs_key found_key;
8449         int ret, err = 0;
8450         struct block_info *bits;
8451         int bits_nr;
8452         struct extent_buffer *leaf;
8453         int slot;
8454         struct btrfs_root_item ri;
8455         struct list_head dropping_trees;
8456         struct list_head normal_trees;
8457         struct btrfs_root *root1;
8458         u64 objectid;
8459         u32 level_size;
8460         u8 level;
8461
8462         dev_cache = RB_ROOT;
8463         cache_tree_init(&chunk_cache);
8464         block_group_tree_init(&block_group_cache);
8465         device_extent_tree_init(&dev_extent_cache);
8466
8467         cache_tree_init(&extent_cache);
8468         cache_tree_init(&seen);
8469         cache_tree_init(&pending);
8470         cache_tree_init(&nodes);
8471         cache_tree_init(&reada);
8472         cache_tree_init(&corrupt_blocks);
8473         extent_io_tree_init(&excluded_extents);
8474         INIT_LIST_HEAD(&dropping_trees);
8475         INIT_LIST_HEAD(&normal_trees);
8476
8477         if (repair) {
8478                 root->fs_info->excluded_extents = &excluded_extents;
8479                 root->fs_info->fsck_extent_cache = &extent_cache;
8480                 root->fs_info->free_extent_hook = free_extent_hook;
8481                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8482         }
8483
8484         bits_nr = 1024;
8485         bits = malloc(bits_nr * sizeof(struct block_info));
8486         if (!bits) {
8487                 perror("malloc");
8488                 exit(1);
8489         }
8490
8491         if (ctx.progress_enabled) {
8492                 ctx.tp = TASK_EXTENTS;
8493                 task_start(ctx.info);
8494         }
8495
8496 again:
8497         root1 = root->fs_info->tree_root;
8498         level = btrfs_header_level(root1->node);
8499         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8500                                     root1->node->start, 0, level, 0,
8501                                     root1->nodesize, NULL);
8502         if (ret < 0)
8503                 goto out;
8504         root1 = root->fs_info->chunk_root;
8505         level = btrfs_header_level(root1->node);
8506         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8507                                     root1->node->start, 0, level, 0,
8508                                     root1->nodesize, NULL);
8509         if (ret < 0)
8510                 goto out;
8511         btrfs_init_path(&path);
8512         key.offset = 0;
8513         key.objectid = 0;
8514         key.type = BTRFS_ROOT_ITEM_KEY;
8515         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8516                                         &key, &path, 0, 0);
8517         if (ret < 0)
8518                 goto out;
8519         while(1) {
8520                 leaf = path.nodes[0];
8521                 slot = path.slots[0];
8522                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8523                         ret = btrfs_next_leaf(root, &path);
8524                         if (ret != 0)
8525                                 break;
8526                         leaf = path.nodes[0];
8527                         slot = path.slots[0];
8528                 }
8529                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8530                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8531                         unsigned long offset;
8532                         u64 last_snapshot;
8533
8534                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8535                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8536                         last_snapshot = btrfs_root_last_snapshot(&ri);
8537                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8538                                 level = btrfs_root_level(&ri);
8539                                 level_size = root->nodesize;
8540                                 ret = add_root_item_to_list(&normal_trees,
8541                                                 found_key.objectid,
8542                                                 btrfs_root_bytenr(&ri),
8543                                                 last_snapshot, level,
8544                                                 0, level_size, NULL);
8545                                 if (ret < 0)
8546                                         goto out;
8547                         } else {
8548                                 level = btrfs_root_level(&ri);
8549                                 level_size = root->nodesize;
8550                                 objectid = found_key.objectid;
8551                                 btrfs_disk_key_to_cpu(&found_key,
8552                                                       &ri.drop_progress);
8553                                 ret = add_root_item_to_list(&dropping_trees,
8554                                                 objectid,
8555                                                 btrfs_root_bytenr(&ri),
8556                                                 last_snapshot, level,
8557                                                 ri.drop_level,
8558                                                 level_size, &found_key);
8559                                 if (ret < 0)
8560                                         goto out;
8561                         }
8562                 }
8563                 path.slots[0]++;
8564         }
8565         btrfs_release_path(&path);
8566
8567         /*
8568          * check_block can return -EAGAIN if it fixes something, please keep
8569          * this in mind when dealing with return values from these functions, if
8570          * we get -EAGAIN we want to fall through and restart the loop.
8571          */
8572         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8573                                   &seen, &reada, &nodes, &extent_cache,
8574                                   &chunk_cache, &dev_cache, &block_group_cache,
8575                                   &dev_extent_cache);
8576         if (ret < 0) {
8577                 if (ret == -EAGAIN)
8578                         goto loop;
8579                 goto out;
8580         }
8581         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8582                                   &pending, &seen, &reada, &nodes,
8583                                   &extent_cache, &chunk_cache, &dev_cache,
8584                                   &block_group_cache, &dev_extent_cache);
8585         if (ret < 0) {
8586                 if (ret == -EAGAIN)
8587                         goto loop;
8588                 goto out;
8589         }
8590
8591         ret = check_chunks(&chunk_cache, &block_group_cache,
8592                            &dev_extent_cache, NULL, NULL, NULL, 0);
8593         if (ret) {
8594                 if (ret == -EAGAIN)
8595                         goto loop;
8596                 err = ret;
8597         }
8598
8599         ret = check_extent_refs(root, &extent_cache);
8600         if (ret < 0) {
8601                 if (ret == -EAGAIN)
8602                         goto loop;
8603                 goto out;
8604         }
8605
8606         ret = check_devices(&dev_cache, &dev_extent_cache);
8607         if (ret && err)
8608                 ret = err;
8609
8610 out:
8611         task_stop(ctx.info);
8612         if (repair) {
8613                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8614                 extent_io_tree_cleanup(&excluded_extents);
8615                 root->fs_info->fsck_extent_cache = NULL;
8616                 root->fs_info->free_extent_hook = NULL;
8617                 root->fs_info->corrupt_blocks = NULL;
8618                 root->fs_info->excluded_extents = NULL;
8619         }
8620         free(bits);
8621         free_chunk_cache_tree(&chunk_cache);
8622         free_device_cache_tree(&dev_cache);
8623         free_block_group_tree(&block_group_cache);
8624         free_device_extent_tree(&dev_extent_cache);
8625         free_extent_cache_tree(&seen);
8626         free_extent_cache_tree(&pending);
8627         free_extent_cache_tree(&reada);
8628         free_extent_cache_tree(&nodes);
8629         return ret;
8630 loop:
8631         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8632         free_extent_cache_tree(&seen);
8633         free_extent_cache_tree(&pending);
8634         free_extent_cache_tree(&reada);
8635         free_extent_cache_tree(&nodes);
8636         free_chunk_cache_tree(&chunk_cache);
8637         free_block_group_tree(&block_group_cache);
8638         free_device_cache_tree(&dev_cache);
8639         free_device_extent_tree(&dev_extent_cache);
8640         free_extent_record_cache(root->fs_info, &extent_cache);
8641         free_root_item_list(&normal_trees);
8642         free_root_item_list(&dropping_trees);
8643         extent_io_tree_cleanup(&excluded_extents);
8644         goto again;
8645 }
8646
8647 /*
8648  * Check backrefs of a tree block given by @bytenr or @eb.
8649  *
8650  * @root:       the root containing the @bytenr or @eb
8651  * @eb:         tree block extent buffer, can be NULL
8652  * @bytenr:     bytenr of the tree block to search
8653  * @level:      tree level of the tree block
8654  * @owner:      owner of the tree block
8655  *
8656  * Return >0 for any error found and output error message
8657  * Return 0 for no error found
8658  */
8659 static int check_tree_block_ref(struct btrfs_root *root,
8660                                 struct extent_buffer *eb, u64 bytenr,
8661                                 int level, u64 owner)
8662 {
8663         struct btrfs_key key;
8664         struct btrfs_root *extent_root = root->fs_info->extent_root;
8665         struct btrfs_path path;
8666         struct btrfs_extent_item *ei;
8667         struct btrfs_extent_inline_ref *iref;
8668         struct extent_buffer *leaf;
8669         unsigned long end;
8670         unsigned long ptr;
8671         int slot;
8672         int skinny_level;
8673         int type;
8674         u32 nodesize = root->nodesize;
8675         u32 item_size;
8676         u64 offset;
8677         int found_ref = 0;
8678         int err = 0;
8679         int ret;
8680
8681         btrfs_init_path(&path);
8682         key.objectid = bytenr;
8683         if (btrfs_fs_incompat(root->fs_info,
8684                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8685                 key.type = BTRFS_METADATA_ITEM_KEY;
8686         else
8687                 key.type = BTRFS_EXTENT_ITEM_KEY;
8688         key.offset = (u64)-1;
8689
8690         /* Search for the backref in extent tree */
8691         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8692         if (ret < 0) {
8693                 err |= BACKREF_MISSING;
8694                 goto out;
8695         }
8696         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8697         if (ret) {
8698                 err |= BACKREF_MISSING;
8699                 goto out;
8700         }
8701
8702         leaf = path.nodes[0];
8703         slot = path.slots[0];
8704         btrfs_item_key_to_cpu(leaf, &key, slot);
8705
8706         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8707
8708         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8709                 skinny_level = (int)key.offset;
8710                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8711         } else {
8712                 struct btrfs_tree_block_info *info;
8713
8714                 info = (struct btrfs_tree_block_info *)(ei + 1);
8715                 skinny_level = btrfs_tree_block_level(leaf, info);
8716                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8717         }
8718
8719         if (eb) {
8720                 u64 header_gen;
8721                 u64 extent_gen;
8722
8723                 if (!(btrfs_extent_flags(leaf, ei) &
8724                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8725                         error(
8726                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8727                                 key.objectid, nodesize,
8728                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8729                         err = BACKREF_MISMATCH;
8730                 }
8731                 header_gen = btrfs_header_generation(eb);
8732                 extent_gen = btrfs_extent_generation(leaf, ei);
8733                 if (header_gen != extent_gen) {
8734                         error(
8735         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8736                                 key.objectid, nodesize, header_gen,
8737                                 extent_gen);
8738                         err = BACKREF_MISMATCH;
8739                 }
8740                 if (level != skinny_level) {
8741                         error(
8742                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8743                                 key.objectid, nodesize, level, skinny_level);
8744                         err = BACKREF_MISMATCH;
8745                 }
8746                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8747                         error(
8748                         "extent[%llu %u] is referred by other roots than %llu",
8749                                 key.objectid, nodesize, root->objectid);
8750                         err = BACKREF_MISMATCH;
8751                 }
8752         }
8753
8754         /*
8755          * Iterate the extent/metadata item to find the exact backref
8756          */
8757         item_size = btrfs_item_size_nr(leaf, slot);
8758         ptr = (unsigned long)iref;
8759         end = (unsigned long)ei + item_size;
8760         while (ptr < end) {
8761                 iref = (struct btrfs_extent_inline_ref *)ptr;
8762                 type = btrfs_extent_inline_ref_type(leaf, iref);
8763                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8764
8765                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8766                         (offset == root->objectid || offset == owner)) {
8767                         found_ref = 1;
8768                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8769                         /* Check if the backref points to valid referencer */
8770                         found_ref = !check_tree_block_ref(root, NULL, offset,
8771                                                           level + 1, owner);
8772                 }
8773
8774                 if (found_ref)
8775                         break;
8776                 ptr += btrfs_extent_inline_ref_size(type);
8777         }
8778
8779         /*
8780          * Inlined extent item doesn't have what we need, check
8781          * TREE_BLOCK_REF_KEY
8782          */
8783         if (!found_ref) {
8784                 btrfs_release_path(&path);
8785                 key.objectid = bytenr;
8786                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8787                 key.offset = root->objectid;
8788
8789                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8790                 if (!ret)
8791                         found_ref = 1;
8792         }
8793         if (!found_ref)
8794                 err |= BACKREF_MISSING;
8795 out:
8796         btrfs_release_path(&path);
8797         if (eb && (err & BACKREF_MISSING))
8798                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8799                         bytenr, nodesize, owner, level);
8800         return err;
8801 }
8802
8803 /*
8804  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8805  *
8806  * Return >0 any error found and output error message
8807  * Return 0 for no error found
8808  */
8809 static int check_extent_data_item(struct btrfs_root *root,
8810                                   struct extent_buffer *eb, int slot)
8811 {
8812         struct btrfs_file_extent_item *fi;
8813         struct btrfs_path path;
8814         struct btrfs_root *extent_root = root->fs_info->extent_root;
8815         struct btrfs_key fi_key;
8816         struct btrfs_key dbref_key;
8817         struct extent_buffer *leaf;
8818         struct btrfs_extent_item *ei;
8819         struct btrfs_extent_inline_ref *iref;
8820         struct btrfs_extent_data_ref *dref;
8821         u64 owner;
8822         u64 file_extent_gen;
8823         u64 disk_bytenr;
8824         u64 disk_num_bytes;
8825         u64 extent_num_bytes;
8826         u64 extent_flags;
8827         u64 extent_gen;
8828         u32 item_size;
8829         unsigned long end;
8830         unsigned long ptr;
8831         int type;
8832         u64 ref_root;
8833         int found_dbackref = 0;
8834         int err = 0;
8835         int ret;
8836
8837         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8838         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8839         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8840
8841         /* Nothing to check for hole and inline data extents */
8842         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8843             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8844                 return 0;
8845
8846         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8847         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8848         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8849
8850         /* Check unaligned disk_num_bytes and num_bytes */
8851         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8852                 error(
8853 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8854                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8855                         root->sectorsize);
8856                 err |= BYTES_UNALIGNED;
8857         } else {
8858                 data_bytes_allocated += disk_num_bytes;
8859         }
8860         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8861                 error(
8862 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8863                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8864                         root->sectorsize);
8865                 err |= BYTES_UNALIGNED;
8866         } else {
8867                 data_bytes_referenced += extent_num_bytes;
8868         }
8869         owner = btrfs_header_owner(eb);
8870
8871         /* Check the extent item of the file extent in extent tree */
8872         btrfs_init_path(&path);
8873         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8874         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8875         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8876
8877         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8878         if (ret) {
8879                 err |= BACKREF_MISSING;
8880                 goto error;
8881         }
8882
8883         leaf = path.nodes[0];
8884         slot = path.slots[0];
8885         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8886
8887         extent_flags = btrfs_extent_flags(leaf, ei);
8888         extent_gen = btrfs_extent_generation(leaf, ei);
8889
8890         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8891                 error(
8892                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8893                     disk_bytenr, disk_num_bytes,
8894                     BTRFS_EXTENT_FLAG_DATA);
8895                 err |= BACKREF_MISMATCH;
8896         }
8897
8898         if (file_extent_gen < extent_gen) {
8899                 error(
8900 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8901                         disk_bytenr, disk_num_bytes, file_extent_gen,
8902                         extent_gen);
8903                 err |= BACKREF_MISMATCH;
8904         }
8905
8906         /* Check data backref inside that extent item */
8907         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8908         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8909         ptr = (unsigned long)iref;
8910         end = (unsigned long)ei + item_size;
8911         while (ptr < end) {
8912                 iref = (struct btrfs_extent_inline_ref *)ptr;
8913                 type = btrfs_extent_inline_ref_type(leaf, iref);
8914                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8915
8916                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8917                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8918                         if (ref_root == owner || ref_root == root->objectid)
8919                                 found_dbackref = 1;
8920                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8921                         found_dbackref = !check_tree_block_ref(root, NULL,
8922                                 btrfs_extent_inline_ref_offset(leaf, iref),
8923                                 0, owner);
8924                 }
8925
8926                 if (found_dbackref)
8927                         break;
8928                 ptr += btrfs_extent_inline_ref_size(type);
8929         }
8930
8931         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8932         if (!found_dbackref) {
8933                 btrfs_release_path(&path);
8934
8935                 btrfs_init_path(&path);
8936                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8937                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8938                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8939                                 fi_key.objectid, fi_key.offset);
8940
8941                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8942                                         &dbref_key, &path, 0, 0);
8943                 if (!ret)
8944                         found_dbackref = 1;
8945         }
8946
8947         if (!found_dbackref)
8948                 err |= BACKREF_MISSING;
8949 error:
8950         btrfs_release_path(&path);
8951         if (err & BACKREF_MISSING) {
8952                 error("data extent[%llu %llu] backref lost",
8953                       disk_bytenr, disk_num_bytes);
8954         }
8955         return err;
8956 }
8957
8958 /*
8959  * Get real tree block level for the case like shared block
8960  * Return >= 0 as tree level
8961  * Return <0 for error
8962  */
8963 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8964 {
8965         struct extent_buffer *eb;
8966         struct btrfs_path path;
8967         struct btrfs_key key;
8968         struct btrfs_extent_item *ei;
8969         u64 flags;
8970         u64 transid;
8971         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8972         u8 backref_level;
8973         u8 header_level;
8974         int ret;
8975
8976         /* Search extent tree for extent generation and level */
8977         key.objectid = bytenr;
8978         key.type = BTRFS_METADATA_ITEM_KEY;
8979         key.offset = (u64)-1;
8980
8981         btrfs_init_path(&path);
8982         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8983         if (ret < 0)
8984                 goto release_out;
8985         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8986         if (ret < 0)
8987                 goto release_out;
8988         if (ret > 0) {
8989                 ret = -ENOENT;
8990                 goto release_out;
8991         }
8992
8993         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8994         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8995                             struct btrfs_extent_item);
8996         flags = btrfs_extent_flags(path.nodes[0], ei);
8997         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8998                 ret = -ENOENT;
8999                 goto release_out;
9000         }
9001
9002         /* Get transid for later read_tree_block() check */
9003         transid = btrfs_extent_generation(path.nodes[0], ei);
9004
9005         /* Get backref level as one source */
9006         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9007                 backref_level = key.offset;
9008         } else {
9009                 struct btrfs_tree_block_info *info;
9010
9011                 info = (struct btrfs_tree_block_info *)(ei + 1);
9012                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9013         }
9014         btrfs_release_path(&path);
9015
9016         /* Get level from tree block as an alternative source */
9017         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9018         if (!extent_buffer_uptodate(eb)) {
9019                 free_extent_buffer(eb);
9020                 return -EIO;
9021         }
9022         header_level = btrfs_header_level(eb);
9023         free_extent_buffer(eb);
9024
9025         if (header_level != backref_level)
9026                 return -EIO;
9027         return header_level;
9028
9029 release_out:
9030         btrfs_release_path(&path);
9031         return ret;
9032 }
9033
9034 /*
9035  * Check if a tree block backref is valid (points to a valid tree block)
9036  * if level == -1, level will be resolved
9037  * Return >0 for any error found and print error message
9038  */
9039 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9040                                     u64 bytenr, int level)
9041 {
9042         struct btrfs_root *root;
9043         struct btrfs_key key;
9044         struct btrfs_path path;
9045         struct extent_buffer *eb;
9046         struct extent_buffer *node;
9047         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9048         int err = 0;
9049         int ret;
9050
9051         /* Query level for level == -1 special case */
9052         if (level == -1)
9053                 level = query_tree_block_level(fs_info, bytenr);
9054         if (level < 0) {
9055                 err |= REFERENCER_MISSING;
9056                 goto out;
9057         }
9058
9059         key.objectid = root_id;
9060         key.type = BTRFS_ROOT_ITEM_KEY;
9061         key.offset = (u64)-1;
9062
9063         root = btrfs_read_fs_root(fs_info, &key);
9064         if (IS_ERR(root)) {
9065                 err |= REFERENCER_MISSING;
9066                 goto out;
9067         }
9068
9069         /* Read out the tree block to get item/node key */
9070         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9071         if (!extent_buffer_uptodate(eb)) {
9072                 err |= REFERENCER_MISSING;
9073                 free_extent_buffer(eb);
9074                 goto out;
9075         }
9076
9077         /* Empty tree, no need to check key */
9078         if (!btrfs_header_nritems(eb) && !level) {
9079                 free_extent_buffer(eb);
9080                 goto out;
9081         }
9082
9083         if (level)
9084                 btrfs_node_key_to_cpu(eb, &key, 0);
9085         else
9086                 btrfs_item_key_to_cpu(eb, &key, 0);
9087
9088         free_extent_buffer(eb);
9089
9090         btrfs_init_path(&path);
9091         path.lowest_level = level;
9092         /* Search with the first key, to ensure we can reach it */
9093         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9094         if (ret < 0) {
9095                 err |= REFERENCER_MISSING;
9096                 goto release_out;
9097         }
9098
9099         node = path.nodes[level];
9100         if (btrfs_header_bytenr(node) != bytenr) {
9101                 error(
9102         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9103                         bytenr, nodesize, bytenr,
9104                         btrfs_header_bytenr(node));
9105                 err |= REFERENCER_MISMATCH;
9106         }
9107         if (btrfs_header_level(node) != level) {
9108                 error(
9109         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9110                         bytenr, nodesize, level,
9111                         btrfs_header_level(node));
9112                 err |= REFERENCER_MISMATCH;
9113         }
9114
9115 release_out:
9116         btrfs_release_path(&path);
9117 out:
9118         if (err & REFERENCER_MISSING) {
9119                 if (level < 0)
9120                         error("extent [%llu %d] lost referencer (owner: %llu)",
9121                                 bytenr, nodesize, root_id);
9122                 else
9123                         error(
9124                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9125                                 bytenr, nodesize, root_id, level);
9126         }
9127
9128         return err;
9129 }
9130
9131 /*
9132  * Check referencer for shared block backref
9133  * If level == -1, this function will resolve the level.
9134  */
9135 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9136                                      u64 parent, u64 bytenr, int level)
9137 {
9138         struct extent_buffer *eb;
9139         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9140         u32 nr;
9141         int found_parent = 0;
9142         int i;
9143
9144         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9145         if (!extent_buffer_uptodate(eb))
9146                 goto out;
9147
9148         if (level == -1)
9149                 level = query_tree_block_level(fs_info, bytenr);
9150         if (level < 0)
9151                 goto out;
9152
9153         if (level + 1 != btrfs_header_level(eb))
9154                 goto out;
9155
9156         nr = btrfs_header_nritems(eb);
9157         for (i = 0; i < nr; i++) {
9158                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9159                         found_parent = 1;
9160                         break;
9161                 }
9162         }
9163 out:
9164         free_extent_buffer(eb);
9165         if (!found_parent) {
9166                 error(
9167         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9168                         bytenr, nodesize, parent, level);
9169                 return REFERENCER_MISSING;
9170         }
9171         return 0;
9172 }
9173
9174 /*
9175  * Check referencer for normal (inlined) data ref
9176  * If len == 0, it will be resolved by searching in extent tree
9177  */
9178 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9179                                      u64 root_id, u64 objectid, u64 offset,
9180                                      u64 bytenr, u64 len, u32 count)
9181 {
9182         struct btrfs_root *root;
9183         struct btrfs_root *extent_root = fs_info->extent_root;
9184         struct btrfs_key key;
9185         struct btrfs_path path;
9186         struct extent_buffer *leaf;
9187         struct btrfs_file_extent_item *fi;
9188         u32 found_count = 0;
9189         int slot;
9190         int ret = 0;
9191
9192         if (!len) {
9193                 key.objectid = bytenr;
9194                 key.type = BTRFS_EXTENT_ITEM_KEY;
9195                 key.offset = (u64)-1;
9196
9197                 btrfs_init_path(&path);
9198                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9199                 if (ret < 0)
9200                         goto out;
9201                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9202                 if (ret)
9203                         goto out;
9204                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9205                 if (key.objectid != bytenr ||
9206                     key.type != BTRFS_EXTENT_ITEM_KEY)
9207                         goto out;
9208                 len = key.offset;
9209                 btrfs_release_path(&path);
9210         }
9211         key.objectid = root_id;
9212         key.type = BTRFS_ROOT_ITEM_KEY;
9213         key.offset = (u64)-1;
9214         btrfs_init_path(&path);
9215
9216         root = btrfs_read_fs_root(fs_info, &key);
9217         if (IS_ERR(root))
9218                 goto out;
9219
9220         key.objectid = objectid;
9221         key.type = BTRFS_EXTENT_DATA_KEY;
9222         /*
9223          * It can be nasty as data backref offset is
9224          * file offset - file extent offset, which is smaller or
9225          * equal to original backref offset.  The only special case is
9226          * overflow.  So we need to special check and do further search.
9227          */
9228         key.offset = offset & (1ULL << 63) ? 0 : offset;
9229
9230         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9231         if (ret < 0)
9232                 goto out;
9233
9234         /*
9235          * Search afterwards to get correct one
9236          * NOTE: As we must do a comprehensive check on the data backref to
9237          * make sure the dref count also matches, we must iterate all file
9238          * extents for that inode.
9239          */
9240         while (1) {
9241                 leaf = path.nodes[0];
9242                 slot = path.slots[0];
9243
9244                 btrfs_item_key_to_cpu(leaf, &key, slot);
9245                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9246                         break;
9247                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9248                 /*
9249                  * Except normal disk bytenr and disk num bytes, we still
9250                  * need to do extra check on dbackref offset as
9251                  * dbackref offset = file_offset - file_extent_offset
9252                  */
9253                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9254                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9255                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9256                     offset)
9257                         found_count++;
9258
9259                 ret = btrfs_next_item(root, &path);
9260                 if (ret)
9261                         break;
9262         }
9263 out:
9264         btrfs_release_path(&path);
9265         if (found_count != count) {
9266                 error(
9267 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9268                         bytenr, len, root_id, objectid, offset, count, found_count);
9269                 return REFERENCER_MISSING;
9270         }
9271         return 0;
9272 }
9273
9274 /*
9275  * Check if the referencer of a shared data backref exists
9276  */
9277 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9278                                      u64 parent, u64 bytenr)
9279 {
9280         struct extent_buffer *eb;
9281         struct btrfs_key key;
9282         struct btrfs_file_extent_item *fi;
9283         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9284         u32 nr;
9285         int found_parent = 0;
9286         int i;
9287
9288         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9289         if (!extent_buffer_uptodate(eb))
9290                 goto out;
9291
9292         nr = btrfs_header_nritems(eb);
9293         for (i = 0; i < nr; i++) {
9294                 btrfs_item_key_to_cpu(eb, &key, i);
9295                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9296                         continue;
9297
9298                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9299                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9300                         continue;
9301
9302                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9303                         found_parent = 1;
9304                         break;
9305                 }
9306         }
9307
9308 out:
9309         free_extent_buffer(eb);
9310         if (!found_parent) {
9311                 error("shared extent %llu referencer lost (parent: %llu)",
9312                         bytenr, parent);
9313                 return REFERENCER_MISSING;
9314         }
9315         return 0;
9316 }
9317
9318 /*
9319  * This function will check a given extent item, including its backref and
9320  * itself (like crossing stripe boundary and type)
9321  *
9322  * Since we don't use extent_record anymore, introduce new error bit
9323  */
9324 static int check_extent_item(struct btrfs_fs_info *fs_info,
9325                              struct extent_buffer *eb, int slot)
9326 {
9327         struct btrfs_extent_item *ei;
9328         struct btrfs_extent_inline_ref *iref;
9329         struct btrfs_extent_data_ref *dref;
9330         unsigned long end;
9331         unsigned long ptr;
9332         int type;
9333         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9334         u32 item_size = btrfs_item_size_nr(eb, slot);
9335         u64 flags;
9336         u64 offset;
9337         int metadata = 0;
9338         int level;
9339         struct btrfs_key key;
9340         int ret;
9341         int err = 0;
9342
9343         btrfs_item_key_to_cpu(eb, &key, slot);
9344         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9345                 bytes_used += key.offset;
9346         else
9347                 bytes_used += nodesize;
9348
9349         if (item_size < sizeof(*ei)) {
9350                 /*
9351                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9352                  * old thing when on disk format is still un-determined.
9353                  * No need to care about it anymore
9354                  */
9355                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9356                 return -ENOTTY;
9357         }
9358
9359         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9360         flags = btrfs_extent_flags(eb, ei);
9361
9362         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9363                 metadata = 1;
9364         if (metadata && check_crossing_stripes(global_info, key.objectid,
9365                                                eb->len)) {
9366                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9367                       key.objectid, key.objectid + nodesize);
9368                 err |= CROSSING_STRIPE_BOUNDARY;
9369         }
9370
9371         ptr = (unsigned long)(ei + 1);
9372
9373         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9374                 /* Old EXTENT_ITEM metadata */
9375                 struct btrfs_tree_block_info *info;
9376
9377                 info = (struct btrfs_tree_block_info *)ptr;
9378                 level = btrfs_tree_block_level(eb, info);
9379                 ptr += sizeof(struct btrfs_tree_block_info);
9380         } else {
9381                 /* New METADATA_ITEM */
9382                 level = key.offset;
9383         }
9384         end = (unsigned long)ei + item_size;
9385
9386         if (ptr >= end) {
9387                 err |= ITEM_SIZE_MISMATCH;
9388                 goto out;
9389         }
9390
9391         /* Now check every backref in this extent item */
9392 next:
9393         iref = (struct btrfs_extent_inline_ref *)ptr;
9394         type = btrfs_extent_inline_ref_type(eb, iref);
9395         offset = btrfs_extent_inline_ref_offset(eb, iref);
9396         switch (type) {
9397         case BTRFS_TREE_BLOCK_REF_KEY:
9398                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9399                                                level);
9400                 err |= ret;
9401                 break;
9402         case BTRFS_SHARED_BLOCK_REF_KEY:
9403                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9404                                                  level);
9405                 err |= ret;
9406                 break;
9407         case BTRFS_EXTENT_DATA_REF_KEY:
9408                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9409                 ret = check_extent_data_backref(fs_info,
9410                                 btrfs_extent_data_ref_root(eb, dref),
9411                                 btrfs_extent_data_ref_objectid(eb, dref),
9412                                 btrfs_extent_data_ref_offset(eb, dref),
9413                                 key.objectid, key.offset,
9414                                 btrfs_extent_data_ref_count(eb, dref));
9415                 err |= ret;
9416                 break;
9417         case BTRFS_SHARED_DATA_REF_KEY:
9418                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9419                 err |= ret;
9420                 break;
9421         default:
9422                 error("extent[%llu %d %llu] has unknown ref type: %d",
9423                         key.objectid, key.type, key.offset, type);
9424                 err |= UNKNOWN_TYPE;
9425                 goto out;
9426         }
9427
9428         ptr += btrfs_extent_inline_ref_size(type);
9429         if (ptr < end)
9430                 goto next;
9431
9432 out:
9433         return err;
9434 }
9435
9436 /*
9437  * Check if a dev extent item is referred correctly by its chunk
9438  */
9439 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9440                                  struct extent_buffer *eb, int slot)
9441 {
9442         struct btrfs_root *chunk_root = fs_info->chunk_root;
9443         struct btrfs_dev_extent *ptr;
9444         struct btrfs_path path;
9445         struct btrfs_key chunk_key;
9446         struct btrfs_key devext_key;
9447         struct btrfs_chunk *chunk;
9448         struct extent_buffer *l;
9449         int num_stripes;
9450         u64 length;
9451         int i;
9452         int found_chunk = 0;
9453         int ret;
9454
9455         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9456         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9457         length = btrfs_dev_extent_length(eb, ptr);
9458
9459         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9460         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9461         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9462
9463         btrfs_init_path(&path);
9464         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9465         if (ret)
9466                 goto out;
9467
9468         l = path.nodes[0];
9469         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9470         if (btrfs_chunk_length(l, chunk) != length)
9471                 goto out;
9472
9473         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9474         for (i = 0; i < num_stripes; i++) {
9475                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9476                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9477
9478                 if (devid == devext_key.objectid &&
9479                     offset == devext_key.offset) {
9480                         found_chunk = 1;
9481                         break;
9482                 }
9483         }
9484 out:
9485         btrfs_release_path(&path);
9486         if (!found_chunk) {
9487                 error(
9488                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9489                         devext_key.objectid, devext_key.offset, length);
9490                 return REFERENCER_MISSING;
9491         }
9492         return 0;
9493 }
9494
9495 /*
9496  * Check if the used space is correct with the dev item
9497  */
9498 static int check_dev_item(struct btrfs_fs_info *fs_info,
9499                           struct extent_buffer *eb, int slot)
9500 {
9501         struct btrfs_root *dev_root = fs_info->dev_root;
9502         struct btrfs_dev_item *dev_item;
9503         struct btrfs_path path;
9504         struct btrfs_key key;
9505         struct btrfs_dev_extent *ptr;
9506         u64 dev_id;
9507         u64 used;
9508         u64 total = 0;
9509         int ret;
9510
9511         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9512         dev_id = btrfs_device_id(eb, dev_item);
9513         used = btrfs_device_bytes_used(eb, dev_item);
9514
9515         key.objectid = dev_id;
9516         key.type = BTRFS_DEV_EXTENT_KEY;
9517         key.offset = 0;
9518
9519         btrfs_init_path(&path);
9520         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9521         if (ret < 0) {
9522                 btrfs_item_key_to_cpu(eb, &key, slot);
9523                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9524                         key.objectid, key.type, key.offset);
9525                 btrfs_release_path(&path);
9526                 return REFERENCER_MISSING;
9527         }
9528
9529         /* Iterate dev_extents to calculate the used space of a device */
9530         while (1) {
9531                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9532
9533                 if (key.objectid > dev_id)
9534                         break;
9535                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9536                         goto next;
9537
9538                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9539                                      struct btrfs_dev_extent);
9540                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9541 next:
9542                 ret = btrfs_next_item(dev_root, &path);
9543                 if (ret)
9544                         break;
9545         }
9546         btrfs_release_path(&path);
9547
9548         if (used != total) {
9549                 btrfs_item_key_to_cpu(eb, &key, slot);
9550                 error(
9551 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9552                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9553                         BTRFS_DEV_EXTENT_KEY, dev_id);
9554                 return ACCOUNTING_MISMATCH;
9555         }
9556         return 0;
9557 }
9558
9559 /*
9560  * Check a block group item with its referener (chunk) and its used space
9561  * with extent/metadata item
9562  */
9563 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9564                                   struct extent_buffer *eb, int slot)
9565 {
9566         struct btrfs_root *extent_root = fs_info->extent_root;
9567         struct btrfs_root *chunk_root = fs_info->chunk_root;
9568         struct btrfs_block_group_item *bi;
9569         struct btrfs_block_group_item bg_item;
9570         struct btrfs_path path;
9571         struct btrfs_key bg_key;
9572         struct btrfs_key chunk_key;
9573         struct btrfs_key extent_key;
9574         struct btrfs_chunk *chunk;
9575         struct extent_buffer *leaf;
9576         struct btrfs_extent_item *ei;
9577         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9578         u64 flags;
9579         u64 bg_flags;
9580         u64 used;
9581         u64 total = 0;
9582         int ret;
9583         int err = 0;
9584
9585         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9586         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9587         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9588         used = btrfs_block_group_used(&bg_item);
9589         bg_flags = btrfs_block_group_flags(&bg_item);
9590
9591         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9592         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9593         chunk_key.offset = bg_key.objectid;
9594
9595         btrfs_init_path(&path);
9596         /* Search for the referencer chunk */
9597         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9598         if (ret) {
9599                 error(
9600                 "block group[%llu %llu] did not find the related chunk item",
9601                         bg_key.objectid, bg_key.offset);
9602                 err |= REFERENCER_MISSING;
9603         } else {
9604                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9605                                         struct btrfs_chunk);
9606                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9607                                                 bg_key.offset) {
9608                         error(
9609         "block group[%llu %llu] related chunk item length does not match",
9610                                 bg_key.objectid, bg_key.offset);
9611                         err |= REFERENCER_MISMATCH;
9612                 }
9613         }
9614         btrfs_release_path(&path);
9615
9616         /* Search from the block group bytenr */
9617         extent_key.objectid = bg_key.objectid;
9618         extent_key.type = 0;
9619         extent_key.offset = 0;
9620
9621         btrfs_init_path(&path);
9622         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9623         if (ret < 0)
9624                 goto out;
9625
9626         /* Iterate extent tree to account used space */
9627         while (1) {
9628                 leaf = path.nodes[0];
9629                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9630                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9631                         break;
9632
9633                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9634                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9635                         goto next;
9636                 if (extent_key.objectid < bg_key.objectid)
9637                         goto next;
9638
9639                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9640                         total += nodesize;
9641                 else
9642                         total += extent_key.offset;
9643
9644                 ei = btrfs_item_ptr(leaf, path.slots[0],
9645                                     struct btrfs_extent_item);
9646                 flags = btrfs_extent_flags(leaf, ei);
9647                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9648                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9649                                 error(
9650                         "bad extent[%llu, %llu) type mismatch with chunk",
9651                                         extent_key.objectid,
9652                                         extent_key.objectid + extent_key.offset);
9653                                 err |= CHUNK_TYPE_MISMATCH;
9654                         }
9655                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9656                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9657                                     BTRFS_BLOCK_GROUP_METADATA))) {
9658                                 error(
9659                         "bad extent[%llu, %llu) type mismatch with chunk",
9660                                         extent_key.objectid,
9661                                         extent_key.objectid + nodesize);
9662                                 err |= CHUNK_TYPE_MISMATCH;
9663                         }
9664                 }
9665 next:
9666                 ret = btrfs_next_item(extent_root, &path);
9667                 if (ret)
9668                         break;
9669         }
9670
9671 out:
9672         btrfs_release_path(&path);
9673
9674         if (total != used) {
9675                 error(
9676                 "block group[%llu %llu] used %llu but extent items used %llu",
9677                         bg_key.objectid, bg_key.offset, used, total);
9678                 err |= ACCOUNTING_MISMATCH;
9679         }
9680         return err;
9681 }
9682
9683 /*
9684  * Check a chunk item.
9685  * Including checking all referred dev_extents and block group
9686  */
9687 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9688                             struct extent_buffer *eb, int slot)
9689 {
9690         struct btrfs_root *extent_root = fs_info->extent_root;
9691         struct btrfs_root *dev_root = fs_info->dev_root;
9692         struct btrfs_path path;
9693         struct btrfs_key chunk_key;
9694         struct btrfs_key bg_key;
9695         struct btrfs_key devext_key;
9696         struct btrfs_chunk *chunk;
9697         struct extent_buffer *leaf;
9698         struct btrfs_block_group_item *bi;
9699         struct btrfs_block_group_item bg_item;
9700         struct btrfs_dev_extent *ptr;
9701         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9702         u64 length;
9703         u64 chunk_end;
9704         u64 type;
9705         u64 profile;
9706         int num_stripes;
9707         u64 offset;
9708         u64 objectid;
9709         int i;
9710         int ret;
9711         int err = 0;
9712
9713         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9714         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9715         length = btrfs_chunk_length(eb, chunk);
9716         chunk_end = chunk_key.offset + length;
9717         if (!IS_ALIGNED(length, sectorsize)) {
9718                 error("chunk[%llu %llu) not aligned to %u",
9719                         chunk_key.offset, chunk_end, sectorsize);
9720                 err |= BYTES_UNALIGNED;
9721                 goto out;
9722         }
9723
9724         type = btrfs_chunk_type(eb, chunk);
9725         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9726         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9727                 error("chunk[%llu %llu) has no chunk type",
9728                         chunk_key.offset, chunk_end);
9729                 err |= UNKNOWN_TYPE;
9730         }
9731         if (profile && (profile & (profile - 1))) {
9732                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9733                         chunk_key.offset, chunk_end, profile);
9734                 err |= UNKNOWN_TYPE;
9735         }
9736
9737         bg_key.objectid = chunk_key.offset;
9738         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9739         bg_key.offset = length;
9740
9741         btrfs_init_path(&path);
9742         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9743         if (ret) {
9744                 error(
9745                 "chunk[%llu %llu) did not find the related block group item",
9746                         chunk_key.offset, chunk_end);
9747                 err |= REFERENCER_MISSING;
9748         } else{
9749                 leaf = path.nodes[0];
9750                 bi = btrfs_item_ptr(leaf, path.slots[0],
9751                                     struct btrfs_block_group_item);
9752                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9753                                    sizeof(bg_item));
9754                 if (btrfs_block_group_flags(&bg_item) != type) {
9755                         error(
9756 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9757                                 chunk_key.offset, chunk_end, type,
9758                                 btrfs_block_group_flags(&bg_item));
9759                         err |= REFERENCER_MISSING;
9760                 }
9761         }
9762
9763         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9764         for (i = 0; i < num_stripes; i++) {
9765                 btrfs_release_path(&path);
9766                 btrfs_init_path(&path);
9767                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9768                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9769                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9770
9771                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9772                                         0, 0);
9773                 if (ret)
9774                         goto not_match_dev;
9775
9776                 leaf = path.nodes[0];
9777                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9778                                      struct btrfs_dev_extent);
9779                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9780                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9781                 if (objectid != chunk_key.objectid ||
9782                     offset != chunk_key.offset ||
9783                     btrfs_dev_extent_length(leaf, ptr) != length)
9784                         goto not_match_dev;
9785                 continue;
9786 not_match_dev:
9787                 err |= BACKREF_MISSING;
9788                 error(
9789                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9790                         chunk_key.objectid, chunk_end, i);
9791                 continue;
9792         }
9793         btrfs_release_path(&path);
9794 out:
9795         return err;
9796 }
9797
9798 /*
9799  * Main entry function to check known items and update related accounting info
9800  */
9801 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9802 {
9803         struct btrfs_fs_info *fs_info = root->fs_info;
9804         struct btrfs_key key;
9805         int slot = 0;
9806         int type;
9807         struct btrfs_extent_data_ref *dref;
9808         int ret;
9809         int err = 0;
9810
9811 next:
9812         btrfs_item_key_to_cpu(eb, &key, slot);
9813         type = key.type;
9814
9815         switch (type) {
9816         case BTRFS_EXTENT_DATA_KEY:
9817                 ret = check_extent_data_item(root, eb, slot);
9818                 err |= ret;
9819                 break;
9820         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9821                 ret = check_block_group_item(fs_info, eb, slot);
9822                 err |= ret;
9823                 break;
9824         case BTRFS_DEV_ITEM_KEY:
9825                 ret = check_dev_item(fs_info, eb, slot);
9826                 err |= ret;
9827                 break;
9828         case BTRFS_CHUNK_ITEM_KEY:
9829                 ret = check_chunk_item(fs_info, eb, slot);
9830                 err |= ret;
9831                 break;
9832         case BTRFS_DEV_EXTENT_KEY:
9833                 ret = check_dev_extent_item(fs_info, eb, slot);
9834                 err |= ret;
9835                 break;
9836         case BTRFS_EXTENT_ITEM_KEY:
9837         case BTRFS_METADATA_ITEM_KEY:
9838                 ret = check_extent_item(fs_info, eb, slot);
9839                 err |= ret;
9840                 break;
9841         case BTRFS_EXTENT_CSUM_KEY:
9842                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9843                 break;
9844         case BTRFS_TREE_BLOCK_REF_KEY:
9845                 ret = check_tree_block_backref(fs_info, key.offset,
9846                                                key.objectid, -1);
9847                 err |= ret;
9848                 break;
9849         case BTRFS_EXTENT_DATA_REF_KEY:
9850                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9851                 ret = check_extent_data_backref(fs_info,
9852                                 btrfs_extent_data_ref_root(eb, dref),
9853                                 btrfs_extent_data_ref_objectid(eb, dref),
9854                                 btrfs_extent_data_ref_offset(eb, dref),
9855                                 key.objectid, 0,
9856                                 btrfs_extent_data_ref_count(eb, dref));
9857                 err |= ret;
9858                 break;
9859         case BTRFS_SHARED_BLOCK_REF_KEY:
9860                 ret = check_shared_block_backref(fs_info, key.offset,
9861                                                  key.objectid, -1);
9862                 err |= ret;
9863                 break;
9864         case BTRFS_SHARED_DATA_REF_KEY:
9865                 ret = check_shared_data_backref(fs_info, key.offset,
9866                                                 key.objectid);
9867                 err |= ret;
9868                 break;
9869         default:
9870                 break;
9871         }
9872
9873         if (++slot < btrfs_header_nritems(eb))
9874                 goto next;
9875
9876         return err;
9877 }
9878
9879 /*
9880  * Helper function for later fs/subvol tree check.  To determine if a tree
9881  * block should be checked.
9882  * This function will ensure only the direct referencer with lowest rootid to
9883  * check a fs/subvolume tree block.
9884  *
9885  * Backref check at extent tree would detect errors like missing subvolume
9886  * tree, so we can do aggressive check to reduce duplicated checks.
9887  */
9888 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9889 {
9890         struct btrfs_root *extent_root = root->fs_info->extent_root;
9891         struct btrfs_key key;
9892         struct btrfs_path path;
9893         struct extent_buffer *leaf;
9894         int slot;
9895         struct btrfs_extent_item *ei;
9896         unsigned long ptr;
9897         unsigned long end;
9898         int type;
9899         u32 item_size;
9900         u64 offset;
9901         struct btrfs_extent_inline_ref *iref;
9902         int ret;
9903
9904         btrfs_init_path(&path);
9905         key.objectid = btrfs_header_bytenr(eb);
9906         key.type = BTRFS_METADATA_ITEM_KEY;
9907         key.offset = (u64)-1;
9908
9909         /*
9910          * Any failure in backref resolving means we can't determine
9911          * whom the tree block belongs to.
9912          * So in that case, we need to check that tree block
9913          */
9914         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9915         if (ret < 0)
9916                 goto need_check;
9917
9918         ret = btrfs_previous_extent_item(extent_root, &path,
9919                                          btrfs_header_bytenr(eb));
9920         if (ret)
9921                 goto need_check;
9922
9923         leaf = path.nodes[0];
9924         slot = path.slots[0];
9925         btrfs_item_key_to_cpu(leaf, &key, slot);
9926         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9927
9928         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9929                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9930         } else {
9931                 struct btrfs_tree_block_info *info;
9932
9933                 info = (struct btrfs_tree_block_info *)(ei + 1);
9934                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9935         }
9936
9937         item_size = btrfs_item_size_nr(leaf, slot);
9938         ptr = (unsigned long)iref;
9939         end = (unsigned long)ei + item_size;
9940         while (ptr < end) {
9941                 iref = (struct btrfs_extent_inline_ref *)ptr;
9942                 type = btrfs_extent_inline_ref_type(leaf, iref);
9943                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9944
9945                 /*
9946                  * We only check the tree block if current root is
9947                  * the lowest referencer of it.
9948                  */
9949                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9950                     offset < root->objectid) {
9951                         btrfs_release_path(&path);
9952                         return 0;
9953                 }
9954
9955                 ptr += btrfs_extent_inline_ref_size(type);
9956         }
9957         /*
9958          * Normally we should also check keyed tree block ref, but that may be
9959          * very time consuming.  Inlined ref should already make us skip a lot
9960          * of refs now.  So skip search keyed tree block ref.
9961          */
9962
9963 need_check:
9964         btrfs_release_path(&path);
9965         return 1;
9966 }
9967
9968 /*
9969  * Traversal function for tree block. We will do:
9970  * 1) Skip shared fs/subvolume tree blocks
9971  * 2) Update related bytes accounting
9972  * 3) Pre-order traversal
9973  */
9974 static int traverse_tree_block(struct btrfs_root *root,
9975                                 struct extent_buffer *node)
9976 {
9977         struct extent_buffer *eb;
9978         struct btrfs_key key;
9979         struct btrfs_key drop_key;
9980         int level;
9981         u64 nr;
9982         int i;
9983         int err = 0;
9984         int ret;
9985
9986         /*
9987          * Skip shared fs/subvolume tree block, in that case they will
9988          * be checked by referencer with lowest rootid
9989          */
9990         if (is_fstree(root->objectid) && !should_check(root, node))
9991                 return 0;
9992
9993         /* Update bytes accounting */
9994         total_btree_bytes += node->len;
9995         if (fs_root_objectid(btrfs_header_owner(node)))
9996                 total_fs_tree_bytes += node->len;
9997         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9998                 total_extent_tree_bytes += node->len;
9999         if (!found_old_backref &&
10000             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10001             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10002             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10003                 found_old_backref = 1;
10004
10005         /* pre-order tranversal, check itself first */
10006         level = btrfs_header_level(node);
10007         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10008                                    btrfs_header_level(node),
10009                                    btrfs_header_owner(node));
10010         err |= ret;
10011         if (err)
10012                 error(
10013         "check %s failed root %llu bytenr %llu level %d, force continue check",
10014                         level ? "node":"leaf", root->objectid,
10015                         btrfs_header_bytenr(node), btrfs_header_level(node));
10016
10017         if (!level) {
10018                 btree_space_waste += btrfs_leaf_free_space(root, node);
10019                 ret = check_leaf_items(root, node);
10020                 err |= ret;
10021                 return err;
10022         }
10023
10024         nr = btrfs_header_nritems(node);
10025         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10026         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10027                 sizeof(struct btrfs_key_ptr);
10028
10029         /* Then check all its children */
10030         for (i = 0; i < nr; i++) {
10031                 u64 blocknr = btrfs_node_blockptr(node, i);
10032
10033                 btrfs_node_key_to_cpu(node, &key, i);
10034                 if (level == root->root_item.drop_level &&
10035                     is_dropped_key(&key, &drop_key))
10036                         continue;
10037
10038                 /*
10039                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10040                  * to call the function itself.
10041                  */
10042                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10043                 if (extent_buffer_uptodate(eb)) {
10044                         ret = traverse_tree_block(root, eb);
10045                         err |= ret;
10046                 }
10047                 free_extent_buffer(eb);
10048         }
10049
10050         return err;
10051 }
10052
10053 /*
10054  * Low memory usage version check_chunks_and_extents.
10055  */
10056 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10057 {
10058         struct btrfs_path path;
10059         struct btrfs_key key;
10060         struct btrfs_root *root1;
10061         struct btrfs_root *cur_root;
10062         int err = 0;
10063         int ret;
10064
10065         root1 = root->fs_info->chunk_root;
10066         ret = traverse_tree_block(root1, root1->node);
10067         err |= ret;
10068
10069         root1 = root->fs_info->tree_root;
10070         ret = traverse_tree_block(root1, root1->node);
10071         err |= ret;
10072
10073         btrfs_init_path(&path);
10074         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10075         key.offset = 0;
10076         key.type = BTRFS_ROOT_ITEM_KEY;
10077
10078         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10079         if (ret) {
10080                 error("cannot find extent treet in tree_root");
10081                 goto out;
10082         }
10083
10084         while (1) {
10085                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10086                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10087                         goto next;
10088                 key.offset = (u64)-1;
10089
10090                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10091                 if (IS_ERR(cur_root) || !cur_root) {
10092                         error("failed to read tree: %lld", key.objectid);
10093                         goto next;
10094                 }
10095
10096                 ret = traverse_tree_block(cur_root, cur_root->node);
10097                 err |= ret;
10098
10099 next:
10100                 ret = btrfs_next_item(root1, &path);
10101                 if (ret)
10102                         goto out;
10103         }
10104
10105 out:
10106         btrfs_release_path(&path);
10107         return err;
10108 }
10109
10110 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10111                            struct btrfs_root *root, int overwrite)
10112 {
10113         struct extent_buffer *c;
10114         struct extent_buffer *old = root->node;
10115         int level;
10116         int ret;
10117         struct btrfs_disk_key disk_key = {0,0,0};
10118
10119         level = 0;
10120
10121         if (overwrite) {
10122                 c = old;
10123                 extent_buffer_get(c);
10124                 goto init;
10125         }
10126         c = btrfs_alloc_free_block(trans, root,
10127                                    root->nodesize,
10128                                    root->root_key.objectid,
10129                                    &disk_key, level, 0, 0);
10130         if (IS_ERR(c)) {
10131                 c = old;
10132                 extent_buffer_get(c);
10133                 overwrite = 1;
10134         }
10135 init:
10136         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10137         btrfs_set_header_level(c, level);
10138         btrfs_set_header_bytenr(c, c->start);
10139         btrfs_set_header_generation(c, trans->transid);
10140         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10141         btrfs_set_header_owner(c, root->root_key.objectid);
10142
10143         write_extent_buffer(c, root->fs_info->fsid,
10144                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10145
10146         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10147                             btrfs_header_chunk_tree_uuid(c),
10148                             BTRFS_UUID_SIZE);
10149
10150         btrfs_mark_buffer_dirty(c);
10151         /*
10152          * this case can happen in the following case:
10153          *
10154          * 1.overwrite previous root.
10155          *
10156          * 2.reinit reloc data root, this is because we skip pin
10157          * down reloc data tree before which means we can allocate
10158          * same block bytenr here.
10159          */
10160         if (old->start == c->start) {
10161                 btrfs_set_root_generation(&root->root_item,
10162                                           trans->transid);
10163                 root->root_item.level = btrfs_header_level(root->node);
10164                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10165                                         &root->root_key, &root->root_item);
10166                 if (ret) {
10167                         free_extent_buffer(c);
10168                         return ret;
10169                 }
10170         }
10171         free_extent_buffer(old);
10172         root->node = c;
10173         add_root_to_dirty_list(root);
10174         return 0;
10175 }
10176
10177 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10178                                 struct extent_buffer *eb, int tree_root)
10179 {
10180         struct extent_buffer *tmp;
10181         struct btrfs_root_item *ri;
10182         struct btrfs_key key;
10183         u64 bytenr;
10184         u32 nodesize;
10185         int level = btrfs_header_level(eb);
10186         int nritems;
10187         int ret;
10188         int i;
10189
10190         /*
10191          * If we have pinned this block before, don't pin it again.
10192          * This can not only avoid forever loop with broken filesystem
10193          * but also give us some speedups.
10194          */
10195         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10196                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10197                 return 0;
10198
10199         btrfs_pin_extent(fs_info, eb->start, eb->len);
10200
10201         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10202         nritems = btrfs_header_nritems(eb);
10203         for (i = 0; i < nritems; i++) {
10204                 if (level == 0) {
10205                         btrfs_item_key_to_cpu(eb, &key, i);
10206                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10207                                 continue;
10208                         /* Skip the extent root and reloc roots */
10209                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10210                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10211                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10212                                 continue;
10213                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10214                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10215
10216                         /*
10217                          * If at any point we start needing the real root we
10218                          * will have to build a stump root for the root we are
10219                          * in, but for now this doesn't actually use the root so
10220                          * just pass in extent_root.
10221                          */
10222                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10223                                               nodesize, 0);
10224                         if (!extent_buffer_uptodate(tmp)) {
10225                                 fprintf(stderr, "Error reading root block\n");
10226                                 return -EIO;
10227                         }
10228                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10229                         free_extent_buffer(tmp);
10230                         if (ret)
10231                                 return ret;
10232                 } else {
10233                         bytenr = btrfs_node_blockptr(eb, i);
10234
10235                         /* If we aren't the tree root don't read the block */
10236                         if (level == 1 && !tree_root) {
10237                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10238                                 continue;
10239                         }
10240
10241                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10242                                               nodesize, 0);
10243                         if (!extent_buffer_uptodate(tmp)) {
10244                                 fprintf(stderr, "Error reading tree block\n");
10245                                 return -EIO;
10246                         }
10247                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10248                         free_extent_buffer(tmp);
10249                         if (ret)
10250                                 return ret;
10251                 }
10252         }
10253
10254         return 0;
10255 }
10256
10257 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10258 {
10259         int ret;
10260
10261         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10262         if (ret)
10263                 return ret;
10264
10265         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10266 }
10267
10268 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10269 {
10270         struct btrfs_block_group_cache *cache;
10271         struct btrfs_path *path;
10272         struct extent_buffer *leaf;
10273         struct btrfs_chunk *chunk;
10274         struct btrfs_key key;
10275         int ret;
10276         u64 start;
10277
10278         path = btrfs_alloc_path();
10279         if (!path)
10280                 return -ENOMEM;
10281
10282         key.objectid = 0;
10283         key.type = BTRFS_CHUNK_ITEM_KEY;
10284         key.offset = 0;
10285
10286         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10287         if (ret < 0) {
10288                 btrfs_free_path(path);
10289                 return ret;
10290         }
10291
10292         /*
10293          * We do this in case the block groups were screwed up and had alloc
10294          * bits that aren't actually set on the chunks.  This happens with
10295          * restored images every time and could happen in real life I guess.
10296          */
10297         fs_info->avail_data_alloc_bits = 0;
10298         fs_info->avail_metadata_alloc_bits = 0;
10299         fs_info->avail_system_alloc_bits = 0;
10300
10301         /* First we need to create the in-memory block groups */
10302         while (1) {
10303                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10304                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10305                         if (ret < 0) {
10306                                 btrfs_free_path(path);
10307                                 return ret;
10308                         }
10309                         if (ret) {
10310                                 ret = 0;
10311                                 break;
10312                         }
10313                 }
10314                 leaf = path->nodes[0];
10315                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10316                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10317                         path->slots[0]++;
10318                         continue;
10319                 }
10320
10321                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10322                                        struct btrfs_chunk);
10323                 btrfs_add_block_group(fs_info, 0,
10324                                       btrfs_chunk_type(leaf, chunk),
10325                                       key.objectid, key.offset,
10326                                       btrfs_chunk_length(leaf, chunk));
10327                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10328                                  key.offset + btrfs_chunk_length(leaf, chunk),
10329                                  GFP_NOFS);
10330                 path->slots[0]++;
10331         }
10332         start = 0;
10333         while (1) {
10334                 cache = btrfs_lookup_first_block_group(fs_info, start);
10335                 if (!cache)
10336                         break;
10337                 cache->cached = 1;
10338                 start = cache->key.objectid + cache->key.offset;
10339         }
10340
10341         btrfs_free_path(path);
10342         return 0;
10343 }
10344
10345 static int reset_balance(struct btrfs_trans_handle *trans,
10346                          struct btrfs_fs_info *fs_info)
10347 {
10348         struct btrfs_root *root = fs_info->tree_root;
10349         struct btrfs_path *path;
10350         struct extent_buffer *leaf;
10351         struct btrfs_key key;
10352         int del_slot, del_nr = 0;
10353         int ret;
10354         int found = 0;
10355
10356         path = btrfs_alloc_path();
10357         if (!path)
10358                 return -ENOMEM;
10359
10360         key.objectid = BTRFS_BALANCE_OBJECTID;
10361         key.type = BTRFS_BALANCE_ITEM_KEY;
10362         key.offset = 0;
10363
10364         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10365         if (ret) {
10366                 if (ret > 0)
10367                         ret = 0;
10368                 if (!ret)
10369                         goto reinit_data_reloc;
10370                 else
10371                         goto out;
10372         }
10373
10374         ret = btrfs_del_item(trans, root, path);
10375         if (ret)
10376                 goto out;
10377         btrfs_release_path(path);
10378
10379         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10380         key.type = BTRFS_ROOT_ITEM_KEY;
10381         key.offset = 0;
10382
10383         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10384         if (ret < 0)
10385                 goto out;
10386         while (1) {
10387                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10388                         if (!found)
10389                                 break;
10390
10391                         if (del_nr) {
10392                                 ret = btrfs_del_items(trans, root, path,
10393                                                       del_slot, del_nr);
10394                                 del_nr = 0;
10395                                 if (ret)
10396                                         goto out;
10397                         }
10398                         key.offset++;
10399                         btrfs_release_path(path);
10400
10401                         found = 0;
10402                         ret = btrfs_search_slot(trans, root, &key, path,
10403                                                 -1, 1);
10404                         if (ret < 0)
10405                                 goto out;
10406                         continue;
10407                 }
10408                 found = 1;
10409                 leaf = path->nodes[0];
10410                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10411                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10412                         break;
10413                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10414                         path->slots[0]++;
10415                         continue;
10416                 }
10417                 if (!del_nr) {
10418                         del_slot = path->slots[0];
10419                         del_nr = 1;
10420                 } else {
10421                         del_nr++;
10422                 }
10423                 path->slots[0]++;
10424         }
10425
10426         if (del_nr) {
10427                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10428                 if (ret)
10429                         goto out;
10430         }
10431         btrfs_release_path(path);
10432
10433 reinit_data_reloc:
10434         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10435         key.type = BTRFS_ROOT_ITEM_KEY;
10436         key.offset = (u64)-1;
10437         root = btrfs_read_fs_root(fs_info, &key);
10438         if (IS_ERR(root)) {
10439                 fprintf(stderr, "Error reading data reloc tree\n");
10440                 ret = PTR_ERR(root);
10441                 goto out;
10442         }
10443         record_root_in_trans(trans, root);
10444         ret = btrfs_fsck_reinit_root(trans, root, 0);
10445         if (ret)
10446                 goto out;
10447         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10448 out:
10449         btrfs_free_path(path);
10450         return ret;
10451 }
10452
10453 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10454                               struct btrfs_fs_info *fs_info)
10455 {
10456         u64 start = 0;
10457         int ret;
10458
10459         /*
10460          * The only reason we don't do this is because right now we're just
10461          * walking the trees we find and pinning down their bytes, we don't look
10462          * at any of the leaves.  In order to do mixed groups we'd have to check
10463          * the leaves of any fs roots and pin down the bytes for any file
10464          * extents we find.  Not hard but why do it if we don't have to?
10465          */
10466         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10467                 fprintf(stderr, "We don't support re-initing the extent tree "
10468                         "for mixed block groups yet, please notify a btrfs "
10469                         "developer you want to do this so they can add this "
10470                         "functionality.\n");
10471                 return -EINVAL;
10472         }
10473
10474         /*
10475          * first we need to walk all of the trees except the extent tree and pin
10476          * down the bytes that are in use so we don't overwrite any existing
10477          * metadata.
10478          */
10479         ret = pin_metadata_blocks(fs_info);
10480         if (ret) {
10481                 fprintf(stderr, "error pinning down used bytes\n");
10482                 return ret;
10483         }
10484
10485         /*
10486          * Need to drop all the block groups since we're going to recreate all
10487          * of them again.
10488          */
10489         btrfs_free_block_groups(fs_info);
10490         ret = reset_block_groups(fs_info);
10491         if (ret) {
10492                 fprintf(stderr, "error resetting the block groups\n");
10493                 return ret;
10494         }
10495
10496         /* Ok we can allocate now, reinit the extent root */
10497         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10498         if (ret) {
10499                 fprintf(stderr, "extent root initialization failed\n");
10500                 /*
10501                  * When the transaction code is updated we should end the
10502                  * transaction, but for now progs only knows about commit so
10503                  * just return an error.
10504                  */
10505                 return ret;
10506         }
10507
10508         /*
10509          * Now we have all the in-memory block groups setup so we can make
10510          * allocations properly, and the metadata we care about is safe since we
10511          * pinned all of it above.
10512          */
10513         while (1) {
10514                 struct btrfs_block_group_cache *cache;
10515
10516                 cache = btrfs_lookup_first_block_group(fs_info, start);
10517                 if (!cache)
10518                         break;
10519                 start = cache->key.objectid + cache->key.offset;
10520                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10521                                         &cache->key, &cache->item,
10522                                         sizeof(cache->item));
10523                 if (ret) {
10524                         fprintf(stderr, "Error adding block group\n");
10525                         return ret;
10526                 }
10527                 btrfs_extent_post_op(trans, fs_info->extent_root);
10528         }
10529
10530         ret = reset_balance(trans, fs_info);
10531         if (ret)
10532                 fprintf(stderr, "error resetting the pending balance\n");
10533
10534         return ret;
10535 }
10536
10537 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10538 {
10539         struct btrfs_path *path;
10540         struct btrfs_trans_handle *trans;
10541         struct btrfs_key key;
10542         int ret;
10543
10544         printf("Recowing metadata block %llu\n", eb->start);
10545         key.objectid = btrfs_header_owner(eb);
10546         key.type = BTRFS_ROOT_ITEM_KEY;
10547         key.offset = (u64)-1;
10548
10549         root = btrfs_read_fs_root(root->fs_info, &key);
10550         if (IS_ERR(root)) {
10551                 fprintf(stderr, "Couldn't find owner root %llu\n",
10552                         key.objectid);
10553                 return PTR_ERR(root);
10554         }
10555
10556         path = btrfs_alloc_path();
10557         if (!path)
10558                 return -ENOMEM;
10559
10560         trans = btrfs_start_transaction(root, 1);
10561         if (IS_ERR(trans)) {
10562                 btrfs_free_path(path);
10563                 return PTR_ERR(trans);
10564         }
10565
10566         path->lowest_level = btrfs_header_level(eb);
10567         if (path->lowest_level)
10568                 btrfs_node_key_to_cpu(eb, &key, 0);
10569         else
10570                 btrfs_item_key_to_cpu(eb, &key, 0);
10571
10572         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10573         btrfs_commit_transaction(trans, root);
10574         btrfs_free_path(path);
10575         return ret;
10576 }
10577
10578 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10579 {
10580         struct btrfs_path *path;
10581         struct btrfs_trans_handle *trans;
10582         struct btrfs_key key;
10583         int ret;
10584
10585         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10586                bad->key.type, bad->key.offset);
10587         key.objectid = bad->root_id;
10588         key.type = BTRFS_ROOT_ITEM_KEY;
10589         key.offset = (u64)-1;
10590
10591         root = btrfs_read_fs_root(root->fs_info, &key);
10592         if (IS_ERR(root)) {
10593                 fprintf(stderr, "Couldn't find owner root %llu\n",
10594                         key.objectid);
10595                 return PTR_ERR(root);
10596         }
10597
10598         path = btrfs_alloc_path();
10599         if (!path)
10600                 return -ENOMEM;
10601
10602         trans = btrfs_start_transaction(root, 1);
10603         if (IS_ERR(trans)) {
10604                 btrfs_free_path(path);
10605                 return PTR_ERR(trans);
10606         }
10607
10608         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10609         if (ret) {
10610                 if (ret > 0)
10611                         ret = 0;
10612                 goto out;
10613         }
10614         ret = btrfs_del_item(trans, root, path);
10615 out:
10616         btrfs_commit_transaction(trans, root);
10617         btrfs_free_path(path);
10618         return ret;
10619 }
10620
10621 static int zero_log_tree(struct btrfs_root *root)
10622 {
10623         struct btrfs_trans_handle *trans;
10624         int ret;
10625
10626         trans = btrfs_start_transaction(root, 1);
10627         if (IS_ERR(trans)) {
10628                 ret = PTR_ERR(trans);
10629                 return ret;
10630         }
10631         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10632         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10633         ret = btrfs_commit_transaction(trans, root);
10634         return ret;
10635 }
10636
10637 static int populate_csum(struct btrfs_trans_handle *trans,
10638                          struct btrfs_root *csum_root, char *buf, u64 start,
10639                          u64 len)
10640 {
10641         u64 offset = 0;
10642         u64 sectorsize;
10643         int ret = 0;
10644
10645         while (offset < len) {
10646                 sectorsize = csum_root->sectorsize;
10647                 ret = read_extent_data(csum_root, buf, start + offset,
10648                                        &sectorsize, 0);
10649                 if (ret)
10650                         break;
10651                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10652                                             start + offset, buf, sectorsize);
10653                 if (ret)
10654                         break;
10655                 offset += sectorsize;
10656         }
10657         return ret;
10658 }
10659
10660 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10661                                       struct btrfs_root *csum_root,
10662                                       struct btrfs_root *cur_root)
10663 {
10664         struct btrfs_path *path;
10665         struct btrfs_key key;
10666         struct extent_buffer *node;
10667         struct btrfs_file_extent_item *fi;
10668         char *buf = NULL;
10669         u64 start = 0;
10670         u64 len = 0;
10671         int slot = 0;
10672         int ret = 0;
10673
10674         path = btrfs_alloc_path();
10675         if (!path)
10676                 return -ENOMEM;
10677         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10678         if (!buf) {
10679                 ret = -ENOMEM;
10680                 goto out;
10681         }
10682
10683         key.objectid = 0;
10684         key.offset = 0;
10685         key.type = 0;
10686
10687         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10688         if (ret < 0)
10689                 goto out;
10690         /* Iterate all regular file extents and fill its csum */
10691         while (1) {
10692                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10693
10694                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10695                         goto next;
10696                 node = path->nodes[0];
10697                 slot = path->slots[0];
10698                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10699                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10700                         goto next;
10701                 start = btrfs_file_extent_disk_bytenr(node, fi);
10702                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10703
10704                 ret = populate_csum(trans, csum_root, buf, start, len);
10705                 if (ret == -EEXIST)
10706                         ret = 0;
10707                 if (ret < 0)
10708                         goto out;
10709 next:
10710                 /*
10711                  * TODO: if next leaf is corrupted, jump to nearest next valid
10712                  * leaf.
10713                  */
10714                 ret = btrfs_next_item(cur_root, path);
10715                 if (ret < 0)
10716                         goto out;
10717                 if (ret > 0) {
10718                         ret = 0;
10719                         goto out;
10720                 }
10721         }
10722
10723 out:
10724         btrfs_free_path(path);
10725         free(buf);
10726         return ret;
10727 }
10728
10729 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10730                                   struct btrfs_root *csum_root)
10731 {
10732         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10733         struct btrfs_path *path;
10734         struct btrfs_root *tree_root = fs_info->tree_root;
10735         struct btrfs_root *cur_root;
10736         struct extent_buffer *node;
10737         struct btrfs_key key;
10738         int slot = 0;
10739         int ret = 0;
10740
10741         path = btrfs_alloc_path();
10742         if (!path)
10743                 return -ENOMEM;
10744
10745         key.objectid = BTRFS_FS_TREE_OBJECTID;
10746         key.offset = 0;
10747         key.type = BTRFS_ROOT_ITEM_KEY;
10748
10749         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10750         if (ret < 0)
10751                 goto out;
10752         if (ret > 0) {
10753                 ret = -ENOENT;
10754                 goto out;
10755         }
10756
10757         while (1) {
10758                 node = path->nodes[0];
10759                 slot = path->slots[0];
10760                 btrfs_item_key_to_cpu(node, &key, slot);
10761                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10762                         goto out;
10763                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10764                         goto next;
10765                 if (!is_fstree(key.objectid))
10766                         goto next;
10767                 key.offset = (u64)-1;
10768
10769                 cur_root = btrfs_read_fs_root(fs_info, &key);
10770                 if (IS_ERR(cur_root) || !cur_root) {
10771                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10772                                 key.objectid);
10773                         goto out;
10774                 }
10775                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10776                                 cur_root);
10777                 if (ret < 0)
10778                         goto out;
10779 next:
10780                 ret = btrfs_next_item(tree_root, path);
10781                 if (ret > 0) {
10782                         ret = 0;
10783                         goto out;
10784                 }
10785                 if (ret < 0)
10786                         goto out;
10787         }
10788
10789 out:
10790         btrfs_free_path(path);
10791         return ret;
10792 }
10793
10794 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10795                                       struct btrfs_root *csum_root)
10796 {
10797         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10798         struct btrfs_path *path;
10799         struct btrfs_extent_item *ei;
10800         struct extent_buffer *leaf;
10801         char *buf;
10802         struct btrfs_key key;
10803         int ret;
10804
10805         path = btrfs_alloc_path();
10806         if (!path)
10807                 return -ENOMEM;
10808
10809         key.objectid = 0;
10810         key.type = BTRFS_EXTENT_ITEM_KEY;
10811         key.offset = 0;
10812
10813         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10814         if (ret < 0) {
10815                 btrfs_free_path(path);
10816                 return ret;
10817         }
10818
10819         buf = malloc(csum_root->sectorsize);
10820         if (!buf) {
10821                 btrfs_free_path(path);
10822                 return -ENOMEM;
10823         }
10824
10825         while (1) {
10826                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10827                         ret = btrfs_next_leaf(extent_root, path);
10828                         if (ret < 0)
10829                                 break;
10830                         if (ret) {
10831                                 ret = 0;
10832                                 break;
10833                         }
10834                 }
10835                 leaf = path->nodes[0];
10836
10837                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10838                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10839                         path->slots[0]++;
10840                         continue;
10841                 }
10842
10843                 ei = btrfs_item_ptr(leaf, path->slots[0],
10844                                     struct btrfs_extent_item);
10845                 if (!(btrfs_extent_flags(leaf, ei) &
10846                       BTRFS_EXTENT_FLAG_DATA)) {
10847                         path->slots[0]++;
10848                         continue;
10849                 }
10850
10851                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10852                                     key.offset);
10853                 if (ret)
10854                         break;
10855                 path->slots[0]++;
10856         }
10857
10858         btrfs_free_path(path);
10859         free(buf);
10860         return ret;
10861 }
10862
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * After an extent tree init all extent info has been wiped out, so the extent
 * tree cannot be used as the source in that case.  When search_fs_tree is set
 * the fs/subvolume trees are walked instead of the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
10879
10880 static void free_roots_info_cache(void)
10881 {
10882         if (!roots_info_cache)
10883                 return;
10884
10885         while (!cache_tree_empty(roots_info_cache)) {
10886                 struct cache_extent *entry;
10887                 struct root_item_info *rii;
10888
10889                 entry = first_cache_extent(roots_info_cache);
10890                 if (!entry)
10891                         break;
10892                 remove_cache_extent(roots_info_cache, entry);
10893                 rii = container_of(entry, struct root_item_info, cache_extent);
10894                 free(rii);
10895         }
10896
10897         free(roots_info_cache);
10898         roots_info_cache = NULL;
10899 }
10900
10901 static int build_roots_info_cache(struct btrfs_fs_info *info)
10902 {
10903         int ret = 0;
10904         struct btrfs_key key;
10905         struct extent_buffer *leaf;
10906         struct btrfs_path *path;
10907
10908         if (!roots_info_cache) {
10909                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10910                 if (!roots_info_cache)
10911                         return -ENOMEM;
10912                 cache_tree_init(roots_info_cache);
10913         }
10914
10915         path = btrfs_alloc_path();
10916         if (!path)
10917                 return -ENOMEM;
10918
10919         key.objectid = 0;
10920         key.type = BTRFS_EXTENT_ITEM_KEY;
10921         key.offset = 0;
10922
10923         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10924         if (ret < 0)
10925                 goto out;
10926         leaf = path->nodes[0];
10927
10928         while (1) {
10929                 struct btrfs_key found_key;
10930                 struct btrfs_extent_item *ei;
10931                 struct btrfs_extent_inline_ref *iref;
10932                 int slot = path->slots[0];
10933                 int type;
10934                 u64 flags;
10935                 u64 root_id;
10936                 u8 level;
10937                 struct cache_extent *entry;
10938                 struct root_item_info *rii;
10939
10940                 if (slot >= btrfs_header_nritems(leaf)) {
10941                         ret = btrfs_next_leaf(info->extent_root, path);
10942                         if (ret < 0) {
10943                                 break;
10944                         } else if (ret) {
10945                                 ret = 0;
10946                                 break;
10947                         }
10948                         leaf = path->nodes[0];
10949                         slot = path->slots[0];
10950                 }
10951
10952                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10953
10954                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10955                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10956                         goto next;
10957
10958                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10959                 flags = btrfs_extent_flags(leaf, ei);
10960
10961                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10962                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10963                         goto next;
10964
10965                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10966                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10967                         level = found_key.offset;
10968                 } else {
10969                         struct btrfs_tree_block_info *binfo;
10970
10971                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10972                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10973                         level = btrfs_tree_block_level(leaf, binfo);
10974                 }
10975
10976                 /*
10977                  * For a root extent, it must be of the following type and the
10978                  * first (and only one) iref in the item.
10979                  */
10980                 type = btrfs_extent_inline_ref_type(leaf, iref);
10981                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10982                         goto next;
10983
10984                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10985                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10986                 if (!entry) {
10987                         rii = malloc(sizeof(struct root_item_info));
10988                         if (!rii) {
10989                                 ret = -ENOMEM;
10990                                 goto out;
10991                         }
10992                         rii->cache_extent.start = root_id;
10993                         rii->cache_extent.size = 1;
10994                         rii->level = (u8)-1;
10995                         entry = &rii->cache_extent;
10996                         ret = insert_cache_extent(roots_info_cache, entry);
10997                         ASSERT(ret == 0);
10998                 } else {
10999                         rii = container_of(entry, struct root_item_info,
11000                                            cache_extent);
11001                 }
11002
11003                 ASSERT(rii->cache_extent.start == root_id);
11004                 ASSERT(rii->cache_extent.size == 1);
11005
11006                 if (level > rii->level || rii->level == (u8)-1) {
11007                         rii->level = level;
11008                         rii->bytenr = found_key.objectid;
11009                         rii->gen = btrfs_extent_generation(leaf, ei);
11010                         rii->node_count = 1;
11011                 } else if (level == rii->level) {
11012                         rii->node_count++;
11013                 }
11014 next:
11015                 path->slots[0]++;
11016         }
11017
11018 out:
11019         btrfs_free_path(path);
11020
11021         return ret;
11022 }
11023
11024 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11025                                   struct btrfs_path *path,
11026                                   const struct btrfs_key *root_key,
11027                                   const int read_only_mode)
11028 {
11029         const u64 root_id = root_key->objectid;
11030         struct cache_extent *entry;
11031         struct root_item_info *rii;
11032         struct btrfs_root_item ri;
11033         unsigned long offset;
11034
11035         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11036         if (!entry) {
11037                 fprintf(stderr,
11038                         "Error: could not find extent items for root %llu\n",
11039                         root_key->objectid);
11040                 return -ENOENT;
11041         }
11042
11043         rii = container_of(entry, struct root_item_info, cache_extent);
11044         ASSERT(rii->cache_extent.start == root_id);
11045         ASSERT(rii->cache_extent.size == 1);
11046
11047         if (rii->node_count != 1) {
11048                 fprintf(stderr,
11049                         "Error: could not find btree root extent for root %llu\n",
11050                         root_id);
11051                 return -ENOENT;
11052         }
11053
11054         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11055         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11056
11057         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11058             btrfs_root_level(&ri) != rii->level ||
11059             btrfs_root_generation(&ri) != rii->gen) {
11060
11061                 /*
11062                  * If we're in repair mode but our caller told us to not update
11063                  * the root item, i.e. just check if it needs to be updated, don't
11064                  * print this message, since the caller will call us again shortly
11065                  * for the same root item without read only mode (the caller will
11066                  * open a transaction first).
11067                  */
11068                 if (!(read_only_mode && repair))
11069                         fprintf(stderr,
11070                                 "%sroot item for root %llu,"
11071                                 " current bytenr %llu, current gen %llu, current level %u,"
11072                                 " new bytenr %llu, new gen %llu, new level %u\n",
11073                                 (read_only_mode ? "" : "fixing "),
11074                                 root_id,
11075                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11076                                 btrfs_root_level(&ri),
11077                                 rii->bytenr, rii->gen, rii->level);
11078
11079                 if (btrfs_root_generation(&ri) > rii->gen) {
11080                         fprintf(stderr,
11081                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11082                                 root_id, btrfs_root_generation(&ri), rii->gen);
11083                         return -EINVAL;
11084                 }
11085
11086                 if (!read_only_mode) {
11087                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11088                         btrfs_set_root_level(&ri, rii->level);
11089                         btrfs_set_root_generation(&ri, rii->gen);
11090                         write_extent_buffer(path->nodes[0], &ri,
11091                                             offset, sizeof(ri));
11092                 }
11093
11094                 return 1;
11095         }
11096
11097         return 0;
11098 }
11099
11100 /*
11101  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11102  * caused read-only snapshots to be corrupted if they were created at a moment
11103  * when the source subvolume/snapshot had orphan items. The issue was that the
11104  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11105  * node instead of the post orphan cleanup root node.
11106  * So this function, and its callees, just detects and fixes those cases. Even
11107  * though the regression was for read-only snapshots, this function applies to
11108  * any snapshot/subvolume root.
11109  * This must be run before any other repair code - not doing it so, makes other
11110  * repair code delete or modify backrefs in the extent tree for example, which
11111  * will result in an inconsistent fs after repairing the root items.
11112  */
11113 static int repair_root_items(struct btrfs_fs_info *info)
11114 {
11115         struct btrfs_path *path = NULL;
11116         struct btrfs_key key;
11117         struct extent_buffer *leaf;
11118         struct btrfs_trans_handle *trans = NULL;
11119         int ret = 0;
11120         int bad_roots = 0;
11121         int need_trans = 0;
11122
11123         ret = build_roots_info_cache(info);
11124         if (ret)
11125                 goto out;
11126
11127         path = btrfs_alloc_path();
11128         if (!path) {
11129                 ret = -ENOMEM;
11130                 goto out;
11131         }
11132
11133         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11134         key.type = BTRFS_ROOT_ITEM_KEY;
11135         key.offset = 0;
11136
11137 again:
11138         /*
11139          * Avoid opening and committing transactions if a leaf doesn't have
11140          * any root items that need to be fixed, so that we avoid rotating
11141          * backup roots unnecessarily.
11142          */
11143         if (need_trans) {
11144                 trans = btrfs_start_transaction(info->tree_root, 1);
11145                 if (IS_ERR(trans)) {
11146                         ret = PTR_ERR(trans);
11147                         goto out;
11148                 }
11149         }
11150
11151         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11152                                 0, trans ? 1 : 0);
11153         if (ret < 0)
11154                 goto out;
11155         leaf = path->nodes[0];
11156
11157         while (1) {
11158                 struct btrfs_key found_key;
11159
11160                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11161                         int no_more_keys = find_next_key(path, &key);
11162
11163                         btrfs_release_path(path);
11164                         if (trans) {
11165                                 ret = btrfs_commit_transaction(trans,
11166                                                                info->tree_root);
11167                                 trans = NULL;
11168                                 if (ret < 0)
11169                                         goto out;
11170                         }
11171                         need_trans = 0;
11172                         if (no_more_keys)
11173                                 break;
11174                         goto again;
11175                 }
11176
11177                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11178
11179                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11180                         goto next;
11181                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11182                         goto next;
11183
11184                 ret = maybe_repair_root_item(info, path, &found_key,
11185                                              trans ? 0 : 1);
11186                 if (ret < 0)
11187                         goto out;
11188                 if (ret) {
11189                         if (!trans && repair) {
11190                                 need_trans = 1;
11191                                 key = found_key;
11192                                 btrfs_release_path(path);
11193                                 goto again;
11194                         }
11195                         bad_roots++;
11196                 }
11197 next:
11198                 path->slots[0]++;
11199         }
11200         ret = 0;
11201 out:
11202         free_roots_info_cache();
11203         btrfs_free_path(path);
11204         if (trans)
11205                 btrfs_commit_transaction(trans, info->tree_root);
11206         if (ret < 0)
11207                 return ret;
11208
11209         return bad_roots;
11210 }
11211
/*
 * NULL-terminated list of help text lines for the "btrfs check" command.
 * The strings are emitted verbatim, so the column alignment of the option
 * descriptions is part of the text.
 */
const char *const cmd_check_usage[] = {
        /* synopsis and description */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",

        NULL
};
11241
11242 int cmd_check(int argc, char **argv)
11243 {
11244         struct cache_tree root_cache;
11245         struct btrfs_root *root;
11246         struct btrfs_fs_info *info;
11247         u64 bytenr = 0;
11248         u64 subvolid = 0;
11249         u64 tree_root_bytenr = 0;
11250         u64 chunk_root_bytenr = 0;
11251         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11252         int ret;
11253         u64 num;
11254         int init_csum_tree = 0;
11255         int readonly = 0;
11256         int qgroup_report = 0;
11257         int qgroups_repaired = 0;
11258         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11259
11260         while(1) {
11261                 int c;
11262                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11263                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11264                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11265                         GETOPT_VAL_MODE };
11266                 static const struct option long_options[] = {
11267                         { "super", required_argument, NULL, 's' },
11268                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11269                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11270                         { "init-csum-tree", no_argument, NULL,
11271                                 GETOPT_VAL_INIT_CSUM },
11272                         { "init-extent-tree", no_argument, NULL,
11273                                 GETOPT_VAL_INIT_EXTENT },
11274                         { "check-data-csum", no_argument, NULL,
11275                                 GETOPT_VAL_CHECK_CSUM },
11276                         { "backup", no_argument, NULL, 'b' },
11277                         { "subvol-extents", required_argument, NULL, 'E' },
11278                         { "qgroup-report", no_argument, NULL, 'Q' },
11279                         { "tree-root", required_argument, NULL, 'r' },
11280                         { "chunk-root", required_argument, NULL,
11281                                 GETOPT_VAL_CHUNK_TREE },
11282                         { "progress", no_argument, NULL, 'p' },
11283                         { "mode", required_argument, NULL,
11284                                 GETOPT_VAL_MODE },
11285                         { NULL, 0, NULL, 0}
11286                 };
11287
11288                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11289                 if (c < 0)
11290                         break;
11291                 switch(c) {
11292                         case 'a': /* ignored */ break;
11293                         case 'b':
11294                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11295                                 break;
11296                         case 's':
11297                                 num = arg_strtou64(optarg);
11298                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11299                                         error(
11300                                         "super mirror should be less than %d",
11301                                                 BTRFS_SUPER_MIRROR_MAX);
11302                                         exit(1);
11303                                 }
11304                                 bytenr = btrfs_sb_offset(((int)num));
11305                                 printf("using SB copy %llu, bytenr %llu\n", num,
11306                                        (unsigned long long)bytenr);
11307                                 break;
11308                         case 'Q':
11309                                 qgroup_report = 1;
11310                                 break;
11311                         case 'E':
11312                                 subvolid = arg_strtou64(optarg);
11313                                 break;
11314                         case 'r':
11315                                 tree_root_bytenr = arg_strtou64(optarg);
11316                                 break;
11317                         case GETOPT_VAL_CHUNK_TREE:
11318                                 chunk_root_bytenr = arg_strtou64(optarg);
11319                                 break;
11320                         case 'p':
11321                                 ctx.progress_enabled = true;
11322                                 break;
11323                         case '?':
11324                         case 'h':
11325                                 usage(cmd_check_usage);
11326                         case GETOPT_VAL_REPAIR:
11327                                 printf("enabling repair mode\n");
11328                                 repair = 1;
11329                                 ctree_flags |= OPEN_CTREE_WRITES;
11330                                 break;
11331                         case GETOPT_VAL_READONLY:
11332                                 readonly = 1;
11333                                 break;
11334                         case GETOPT_VAL_INIT_CSUM:
11335                                 printf("Creating a new CRC tree\n");
11336                                 init_csum_tree = 1;
11337                                 repair = 1;
11338                                 ctree_flags |= OPEN_CTREE_WRITES;
11339                                 break;
11340                         case GETOPT_VAL_INIT_EXTENT:
11341                                 init_extent_tree = 1;
11342                                 ctree_flags |= (OPEN_CTREE_WRITES |
11343                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11344                                 repair = 1;
11345                                 break;
11346                         case GETOPT_VAL_CHECK_CSUM:
11347                                 check_data_csum = 1;
11348                                 break;
11349                         case GETOPT_VAL_MODE:
11350                                 check_mode = parse_check_mode(optarg);
11351                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11352                                         error("unknown mode: %s", optarg);
11353                                         exit(1);
11354                                 }
11355                                 break;
11356                 }
11357         }
11358
11359         if (check_argc_exact(argc - optind, 1))
11360                 usage(cmd_check_usage);
11361
11362         if (ctx.progress_enabled) {
11363                 ctx.tp = TASK_NOTHING;
11364                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11365         }
11366
11367         /* This check is the only reason for --readonly to exist */
11368         if (readonly && repair) {
11369                 error("repair options are not compatible with --readonly");
11370                 exit(1);
11371         }
11372
11373         /*
11374          * Not supported yet
11375          */
11376         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11377                 error("low memory mode doesn't support repair yet");
11378                 exit(1);
11379         }
11380
11381         radix_tree_init();
11382         cache_tree_init(&root_cache);
11383
11384         if((ret = check_mounted(argv[optind])) < 0) {
11385                 error("could not check mount status: %s", strerror(-ret));
11386                 goto err_out;
11387         } else if(ret) {
11388                 error("%s is currently mounted, aborting", argv[optind]);
11389                 ret = -EBUSY;
11390                 goto err_out;
11391         }
11392
11393         /* only allow partial opening under repair mode */
11394         if (repair)
11395                 ctree_flags |= OPEN_CTREE_PARTIAL;
11396
11397         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11398                                   chunk_root_bytenr, ctree_flags);
11399         if (!info) {
11400                 error("cannot open file system");
11401                 ret = -EIO;
11402                 goto err_out;
11403         }
11404
11405         global_info = info;
11406         root = info->fs_root;
11407
11408         /*
11409          * repair mode will force us to commit transaction which
11410          * will make us fail to load log tree when mounting.
11411          */
11412         if (repair && btrfs_super_log_root(info->super_copy)) {
11413                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11414                 if (!ret) {
11415                         ret = 1;
11416                         goto close_out;
11417                 }
11418                 ret = zero_log_tree(root);
11419                 if (ret) {
11420                         error("failed to zero log tree: %d", ret);
11421                         goto close_out;
11422                 }
11423         }
11424
11425         uuid_unparse(info->super_copy->fsid, uuidbuf);
11426         if (qgroup_report) {
11427                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11428                        uuidbuf);
11429                 ret = qgroup_verify_all(info);
11430                 if (ret == 0)
11431                         report_qgroups(1);
11432                 goto close_out;
11433         }
11434         if (subvolid) {
11435                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11436                        subvolid, argv[optind], uuidbuf);
11437                 ret = print_extent_state(info, subvolid);
11438                 goto close_out;
11439         }
11440         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11441
11442         if (!extent_buffer_uptodate(info->tree_root->node) ||
11443             !extent_buffer_uptodate(info->dev_root->node) ||
11444             !extent_buffer_uptodate(info->chunk_root->node)) {
11445                 error("critical roots corrupted, unable to check the filesystem");
11446                 ret = -EIO;
11447                 goto close_out;
11448         }
11449
11450         if (init_extent_tree || init_csum_tree) {
11451                 struct btrfs_trans_handle *trans;
11452
11453                 trans = btrfs_start_transaction(info->extent_root, 0);
11454                 if (IS_ERR(trans)) {
11455                         error("error starting transaction");
11456                         ret = PTR_ERR(trans);
11457                         goto close_out;
11458                 }
11459
11460                 if (init_extent_tree) {
11461                         printf("Creating a new extent tree\n");
11462                         ret = reinit_extent_tree(trans, info);
11463                         if (ret)
11464                                 goto close_out;
11465                 }
11466
11467                 if (init_csum_tree) {
11468                         printf("Reinitialize checksum tree\n");
11469                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11470                         if (ret) {
11471                                 error("checksum tree initialization failed: %d",
11472                                                 ret);
11473                                 ret = -EIO;
11474                                 goto close_out;
11475                         }
11476
11477                         ret = fill_csum_tree(trans, info->csum_root,
11478                                              init_extent_tree);
11479                         if (ret) {
11480                                 error("checksum tree refilling failed: %d", ret);
11481                                 return -EIO;
11482                         }
11483                 }
11484                 /*
11485                  * Ok now we commit and run the normal fsck, which will add
11486                  * extent entries for all of the items it finds.
11487                  */
11488                 ret = btrfs_commit_transaction(trans, info->extent_root);
11489                 if (ret)
11490                         goto close_out;
11491         }
11492         if (!extent_buffer_uptodate(info->extent_root->node)) {
11493                 error("critical: extent_root, unable to check the filesystem");
11494                 ret = -EIO;
11495                 goto close_out;
11496         }
11497         if (!extent_buffer_uptodate(info->csum_root->node)) {
11498                 error("critical: csum_root, unable to check the filesystem");
11499                 ret = -EIO;
11500                 goto close_out;
11501         }
11502
11503         if (!ctx.progress_enabled)
11504                 printf("checking extents");
11505         if (check_mode == CHECK_MODE_LOWMEM)
11506                 ret = check_chunks_and_extents_v2(root);
11507         else
11508                 ret = check_chunks_and_extents(root);
11509         if (ret)
11510                 printf("Errors found in extent allocation tree or chunk allocation");
11511
11512         ret = repair_root_items(info);
11513         if (ret < 0)
11514                 goto close_out;
11515         if (repair) {
11516                 fprintf(stderr, "Fixed %d roots.\n", ret);
11517                 ret = 0;
11518         } else if (ret > 0) {
11519                 fprintf(stderr,
11520                        "Found %d roots with an outdated root item.\n",
11521                        ret);
11522                 fprintf(stderr,
11523                         "Please run a filesystem check with the option --repair to fix them.\n");
11524                 ret = 1;
11525                 goto close_out;
11526         }
11527
11528         if (!ctx.progress_enabled) {
11529                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11530                         fprintf(stderr, "checking free space tree\n");
11531                 else
11532                         fprintf(stderr, "checking free space cache\n");
11533         }
11534         ret = check_space_cache(root);
11535         if (ret)
11536                 goto out;
11537
11538         /*
11539          * We used to have to have these hole extents in between our real
11540          * extents so if we don't have this flag set we need to make sure there
11541          * are no gaps in the file extents for inodes, otherwise we can just
11542          * ignore it when this happens.
11543          */
11544         no_holes = btrfs_fs_incompat(root->fs_info,
11545                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11546         if (!ctx.progress_enabled)
11547                 fprintf(stderr, "checking fs roots\n");
11548         ret = check_fs_roots(root, &root_cache);
11549         if (ret)
11550                 goto out;
11551
11552         fprintf(stderr, "checking csums\n");
11553         ret = check_csums(root);
11554         if (ret)
11555                 goto out;
11556
11557         fprintf(stderr, "checking root refs\n");
11558         ret = check_root_refs(root, &root_cache);
11559         if (ret)
11560                 goto out;
11561
11562         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11563                 struct extent_buffer *eb;
11564
11565                 eb = list_first_entry(&root->fs_info->recow_ebs,
11566                                       struct extent_buffer, recow);
11567                 list_del_init(&eb->recow);
11568                 ret = recow_extent_buffer(root, eb);
11569                 if (ret)
11570                         break;
11571         }
11572
11573         while (!list_empty(&delete_items)) {
11574                 struct bad_item *bad;
11575
11576                 bad = list_first_entry(&delete_items, struct bad_item, list);
11577                 list_del_init(&bad->list);
11578                 if (repair)
11579                         ret = delete_bad_item(root, bad);
11580                 free(bad);
11581         }
11582
11583         if (info->quota_enabled) {
11584                 int err;
11585                 fprintf(stderr, "checking quota groups\n");
11586                 err = qgroup_verify_all(info);
11587                 if (err)
11588                         goto out;
11589                 report_qgroups(0);
11590                 err = repair_qgroups(info, &qgroups_repaired);
11591                 if (err)
11592                         goto out;
11593         }
11594
11595         if (!list_empty(&root->fs_info->recow_ebs)) {
11596                 error("transid errors in file system");
11597                 ret = 1;
11598         }
11599 out:
11600         /* Don't override original ret */
11601         if (!ret && qgroups_repaired)
11602                 ret = qgroups_repaired;
11603
11604         if (found_old_backref) { /*
11605                  * there was a disk format change when mixed
11606                  * backref was in testing tree. The old format
11607                  * existed about one week.
11608                  */
11609                 printf("\n * Found old mixed backref format. "
11610                        "The old format is not supported! *"
11611                        "\n * Please mount the FS in readonly mode, "
11612                        "backup data and re-format the FS. *\n\n");
11613                 ret = 1;
11614         }
11615         printf("found %llu bytes used err is %d\n",
11616                (unsigned long long)bytes_used, ret);
11617         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11618         printf("total tree bytes: %llu\n",
11619                (unsigned long long)total_btree_bytes);
11620         printf("total fs tree bytes: %llu\n",
11621                (unsigned long long)total_fs_tree_bytes);
11622         printf("total extent tree bytes: %llu\n",
11623                (unsigned long long)total_extent_tree_bytes);
11624         printf("btree space waste bytes: %llu\n",
11625                (unsigned long long)btree_space_waste);
11626         printf("file data blocks allocated: %llu\n referenced %llu\n",
11627                 (unsigned long long)data_bytes_allocated,
11628                 (unsigned long long)data_bytes_referenced);
11629
11630         free_qgroup_counts();
11631         free_root_recs_tree(&root_cache);
11632 close_out:
11633         close_ctree(root);
11634 err_out:
11635         if (ctx.progress_enabled)
11636                 task_deinit(ctx.info);
11637
11638         return ret;
11639 }