btrfs-progs: check: lowmem: Fix silent error if first inode item missing
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/* Stages whose progress label print_status_check() can display. */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* "No stage" sentinel; has to stay the last enumerator. */
	TASK_NOTHING,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/* Selectable check implementations. */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* What parse_check_mode() returns for an unrecognized string. */
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Currently selected mode; starts out as the default one. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits for fs tree items; bit 0 is not assigned here. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit "not set" value for extent_record::flag_block_full_backref,
 * which is a two-bit field and can therefore hold it.
 */
enum {
	FLAG_UNSET = 2
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Backref error bits; print_ref_error() decodes them into text. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/* inode_record::errors bits; print_inode_error() decodes them into text. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.
 *
 * Every bit must be unique.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable in a combined error mask, so it now has a bit of
 * its own.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Write a newline to stdout and flush it.
 *
 * @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
/*
 * Transfer the inode records held by @src_node into @dst_node.
 *
 * One reference is dropped from @src_node first.  If that was the last one,
 * the ptr_node entries themselves are unlinked from the source caches and
 * re-inserted into the destination; otherwise new ptr_node entries are
 * allocated that point at the same records, and each record's refs is bumped.
 * When the destination already has an entry for the same inode, the source
 * record is merged into it with merge_inode_recs().
 *
 * The root_cache of both nodes is handled first, then the inode_cache.
 *
 * Always returns 0; allocation and unexpected insertion failures hit BUG_ON().
 */
static int splice_shared_node(struct shared_node *src_node,
                              struct shared_node *dst_node)
{
        struct cache_extent *cache;
        struct ptr_node *node, *ins;
        struct cache_tree *src, *dst;
        struct inode_record *rec, *conflict;
        u64 current_ino = 0;
        int splice = 0;
        int ret;

        /* Last reference gone: entries can be moved instead of copied */
        if (--src_node->refs == 0)
                splice = 1;
        if (src_node->current)
                current_ino = src_node->current->ino;

        src = &src_node->root_cache;
        dst = &dst_node->root_cache;
again:
        cache = search_cache_extent(src, 0);
        while (cache) {
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                /* Advance before the current entry may be unlinked below */
                cache = next_cache_extent(cache);

                if (splice) {
                        remove_cache_extent(src, &node->cache);
                        ins = node;
                } else {
                        ins = malloc(sizeof(*ins));
                        BUG_ON(!ins);
                        ins->cache.start = node->cache.start;
                        ins->cache.size = node->cache.size;
                        ins->data = rec;
                        rec->refs++;
                }
                ret = insert_cache_extent(dst, &ins->cache);
                if (ret == -EEXIST) {
                        /*
                         * The destination already tracks this inode: fold
                         * the source record into it, then drop the reference
                         * and the entry that could not be inserted.
                         */
                        conflict = get_inode_rec(dst, rec->ino, 1);
                        BUG_ON(IS_ERR(conflict));
                        merge_inode_recs(rec, conflict, dst);
                        if (rec->checked) {
                                conflict->checked = 1;
                                if (dst_node->current == conflict)
                                        dst_node->current = NULL;
                        }
                        maybe_free_inode_rec(dst, conflict);
                        free_inode_rec(rec);
                        free(ins);
                } else {
                        BUG_ON(ret);
                }
        }

        /* Second pass: repeat the loop above for the inode caches */
        if (src == &src_node->root_cache) {
                src = &src_node->inode_cache;
                dst = &dst_node->inode_cache;
                goto again;
        }

        /*
         * If the source was working on a higher inode number than the
         * destination, finish the destination's current record and make the
         * source's inode the current one.  dst is the inode cache here.
         */
        if (current_ino > 0 && (!dst_node->current ||
            current_ino > dst_node->current->ino)) {
                if (dst_node->current) {
                        dst_node->current->checked = 1;
                        maybe_free_inode_rec(dst, dst_node->current);
                }
                dst_node->current = get_inode_rec(dst, current_ino, 1);
                BUG_ON(IS_ERR(dst_node->current));
        }
        return 0;
}
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
/*
 * Instantiate the tree-freeing helper for inode record caches, releasing each
 * entry with free_inode_ptr().
 * NOTE(review): presumably this expands to free_inode_recs_tree(), which is
 * called elsewhere in this file -- confirm against the macro definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
/*
 * Account for reaching the tree block at @bytenr (with @refs references) at
 * @level during the walk described by @wc.
 *
 * Returns 0 if @level is already the active node, or if the block had not
 * been recorded yet: a shared_node is then created for it and becomes the
 * active node.
 * Returns 1 if the block was already recorded.  In that case either one
 * reference on it is dropped (when the active node is the root level and
 * @root has zero root refs), or its records are spliced into the currently
 * active node.
 * NOTE(review): presumably a return of 1 tells the caller not to descend into
 * the block again -- confirm against the caller.
 */
static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
                             struct walk_control *wc, int level)
{
        struct shared_node *node;
        struct shared_node *dest;
        int ret;

        if (level == wc->active_node)
                return 0;

        BUG_ON(wc->active_node <= level);
        node = find_shared_node(&wc->shared, bytenr);
        if (!node) {
                /* First visit: register the block and make it active */
                ret = add_shared_node(&wc->shared, bytenr, refs);
                BUG_ON(ret);
                node = find_shared_node(&wc->shared, bytenr);
                wc->nodes[level] = node;
                wc->active_node = level;
                return 0;
        }

        if (wc->root_level == wc->active_node &&
            btrfs_root_refs(&root->root_item) == 0) {
                /* Records are not merged here; only the refcount is dropped */
                if (--node->refs == 0) {
                        free_inode_recs_tree(&node->root_cache);
                        free_inode_recs_tree(&node->inode_cache);
                        remove_cache_extent(&wc->shared, &node->cache);
                        free(node);
                }
                return 1;
        }

        dest = wc->nodes[wc->active_node];
        splice_shared_node(node, dest);
        /* splice_shared_node() dropped one reference on the node */
        if (node->refs == 0) {
                remove_cache_extent(&wc->shared, &node->cache);
                free(node);
        }
        return 1;
}
1380
/*
 * Leave the currently active shared node when the walk moves up past @level.
 *
 * Does nothing if @level is the root level.  Otherwise the nearest populated
 * slot in wc->nodes[] above @level becomes the new active node, and the
 * records gathered in the old active node are either spliced into it or,
 * when the new active node is at the root level or above and @root has zero
 * root refs, left in place with one reference dropped.
 *
 * Always returns 0.
 */
static int leave_shared_node(struct btrfs_root *root,
                             struct walk_control *wc, int level)
{
        struct shared_node *node;
        struct shared_node *dest;
        int i;

        if (level == wc->root_level)
                return 0;

        /* Find the closest recorded node above the one being left */
        for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
                if (wc->nodes[i])
                        break;
        }
        BUG_ON(i >= BTRFS_MAX_LEVEL);

        node = wc->nodes[wc->active_node];
        wc->nodes[wc->active_node] = NULL;
        wc->active_node = i;

        dest = wc->nodes[wc->active_node];
        if (wc->active_node < wc->root_level ||
            btrfs_root_refs(&root->root_item) > 0) {
                /* The node must stay referenced after the splice drops one */
                BUG_ON(node->refs <= 1);
                splice_shared_node(node, dest);
        } else {
                BUG_ON(node->refs < 2);
                node->refs--;
        }
        return 0;
}
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs {
1861         u64 bytenr[BTRFS_MAX_LEVEL];
1862         u64 refs[BTRFS_MAX_LEVEL];
1863         int need_check[BTRFS_MAX_LEVEL];
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         err = ret;
2189                         goto out;
2190                 }
2191
2192                 if (btrfs_is_leaf(next))
2193                         status = btrfs_check_leaf(root, NULL, next);
2194                 else
2195                         status = btrfs_check_node(root, NULL, next);
2196                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197                         free_extent_buffer(next);
2198                         err = -EIO;
2199                         goto out;
2200                 }
2201
2202                 *level = *level - 1;
2203                 free_extent_buffer(path->nodes[*level]);
2204                 path->nodes[*level] = next;
2205                 path->slots[*level] = 0;
2206         }
2207 out:
2208         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209         return err;
2210 }
2211
2212 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2213                             unsigned int ext_ref);
2214
2215 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2216                              int *level, struct node_refs *nrefs, int ext_ref)
2217 {
2218         enum btrfs_tree_block_status status;
2219         u64 bytenr;
2220         u64 ptr_gen;
2221         struct extent_buffer *next;
2222         struct extent_buffer *cur;
2223         u32 blocksize;
2224         int ret;
2225
2226         WARN_ON(*level < 0);
2227         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2228
2229         ret = update_nodes_refs(root, path->nodes[*level]->start,
2230                                 nrefs, *level);
2231         if (ret < 0)
2232                 return ret;
2233
2234         while (*level >= 0) {
2235                 WARN_ON(*level < 0);
2236                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2237                 cur = path->nodes[*level];
2238
2239                 if (btrfs_header_level(cur) != *level)
2240                         WARN_ON(1);
2241
2242                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2243                         break;
2244                 /* Don't forgot to check leaf/node validation */
2245                 if (*level == 0) {
2246                         ret = btrfs_check_leaf(root, NULL, cur);
2247                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2248                                 ret = -EIO;
2249                                 break;
2250                         }
2251                         ret = process_one_leaf_v2(root, path, nrefs,
2252                                                   level, ext_ref);
2253                         break;
2254                 } else {
2255                         ret = btrfs_check_node(root, NULL, cur);
2256                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257                                 ret = -EIO;
2258                                 break;
2259                         }
2260                 }
2261                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2262                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2263                 blocksize = root->nodesize;
2264
2265                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2266                 if (ret)
2267                         break;
2268                 if (!nrefs->need_check[*level - 1]) {
2269                         path->slots[*level]++;
2270                         continue;
2271                 }
2272
2273                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2274                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275                         free_extent_buffer(next);
2276                         reada_walk_down(root, cur, path->slots[*level]);
2277                         next = read_tree_block(root, bytenr, blocksize,
2278                                                ptr_gen);
2279                         if (!extent_buffer_uptodate(next)) {
2280                                 struct btrfs_key node_key;
2281
2282                                 btrfs_node_key_to_cpu(path->nodes[*level],
2283                                                       &node_key,
2284                                                       path->slots[*level]);
2285                                 btrfs_add_corrupt_extent_record(root->fs_info,
2286                                                 &node_key,
2287                                                 path->nodes[*level]->start,
2288                                                 root->nodesize, *level);
2289                                 ret = -EIO;
2290                                 break;
2291                         }
2292                 }
2293
2294                 ret = check_child_node(cur, path->slots[*level], next);
2295                 if (ret < 0) 
2296                         break;
2297
2298                 if (btrfs_is_leaf(next))
2299                         status = btrfs_check_leaf(root, NULL, next);
2300                 else
2301                         status = btrfs_check_node(root, NULL, next);
2302                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2303                         free_extent_buffer(next);
2304                         ret = -EIO;
2305                         break;
2306                 }
2307
2308                 *level = *level - 1;
2309                 free_extent_buffer(path->nodes[*level]);
2310                 path->nodes[*level] = next;
2311                 path->slots[*level] = 0;
2312         }
2313         return ret;
2314 }
2315
2316 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2317                         struct walk_control *wc, int *level)
2318 {
2319         int i;
2320         struct extent_buffer *leaf;
2321
2322         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2323                 leaf = path->nodes[i];
2324                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2325                         path->slots[i]++;
2326                         *level = i;
2327                         return 0;
2328                 } else {
2329                         free_extent_buffer(path->nodes[*level]);
2330                         path->nodes[*level] = NULL;
2331                         BUG_ON(*level > wc->active_node);
2332                         if (*level == wc->active_node)
2333                                 leave_shared_node(root, wc, *level);
2334                         *level = i + 1;
2335                 }
2336         }
2337         return 1;
2338 }
2339
2340 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2341                            int *level)
2342 {
2343         int i;
2344         struct extent_buffer *leaf;
2345
2346         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2347                 leaf = path->nodes[i];
2348                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2349                         path->slots[i]++;
2350                         *level = i;
2351                         return 0;
2352                 } else {
2353                         free_extent_buffer(path->nodes[*level]);
2354                         path->nodes[*level] = NULL;
2355                         *level = i + 1;
2356                 }
2357         }
2358         return 1;
2359 }
2360
2361 static int check_root_dir(struct inode_record *rec)
2362 {
2363         struct inode_backref *backref;
2364         int ret = -1;
2365
2366         if (!rec->found_inode_item || rec->errors)
2367                 goto out;
2368         if (rec->nlink != 1 || rec->found_link != 0)
2369                 goto out;
2370         if (list_empty(&rec->backrefs))
2371                 goto out;
2372         backref = to_inode_backref(rec->backrefs.next);
2373         if (!backref->found_inode_ref)
2374                 goto out;
2375         if (backref->index != 0 || backref->namelen != 2 ||
2376             memcmp(backref->name, "..", 2))
2377                 goto out;
2378         if (backref->found_dir_index || backref->found_dir_item)
2379                 goto out;
2380         ret = 0;
2381 out:
2382         return ret;
2383 }
2384
2385 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2386                               struct btrfs_root *root, struct btrfs_path *path,
2387                               struct inode_record *rec)
2388 {
2389         struct btrfs_inode_item *ei;
2390         struct btrfs_key key;
2391         int ret;
2392
2393         key.objectid = rec->ino;
2394         key.type = BTRFS_INODE_ITEM_KEY;
2395         key.offset = (u64)-1;
2396
2397         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2398         if (ret < 0)
2399                 goto out;
2400         if (ret) {
2401                 if (!path->slots[0]) {
2402                         ret = -ENOENT;
2403                         goto out;
2404                 }
2405                 path->slots[0]--;
2406                 ret = 0;
2407         }
2408         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2409         if (key.objectid != rec->ino) {
2410                 ret = -ENOENT;
2411                 goto out;
2412         }
2413
2414         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2415                             struct btrfs_inode_item);
2416         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2417         btrfs_mark_buffer_dirty(path->nodes[0]);
2418         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2419         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2420                root->root_key.objectid);
2421 out:
2422         btrfs_release_path(path);
2423         return ret;
2424 }
2425
2426 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2427                                     struct btrfs_root *root,
2428                                     struct btrfs_path *path,
2429                                     struct inode_record *rec)
2430 {
2431         int ret;
2432
2433         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2434         btrfs_release_path(path);
2435         if (!ret)
2436                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2437         return ret;
2438 }
2439
2440 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2441                                struct btrfs_root *root,
2442                                struct btrfs_path *path,
2443                                struct inode_record *rec)
2444 {
2445         struct btrfs_inode_item *ei;
2446         struct btrfs_key key;
2447         int ret = 0;
2448
2449         key.objectid = rec->ino;
2450         key.type = BTRFS_INODE_ITEM_KEY;
2451         key.offset = 0;
2452
2453         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2454         if (ret) {
2455                 if (ret > 0)
2456                         ret = -ENOENT;
2457                 goto out;
2458         }
2459
2460         /* Since ret == 0, no need to check anything */
2461         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2462                             struct btrfs_inode_item);
2463         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2464         btrfs_mark_buffer_dirty(path->nodes[0]);
2465         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2466         printf("reset nbytes for ino %llu root %llu\n",
2467                rec->ino, root->root_key.objectid);
2468 out:
2469         btrfs_release_path(path);
2470         return ret;
2471 }
2472
2473 static int add_missing_dir_index(struct btrfs_root *root,
2474                                  struct cache_tree *inode_cache,
2475                                  struct inode_record *rec,
2476                                  struct inode_backref *backref)
2477 {
2478         struct btrfs_path path;
2479         struct btrfs_trans_handle *trans;
2480         struct btrfs_dir_item *dir_item;
2481         struct extent_buffer *leaf;
2482         struct btrfs_key key;
2483         struct btrfs_disk_key disk_key;
2484         struct inode_record *dir_rec;
2485         unsigned long name_ptr;
2486         u32 data_size = sizeof(*dir_item) + backref->namelen;
2487         int ret;
2488
2489         trans = btrfs_start_transaction(root, 1);
2490         if (IS_ERR(trans))
2491                 return PTR_ERR(trans);
2492
2493         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2494                 (unsigned long long)rec->ino);
2495
2496         btrfs_init_path(&path);
2497         key.objectid = backref->dir;
2498         key.type = BTRFS_DIR_INDEX_KEY;
2499         key.offset = backref->index;
2500         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2501         BUG_ON(ret);
2502
2503         leaf = path.nodes[0];
2504         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2505
2506         disk_key.objectid = cpu_to_le64(rec->ino);
2507         disk_key.type = BTRFS_INODE_ITEM_KEY;
2508         disk_key.offset = 0;
2509
2510         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2511         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2512         btrfs_set_dir_data_len(leaf, dir_item, 0);
2513         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2514         name_ptr = (unsigned long)(dir_item + 1);
2515         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2516         btrfs_mark_buffer_dirty(leaf);
2517         btrfs_release_path(&path);
2518         btrfs_commit_transaction(trans, root);
2519
2520         backref->found_dir_index = 1;
2521         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2522         BUG_ON(IS_ERR(dir_rec));
2523         if (!dir_rec)
2524                 return 0;
2525         dir_rec->found_size += backref->namelen;
2526         if (dir_rec->found_size == dir_rec->isize &&
2527             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2528                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2529         if (dir_rec->found_size != dir_rec->isize)
2530                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2531
2532         return 0;
2533 }
2534
2535 static int delete_dir_index(struct btrfs_root *root,
2536                             struct inode_backref *backref)
2537 {
2538         struct btrfs_trans_handle *trans;
2539         struct btrfs_dir_item *di;
2540         struct btrfs_path path;
2541         int ret = 0;
2542
2543         trans = btrfs_start_transaction(root, 1);
2544         if (IS_ERR(trans))
2545                 return PTR_ERR(trans);
2546
2547         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2548                 (unsigned long long)backref->dir,
2549                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2550                 (unsigned long long)root->objectid);
2551
2552         btrfs_init_path(&path);
2553         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2554                                     backref->name, backref->namelen,
2555                                     backref->index, -1);
2556         if (IS_ERR(di)) {
2557                 ret = PTR_ERR(di);
2558                 btrfs_release_path(&path);
2559                 btrfs_commit_transaction(trans, root);
2560                 if (ret == -ENOENT)
2561                         return 0;
2562                 return ret;
2563         }
2564
2565         if (!di)
2566                 ret = btrfs_del_item(trans, root, &path);
2567         else
2568                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2569         BUG_ON(ret);
2570         btrfs_release_path(&path);
2571         btrfs_commit_transaction(trans, root);
2572         return ret;
2573 }
2574
2575 static int create_inode_item(struct btrfs_root *root,
2576                              struct inode_record *rec,
2577                              int root_dir)
2578 {
2579         struct btrfs_trans_handle *trans;
2580         struct btrfs_inode_item inode_item;
2581         time_t now = time(NULL);
2582         int ret;
2583
2584         trans = btrfs_start_transaction(root, 1);
2585         if (IS_ERR(trans)) {
2586                 ret = PTR_ERR(trans);
2587                 return ret;
2588         }
2589
2590         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2591                 "be incomplete, please check permissions and content after "
2592                 "the fsck completes.\n", (unsigned long long)root->objectid,
2593                 (unsigned long long)rec->ino);
2594
2595         memset(&inode_item, 0, sizeof(inode_item));
2596         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2597         if (root_dir)
2598                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2599         else
2600                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2601         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2602         if (rec->found_dir_item) {
2603                 if (rec->found_file_extent)
2604                         fprintf(stderr, "root %llu inode %llu has both a dir "
2605                                 "item and extents, unsure if it is a dir or a "
2606                                 "regular file so setting it as a directory\n",
2607                                 (unsigned long long)root->objectid,
2608                                 (unsigned long long)rec->ino);
2609                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2610                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2611         } else if (!rec->found_dir_item) {
2612                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2613                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2614         }
2615         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2616         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2617         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2618         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2619         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2620         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2621         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2622         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2623
2624         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2625         BUG_ON(ret);
2626         btrfs_commit_transaction(trans, root);
2627         return 0;
2628 }
2629
2630 static int repair_inode_backrefs(struct btrfs_root *root,
2631                                  struct inode_record *rec,
2632                                  struct cache_tree *inode_cache,
2633                                  int delete)
2634 {
2635         struct inode_backref *tmp, *backref;
2636         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2637         int ret = 0;
2638         int repaired = 0;
2639
2640         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2641                 if (!delete && rec->ino == root_dirid) {
2642                         if (!rec->found_inode_item) {
2643                                 ret = create_inode_item(root, rec, 1);
2644                                 if (ret)
2645                                         break;
2646                                 repaired++;
2647                         }
2648                 }
2649
2650                 /* Index 0 for root dir's are special, don't mess with it */
2651                 if (rec->ino == root_dirid && backref->index == 0)
2652                         continue;
2653
2654                 if (delete &&
2655                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2656                      (backref->found_dir_index && backref->found_inode_ref &&
2657                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2658                         ret = delete_dir_index(root, backref);
2659                         if (ret)
2660                                 break;
2661                         repaired++;
2662                         list_del(&backref->list);
2663                         free(backref);
2664                 }
2665
2666                 if (!delete && !backref->found_dir_index &&
2667                     backref->found_dir_item && backref->found_inode_ref) {
2668                         ret = add_missing_dir_index(root, inode_cache, rec,
2669                                                     backref);
2670                         if (ret)
2671                                 break;
2672                         repaired++;
2673                         if (backref->found_dir_item &&
2674                             backref->found_dir_index &&
2675                             backref->found_dir_index) {
2676                                 if (!backref->errors &&
2677                                     backref->found_inode_ref) {
2678                                         list_del(&backref->list);
2679                                         free(backref);
2680                                 }
2681                         }
2682                 }
2683
2684                 if (!delete && (!backref->found_dir_index &&
2685                                 !backref->found_dir_item &&
2686                                 backref->found_inode_ref)) {
2687                         struct btrfs_trans_handle *trans;
2688                         struct btrfs_key location;
2689
2690                         ret = check_dir_conflict(root, backref->name,
2691                                                  backref->namelen,
2692                                                  backref->dir,
2693                                                  backref->index);
2694                         if (ret) {
2695                                 /*
2696                                  * let nlink fixing routine to handle it,
2697                                  * which can do it better.
2698                                  */
2699                                 ret = 0;
2700                                 break;
2701                         }
2702                         location.objectid = rec->ino;
2703                         location.type = BTRFS_INODE_ITEM_KEY;
2704                         location.offset = 0;
2705
2706                         trans = btrfs_start_transaction(root, 1);
2707                         if (IS_ERR(trans)) {
2708                                 ret = PTR_ERR(trans);
2709                                 break;
2710                         }
2711                         fprintf(stderr, "adding missing dir index/item pair "
2712                                 "for inode %llu\n",
2713                                 (unsigned long long)rec->ino);
2714                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2715                                                     backref->namelen,
2716                                                     backref->dir, &location,
2717                                                     imode_to_type(rec->imode),
2718                                                     backref->index);
2719                         BUG_ON(ret);
2720                         btrfs_commit_transaction(trans, root);
2721                         repaired++;
2722                 }
2723
2724                 if (!delete && (backref->found_inode_ref &&
2725                                 backref->found_dir_index &&
2726                                 backref->found_dir_item &&
2727                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2728                                 !rec->found_inode_item)) {
2729                         ret = create_inode_item(root, rec, 0);
2730                         if (ret)
2731                                 break;
2732                         repaired++;
2733                 }
2734
2735         }
2736         return ret ? ret : repaired;
2737 }
2738
2739 /*
2740  * To determine the file type for nlink/inode_item repair
2741  *
2742  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2743  * Return -ENOENT if file type is not found.
2744  */
2745 static int find_file_type(struct inode_record *rec, u8 *type)
2746 {
2747         struct inode_backref *backref;
2748
2749         /* For inode item recovered case */
2750         if (rec->found_inode_item) {
2751                 *type = imode_to_type(rec->imode);
2752                 return 0;
2753         }
2754
2755         list_for_each_entry(backref, &rec->backrefs, list) {
2756                 if (backref->found_dir_index || backref->found_dir_item) {
2757                         *type = backref->filetype;
2758                         return 0;
2759                 }
2760         }
2761         return -ENOENT;
2762 }
2763
2764 /*
2765  * To determine the file name for nlink repair
2766  *
2767  * Return 0 if file name is found, set name and namelen.
2768  * Return -ENOENT if file name is not found.
2769  */
2770 static int find_file_name(struct inode_record *rec,
2771                           char *name, int *namelen)
2772 {
2773         struct inode_backref *backref;
2774
2775         list_for_each_entry(backref, &rec->backrefs, list) {
2776                 if (backref->found_dir_index || backref->found_dir_item ||
2777                     backref->found_inode_ref) {
2778                         memcpy(name, backref->name, backref->namelen);
2779                         *namelen = backref->namelen;
2780                         return 0;
2781                 }
2782         }
2783         return -ENOENT;
2784 }
2785
2786 /* Reset the nlink of the inode to the correct one */
2787 static int reset_nlink(struct btrfs_trans_handle *trans,
2788                        struct btrfs_root *root,
2789                        struct btrfs_path *path,
2790                        struct inode_record *rec)
2791 {
2792         struct inode_backref *backref;
2793         struct inode_backref *tmp;
2794         struct btrfs_key key;
2795         struct btrfs_inode_item *inode_item;
2796         int ret = 0;
2797
2798         /* We don't believe this either, reset it and iterate backref */
2799         rec->found_link = 0;
2800
2801         /* Remove all backref including the valid ones */
2802         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2803                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2804                                    backref->index, backref->name,
2805                                    backref->namelen, 0);
2806                 if (ret < 0)
2807                         goto out;
2808
2809                 /* remove invalid backref, so it won't be added back */
2810                 if (!(backref->found_dir_index &&
2811                       backref->found_dir_item &&
2812                       backref->found_inode_ref)) {
2813                         list_del(&backref->list);
2814                         free(backref);
2815                 } else {
2816                         rec->found_link++;
2817                 }
2818         }
2819
2820         /* Set nlink to 0 */
2821         key.objectid = rec->ino;
2822         key.type = BTRFS_INODE_ITEM_KEY;
2823         key.offset = 0;
2824         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2825         if (ret < 0)
2826                 goto out;
2827         if (ret > 0) {
2828                 ret = -ENOENT;
2829                 goto out;
2830         }
2831         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832                                     struct btrfs_inode_item);
2833         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2834         btrfs_mark_buffer_dirty(path->nodes[0]);
2835         btrfs_release_path(path);
2836
2837         /*
2838          * Add back valid inode_ref/dir_item/dir_index,
2839          * add_link() will handle the nlink inc, so new nlink must be correct
2840          */
2841         list_for_each_entry(backref, &rec->backrefs, list) {
2842                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2843                                      backref->name, backref->namelen,
2844                                      backref->filetype, &backref->index, 1);
2845                 if (ret < 0)
2846                         goto out;
2847         }
2848 out:
2849         btrfs_release_path(path);
2850         return ret;
2851 }
2852
2853 static int get_highest_inode(struct btrfs_trans_handle *trans,
2854                                 struct btrfs_root *root,
2855                                 struct btrfs_path *path,
2856                                 u64 *highest_ino)
2857 {
2858         struct btrfs_key key, found_key;
2859         int ret;
2860
2861         btrfs_init_path(path);
2862         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2863         key.offset = -1;
2864         key.type = BTRFS_INODE_ITEM_KEY;
2865         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2866         if (ret == 1) {
2867                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2868                                 path->slots[0] - 1);
2869                 *highest_ino = found_key.objectid;
2870                 ret = 0;
2871         }
2872         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2873                 ret = -EOVERFLOW;
2874         btrfs_release_path(path);
2875         return ret;
2876 }
2877
2878 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2879                                struct btrfs_root *root,
2880                                struct btrfs_path *path,
2881                                struct inode_record *rec)
2882 {
2883         char *dir_name = "lost+found";
2884         char namebuf[BTRFS_NAME_LEN] = {0};
2885         u64 lost_found_ino;
2886         u32 mode = 0700;
2887         u8 type = 0;
2888         int namelen = 0;
2889         int name_recovered = 0;
2890         int type_recovered = 0;
2891         int ret = 0;
2892
2893         /*
2894          * Get file name and type first before these invalid inode ref
2895          * are deleted by remove_all_invalid_backref()
2896          */
2897         name_recovered = !find_file_name(rec, namebuf, &namelen);
2898         type_recovered = !find_file_type(rec, &type);
2899
2900         if (!name_recovered) {
2901                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2902                        rec->ino, rec->ino);
2903                 namelen = count_digits(rec->ino);
2904                 sprintf(namebuf, "%llu", rec->ino);
2905                 name_recovered = 1;
2906         }
2907         if (!type_recovered) {
2908                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2909                        rec->ino);
2910                 type = BTRFS_FT_REG_FILE;
2911                 type_recovered = 1;
2912         }
2913
2914         ret = reset_nlink(trans, root, path, rec);
2915         if (ret < 0) {
2916                 fprintf(stderr,
2917                         "Failed to reset nlink for inode %llu: %s\n",
2918                         rec->ino, strerror(-ret));
2919                 goto out;
2920         }
2921
2922         if (rec->found_link == 0) {
2923                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2924                 if (ret < 0)
2925                         goto out;
2926                 lost_found_ino++;
2927                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2928                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2929                                   mode);
2930                 if (ret < 0) {
2931                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2932                                 dir_name, strerror(-ret));
2933                         goto out;
2934                 }
2935                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2936                                      namebuf, namelen, type, NULL, 1);
2937                 /*
2938                  * Add ".INO" suffix several times to handle case where
2939                  * "FILENAME.INO" is already taken by another file.
2940                  */
2941                 while (ret == -EEXIST) {
2942                         /*
2943                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2944                          */
2945                         if (namelen + count_digits(rec->ino) + 1 >
2946                             BTRFS_NAME_LEN) {
2947                                 ret = -EFBIG;
2948                                 goto out;
2949                         }
2950                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2951                                  ".%llu", rec->ino);
2952                         namelen += count_digits(rec->ino) + 1;
2953                         ret = btrfs_add_link(trans, root, rec->ino,
2954                                              lost_found_ino, namebuf,
2955                                              namelen, type, NULL, 1);
2956                 }
2957                 if (ret < 0) {
2958                         fprintf(stderr,
2959                                 "Failed to link the inode %llu to %s dir: %s\n",
2960                                 rec->ino, dir_name, strerror(-ret));
2961                         goto out;
2962                 }
2963                 /*
2964                  * Just increase the found_link, don't actually add the
2965                  * backref. This will make things easier and this inode
2966                  * record will be freed after the repair is done.
2967                  * So fsck will not report problem about this inode.
2968                  */
2969                 rec->found_link++;
2970                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2971                        namelen, namebuf, dir_name);
2972         }
2973         printf("Fixed the nlink of inode %llu\n", rec->ino);
2974 out:
2975         /*
2976          * Clear the flag anyway, or we will loop forever for the same inode
2977          * as it will not be removed from the bad inode list and the dead loop
2978          * happens.
2979          */
2980         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2981         btrfs_release_path(path);
2982         return ret;
2983 }
2984
2985 /*
2986  * Check if there is any normal(reg or prealloc) file extent for given
2987  * ino.
2988  * This is used to determine the file type when neither its dir_index/item or
2989  * inode_item exists.
2990  *
2991  * This will *NOT* report error, if any error happens, just consider it does
2992  * not have any normal file extent.
2993  */
2994 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2995 {
2996         struct btrfs_path path;
2997         struct btrfs_key key;
2998         struct btrfs_key found_key;
2999         struct btrfs_file_extent_item *fi;
3000         u8 type;
3001         int ret = 0;
3002
3003         btrfs_init_path(&path);
3004         key.objectid = ino;
3005         key.type = BTRFS_EXTENT_DATA_KEY;
3006         key.offset = 0;
3007
3008         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3009         if (ret < 0) {
3010                 ret = 0;
3011                 goto out;
3012         }
3013         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3014                 ret = btrfs_next_leaf(root, &path);
3015                 if (ret) {
3016                         ret = 0;
3017                         goto out;
3018                 }
3019         }
3020         while (1) {
3021                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3022                                       path.slots[0]);
3023                 if (found_key.objectid != ino ||
3024                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3025                         break;
3026                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3027                                     struct btrfs_file_extent_item);
3028                 type = btrfs_file_extent_type(path.nodes[0], fi);
3029                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3030                         ret = 1;
3031                         goto out;
3032                 }
3033         }
3034 out:
3035         btrfs_release_path(&path);
3036         return ret;
3037 }
3038
3039 static u32 btrfs_type_to_imode(u8 type)
3040 {
3041         static u32 imode_by_btrfs_type[] = {
3042                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3043                 [BTRFS_FT_DIR]          = S_IFDIR,
3044                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3045                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3046                 [BTRFS_FT_FIFO]         = S_IFIFO,
3047                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3048                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3049         };
3050
3051         return imode_by_btrfs_type[(type)];
3052 }
3053
3054 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3055                                 struct btrfs_root *root,
3056                                 struct btrfs_path *path,
3057                                 struct inode_record *rec)
3058 {
3059         u8 filetype;
3060         u32 mode = 0700;
3061         int type_recovered = 0;
3062         int ret = 0;
3063
3064         printf("Trying to rebuild inode:%llu\n", rec->ino);
3065
3066         type_recovered = !find_file_type(rec, &filetype);
3067
3068         /*
3069          * Try to determine inode type if type not found.
3070          *
3071          * For found regular file extent, it must be FILE.
3072          * For found dir_item/index, it must be DIR.
3073          *
3074          * For undetermined one, use FILE as fallback.
3075          *
3076          * TODO:
3077          * 1. If found backref(inode_index/item is already handled) to it,
3078          *    it must be DIR.
3079          *    Need new inode-inode ref structure to allow search for that.
3080          */
3081         if (!type_recovered) {
3082                 if (rec->found_file_extent &&
3083                     find_normal_file_extent(root, rec->ino)) {
3084                         type_recovered = 1;
3085                         filetype = BTRFS_FT_REG_FILE;
3086                 } else if (rec->found_dir_item) {
3087                         type_recovered = 1;
3088                         filetype = BTRFS_FT_DIR;
3089                 } else if (!list_empty(&rec->orphan_extents)) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_REG_FILE;
3092                 } else{
3093                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3094                                rec->ino);
3095                         type_recovered = 1;
3096                         filetype = BTRFS_FT_REG_FILE;
3097                 }
3098         }
3099
3100         ret = btrfs_new_inode(trans, root, rec->ino,
3101                               mode | btrfs_type_to_imode(filetype));
3102         if (ret < 0)
3103                 goto out;
3104
3105         /*
3106          * Here inode rebuild is done, we only rebuild the inode item,
3107          * don't repair the nlink(like move to lost+found).
3108          * That is the job of nlink repair.
3109          *
3110          * We just fill the record and return
3111          */
3112         rec->found_dir_item = 1;
3113         rec->imode = mode | btrfs_type_to_imode(filetype);
3114         rec->nlink = 0;
3115         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3116         /* Ensure the inode_nlinks repair function will be called */
3117         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3118 out:
3119         return ret;
3120 }
3121
3122 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3123                                       struct btrfs_root *root,
3124                                       struct btrfs_path *path,
3125                                       struct inode_record *rec)
3126 {
3127         struct orphan_data_extent *orphan;
3128         struct orphan_data_extent *tmp;
3129         int ret = 0;
3130
3131         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3132                 /*
3133                  * Check for conflicting file extents
3134                  *
3135                  * Here we don't know whether the extents is compressed or not,
3136                  * so we can only assume it not compressed nor data offset,
3137                  * and use its disk_len as extent length.
3138                  */
3139                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3140                                        orphan->offset, orphan->disk_len, 0);
3141                 btrfs_release_path(path);
3142                 if (ret < 0)
3143                         goto out;
3144                 if (!ret) {
3145                         fprintf(stderr,
3146                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3147                                 orphan->disk_bytenr, orphan->disk_len);
3148                         ret = btrfs_free_extent(trans,
3149                                         root->fs_info->extent_root,
3150                                         orphan->disk_bytenr, orphan->disk_len,
3151                                         0, root->objectid, orphan->objectid,
3152                                         orphan->offset);
3153                         if (ret < 0)
3154                                 goto out;
3155                 }
3156                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3157                                 orphan->offset, orphan->disk_bytenr,
3158                                 orphan->disk_len, orphan->disk_len);
3159                 if (ret < 0)
3160                         goto out;
3161
3162                 /* Update file size info */
3163                 rec->found_size += orphan->disk_len;
3164                 if (rec->found_size == rec->nbytes)
3165                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3166
3167                 /* Update the file extent hole info too */
3168                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3169                                            orphan->disk_len);
3170                 if (ret < 0)
3171                         goto out;
3172                 if (RB_EMPTY_ROOT(&rec->holes))
3173                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3174
3175                 list_del(&orphan->list);
3176                 free(orphan);
3177         }
3178         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3179 out:
3180         return ret;
3181 }
3182
3183 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3184                                         struct btrfs_root *root,
3185                                         struct btrfs_path *path,
3186                                         struct inode_record *rec)
3187 {
3188         struct rb_node *node;
3189         struct file_extent_hole *hole;
3190         int found = 0;
3191         int ret = 0;
3192
3193         node = rb_first(&rec->holes);
3194
3195         while (node) {
3196                 found = 1;
3197                 hole = rb_entry(node, struct file_extent_hole, node);
3198                 ret = btrfs_punch_hole(trans, root, rec->ino,
3199                                        hole->start, hole->len);
3200                 if (ret < 0)
3201                         goto out;
3202                 ret = del_file_extent_hole(&rec->holes, hole->start,
3203                                            hole->len);
3204                 if (ret < 0)
3205                         goto out;
3206                 if (RB_EMPTY_ROOT(&rec->holes))
3207                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3208                 node = rb_first(&rec->holes);
3209         }
3210         /* special case for a file losing all its file extent */
3211         if (!found) {
3212                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3213                                        round_up(rec->isize, root->sectorsize));
3214                 if (ret < 0)
3215                         goto out;
3216         }
3217         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3218                rec->ino, root->objectid);
3219 out:
3220         return ret;
3221 }
3222
3223 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3224 {
3225         struct btrfs_trans_handle *trans;
3226         struct btrfs_path path;
3227         int ret = 0;
3228
3229         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3230                              I_ERR_NO_ORPHAN_ITEM |
3231                              I_ERR_LINK_COUNT_WRONG |
3232                              I_ERR_NO_INODE_ITEM |
3233                              I_ERR_FILE_EXTENT_ORPHAN |
3234                              I_ERR_FILE_EXTENT_DISCOUNT|
3235                              I_ERR_FILE_NBYTES_WRONG)))
3236                 return rec->errors;
3237
3238         /*
3239          * For nlink repair, it may create a dir and add link, so
3240          * 2 for parent(256)'s dir_index and dir_item
3241          * 2 for lost+found dir's inode_item and inode_ref
3242          * 1 for the new inode_ref of the file
3243          * 2 for lost+found dir's dir_index and dir_item for the file
3244          */
3245         trans = btrfs_start_transaction(root, 7);
3246         if (IS_ERR(trans))
3247                 return PTR_ERR(trans);
3248
3249         btrfs_init_path(&path);
3250         if (rec->errors & I_ERR_NO_INODE_ITEM)
3251                 ret = repair_inode_no_item(trans, root, &path, rec);
3252         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3253                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3254         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3255                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3256         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3257                 ret = repair_inode_isize(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3259                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3261                 ret = repair_inode_nlinks(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3263                 ret = repair_inode_nbytes(trans, root, &path, rec);
3264         btrfs_commit_transaction(trans, root);
3265         btrfs_release_path(&path);
3266         return ret;
3267 }
3268
3269 static int check_inode_recs(struct btrfs_root *root,
3270                             struct cache_tree *inode_cache)
3271 {
3272         struct cache_extent *cache;
3273         struct ptr_node *node;
3274         struct inode_record *rec;
3275         struct inode_backref *backref;
3276         int stage = 0;
3277         int ret = 0;
3278         int err = 0;
3279         u64 error = 0;
3280         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3281
3282         if (btrfs_root_refs(&root->root_item) == 0) {
3283                 if (!cache_tree_empty(inode_cache))
3284                         fprintf(stderr, "warning line %d\n", __LINE__);
3285                 return 0;
3286         }
3287
3288         /*
3289          * We need to repair backrefs first because we could change some of the
3290          * errors in the inode recs.
3291          *
3292          * We also need to go through and delete invalid backrefs first and then
3293          * add the correct ones second.  We do this because we may get EEXIST
3294          * when adding back the correct index because we hadn't yet deleted the
3295          * invalid index.
3296          *
3297          * For example, if we were missing a dir index then the directories
3298          * isize would be wrong, so if we fixed the isize to what we thought it
3299          * would be and then fixed the backref we'd still have a invalid fs, so
3300          * we need to add back the dir index and then check to see if the isize
3301          * is still wrong.
3302          */
3303         while (stage < 3) {
3304                 stage++;
3305                 if (stage == 3 && !err)
3306                         break;
3307
3308                 cache = search_cache_extent(inode_cache, 0);
3309                 while (repair && cache) {
3310                         node = container_of(cache, struct ptr_node, cache);
3311                         rec = node->data;
3312                         cache = next_cache_extent(cache);
3313
3314                         /* Need to free everything up and rescan */
3315                         if (stage == 3) {
3316                                 remove_cache_extent(inode_cache, &node->cache);
3317                                 free(node);
3318                                 free_inode_rec(rec);
3319                                 continue;
3320                         }
3321
3322                         if (list_empty(&rec->backrefs))
3323                                 continue;
3324
3325                         ret = repair_inode_backrefs(root, rec, inode_cache,
3326                                                     stage == 1);
3327                         if (ret < 0) {
3328                                 err = ret;
3329                                 stage = 2;
3330                                 break;
3331                         } if (ret > 0) {
3332                                 err = -EAGAIN;
3333                         }
3334                 }
3335         }
3336         if (err)
3337                 return err;
3338
3339         rec = get_inode_rec(inode_cache, root_dirid, 0);
3340         BUG_ON(IS_ERR(rec));
3341         if (rec) {
3342                 ret = check_root_dir(rec);
3343                 if (ret) {
3344                         fprintf(stderr, "root %llu root dir %llu error\n",
3345                                 (unsigned long long)root->root_key.objectid,
3346                                 (unsigned long long)root_dirid);
3347                         print_inode_error(root, rec);
3348                         error++;
3349                 }
3350         } else {
3351                 if (repair) {
3352                         struct btrfs_trans_handle *trans;
3353
3354                         trans = btrfs_start_transaction(root, 1);
3355                         if (IS_ERR(trans)) {
3356                                 err = PTR_ERR(trans);
3357                                 return err;
3358                         }
3359
3360                         fprintf(stderr,
3361                                 "root %llu missing its root dir, recreating\n",
3362                                 (unsigned long long)root->objectid);
3363
3364                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3365                         BUG_ON(ret);
3366
3367                         btrfs_commit_transaction(trans, root);
3368                         return -EAGAIN;
3369                 }
3370
3371                 fprintf(stderr, "root %llu root dir %llu not found\n",
3372                         (unsigned long long)root->root_key.objectid,
3373                         (unsigned long long)root_dirid);
3374         }
3375
3376         while (1) {
3377                 cache = search_cache_extent(inode_cache, 0);
3378                 if (!cache)
3379                         break;
3380                 node = container_of(cache, struct ptr_node, cache);
3381                 rec = node->data;
3382                 remove_cache_extent(inode_cache, &node->cache);
3383                 free(node);
3384                 if (rec->ino == root_dirid ||
3385                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3386                         free_inode_rec(rec);
3387                         continue;
3388                 }
3389
3390                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3391                         ret = check_orphan_item(root, rec->ino);
3392                         if (ret == 0)
3393                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3394                         if (can_free_inode_rec(rec)) {
3395                                 free_inode_rec(rec);
3396                                 continue;
3397                         }
3398                 }
3399
3400                 if (!rec->found_inode_item)
3401                         rec->errors |= I_ERR_NO_INODE_ITEM;
3402                 if (rec->found_link != rec->nlink)
3403                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3404                 if (repair) {
3405                         ret = try_repair_inode(root, rec);
3406                         if (ret == 0 && can_free_inode_rec(rec)) {
3407                                 free_inode_rec(rec);
3408                                 continue;
3409                         }
3410                         ret = 0;
3411                 }
3412
3413                 if (!(repair && ret == 0))
3414                         error++;
3415                 print_inode_error(root, rec);
3416                 list_for_each_entry(backref, &rec->backrefs, list) {
3417                         if (!backref->found_dir_item)
3418                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3419                         if (!backref->found_dir_index)
3420                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3421                         if (!backref->found_inode_ref)
3422                                 backref->errors |= REF_ERR_NO_INODE_REF;
3423                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3424                                 " namelen %u name %s filetype %d errors %x",
3425                                 (unsigned long long)backref->dir,
3426                                 (unsigned long long)backref->index,
3427                                 backref->namelen, backref->name,
3428                                 backref->filetype, backref->errors);
3429                         print_ref_error(backref->errors);
3430                 }
3431                 free_inode_rec(rec);
3432         }
3433         return (error > 0) ? -1 : 0;
3434 }
3435
3436 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3437                                         u64 objectid)
3438 {
3439         struct cache_extent *cache;
3440         struct root_record *rec = NULL;
3441         int ret;
3442
3443         cache = lookup_cache_extent(root_cache, objectid, 1);
3444         if (cache) {
3445                 rec = container_of(cache, struct root_record, cache);
3446         } else {
3447                 rec = calloc(1, sizeof(*rec));
3448                 if (!rec)
3449                         return ERR_PTR(-ENOMEM);
3450                 rec->objectid = objectid;
3451                 INIT_LIST_HEAD(&rec->backrefs);
3452                 rec->cache.start = objectid;
3453                 rec->cache.size = 1;
3454
3455                 ret = insert_cache_extent(root_cache, &rec->cache);
3456                 if (ret)
3457                         return ERR_PTR(-EEXIST);
3458         }
3459         return rec;
3460 }
3461
3462 static struct root_backref *get_root_backref(struct root_record *rec,
3463                                              u64 ref_root, u64 dir, u64 index,
3464                                              const char *name, int namelen)
3465 {
3466         struct root_backref *backref;
3467
3468         list_for_each_entry(backref, &rec->backrefs, list) {
3469                 if (backref->ref_root != ref_root || backref->dir != dir ||
3470                     backref->namelen != namelen)
3471                         continue;
3472                 if (memcmp(name, backref->name, namelen))
3473                         continue;
3474                 return backref;
3475         }
3476
3477         backref = calloc(1, sizeof(*backref) + namelen + 1);
3478         if (!backref)
3479                 return NULL;
3480         backref->ref_root = ref_root;
3481         backref->dir = dir;
3482         backref->index = index;
3483         backref->namelen = namelen;
3484         memcpy(backref->name, name, namelen);
3485         backref->name[namelen] = '\0';
3486         list_add_tail(&backref->list, &rec->backrefs);
3487         return backref;
3488 }
3489
3490 static void free_root_record(struct cache_extent *cache)
3491 {
3492         struct root_record *rec;
3493         struct root_backref *backref;
3494
3495         rec = container_of(cache, struct root_record, cache);
3496         while (!list_empty(&rec->backrefs)) {
3497                 backref = to_root_backref(rec->backrefs.next);
3498                 list_del(&backref->list);
3499                 free(backref);
3500         }
3501
3502         free(rec);
3503 }
3504
3505 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3506
3507 static int add_root_backref(struct cache_tree *root_cache,
3508                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3509                             const char *name, int namelen,
3510                             int item_type, int errors)
3511 {
3512         struct root_record *rec;
3513         struct root_backref *backref;
3514
3515         rec = get_root_rec(root_cache, root_id);
3516         BUG_ON(IS_ERR(rec));
3517         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3518         BUG_ON(!backref);
3519
3520         backref->errors |= errors;
3521
3522         if (item_type != BTRFS_DIR_ITEM_KEY) {
3523                 if (backref->found_dir_index || backref->found_back_ref ||
3524                     backref->found_forward_ref) {
3525                         if (backref->index != index)
3526                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3527                 } else {
3528                         backref->index = index;
3529                 }
3530         }
3531
3532         if (item_type == BTRFS_DIR_ITEM_KEY) {
3533                 if (backref->found_forward_ref)
3534                         rec->found_ref++;
3535                 backref->found_dir_item = 1;
3536         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3537                 backref->found_dir_index = 1;
3538         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3539                 if (backref->found_forward_ref)
3540                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3541                 else if (backref->found_dir_item)
3542                         rec->found_ref++;
3543                 backref->found_forward_ref = 1;
3544         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3545                 if (backref->found_back_ref)
3546                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3547                 backref->found_back_ref = 1;
3548         } else {
3549                 BUG_ON(1);
3550         }
3551
3552         if (backref->found_forward_ref && backref->found_dir_item)
3553                 backref->reachable = 1;
3554         return 0;
3555 }
3556
3557 static int merge_root_recs(struct btrfs_root *root,
3558                            struct cache_tree *src_cache,
3559                            struct cache_tree *dst_cache)
3560 {
3561         struct cache_extent *cache;
3562         struct ptr_node *node;
3563         struct inode_record *rec;
3564         struct inode_backref *backref;
3565         int ret = 0;
3566
3567         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3568                 free_inode_recs_tree(src_cache);
3569                 return 0;
3570         }
3571
3572         while (1) {
3573                 cache = search_cache_extent(src_cache, 0);
3574                 if (!cache)
3575                         break;
3576                 node = container_of(cache, struct ptr_node, cache);
3577                 rec = node->data;
3578                 remove_cache_extent(src_cache, &node->cache);
3579                 free(node);
3580
3581                 ret = is_child_root(root, root->objectid, rec->ino);
3582                 if (ret < 0)
3583                         break;
3584                 else if (ret == 0)
3585                         goto skip;
3586
3587                 list_for_each_entry(backref, &rec->backrefs, list) {
3588                         BUG_ON(backref->found_inode_ref);
3589                         if (backref->found_dir_item)
3590                                 add_root_backref(dst_cache, rec->ino,
3591                                         root->root_key.objectid, backref->dir,
3592                                         backref->index, backref->name,
3593                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3594                                         backref->errors);
3595                         if (backref->found_dir_index)
3596                                 add_root_backref(dst_cache, rec->ino,
3597                                         root->root_key.objectid, backref->dir,
3598                                         backref->index, backref->name,
3599                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3600                                         backref->errors);
3601                 }
3602 skip:
3603                 free_inode_rec(rec);
3604         }
3605         if (ret < 0)
3606                 return ret;
3607         return 0;
3608 }
3609
3610 static int check_root_refs(struct btrfs_root *root,
3611                            struct cache_tree *root_cache)
3612 {
3613         struct root_record *rec;
3614         struct root_record *ref_root;
3615         struct root_backref *backref;
3616         struct cache_extent *cache;
3617         int loop = 1;
3618         int ret;
3619         int error;
3620         int errors = 0;
3621
3622         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3623         BUG_ON(IS_ERR(rec));
3624         rec->found_ref = 1;
3625
3626         /* fixme: this can not detect circular references */
3627         while (loop) {
3628                 loop = 0;
3629                 cache = search_cache_extent(root_cache, 0);
3630                 while (1) {
3631                         if (!cache)
3632                                 break;
3633                         rec = container_of(cache, struct root_record, cache);
3634                         cache = next_cache_extent(cache);
3635
3636                         if (rec->found_ref == 0)
3637                                 continue;
3638
3639                         list_for_each_entry(backref, &rec->backrefs, list) {
3640                                 if (!backref->reachable)
3641                                         continue;
3642
3643                                 ref_root = get_root_rec(root_cache,
3644                                                         backref->ref_root);
3645                                 BUG_ON(IS_ERR(ref_root));
3646                                 if (ref_root->found_ref > 0)
3647                                         continue;
3648
3649                                 backref->reachable = 0;
3650                                 rec->found_ref--;
3651                                 if (rec->found_ref == 0)
3652                                         loop = 1;
3653                         }
3654                 }
3655         }
3656
3657         cache = search_cache_extent(root_cache, 0);
3658         while (1) {
3659                 if (!cache)
3660                         break;
3661                 rec = container_of(cache, struct root_record, cache);
3662                 cache = next_cache_extent(cache);
3663
3664                 if (rec->found_ref == 0 &&
3665                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3666                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3667                         ret = check_orphan_item(root->fs_info->tree_root,
3668                                                 rec->objectid);
3669                         if (ret == 0)
3670                                 continue;
3671
3672                         /*
3673                          * If we don't have a root item then we likely just have
3674                          * a dir item in a snapshot for this root but no actual
3675                          * ref key or anything so it's meaningless.
3676                          */
3677                         if (!rec->found_root_item)
3678                                 continue;
3679                         errors++;
3680                         fprintf(stderr, "fs tree %llu not referenced\n",
3681                                 (unsigned long long)rec->objectid);
3682                 }
3683
3684                 error = 0;
3685                 if (rec->found_ref > 0 && !rec->found_root_item)
3686                         error = 1;
3687                 list_for_each_entry(backref, &rec->backrefs, list) {
3688                         if (!backref->found_dir_item)
3689                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3690                         if (!backref->found_dir_index)
3691                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3692                         if (!backref->found_back_ref)
3693                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3694                         if (!backref->found_forward_ref)
3695                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3696                         if (backref->reachable && backref->errors)
3697                                 error = 1;
3698                 }
3699                 if (!error)
3700                         continue;
3701
3702                 errors++;
3703                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3704                         (unsigned long long)rec->objectid, rec->found_ref,
3705                          rec->found_root_item ? "" : "not found");
3706
3707                 list_for_each_entry(backref, &rec->backrefs, list) {
3708                         if (!backref->reachable)
3709                                 continue;
3710                         if (!backref->errors && rec->found_root_item)
3711                                 continue;
3712                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3713                                 " index %llu namelen %u name %s errors %x\n",
3714                                 (unsigned long long)backref->ref_root,
3715                                 (unsigned long long)backref->dir,
3716                                 (unsigned long long)backref->index,
3717                                 backref->namelen, backref->name,
3718                                 backref->errors);
3719                         print_ref_error(backref->errors);
3720                 }
3721         }
3722         return errors > 0 ? 1 : 0;
3723 }
3724
3725 static int process_root_ref(struct extent_buffer *eb, int slot,
3726                             struct btrfs_key *key,
3727                             struct cache_tree *root_cache)
3728 {
3729         u64 dirid;
3730         u64 index;
3731         u32 len;
3732         u32 name_len;
3733         struct btrfs_root_ref *ref;
3734         char namebuf[BTRFS_NAME_LEN];
3735         int error;
3736
3737         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3738
3739         dirid = btrfs_root_ref_dirid(eb, ref);
3740         index = btrfs_root_ref_sequence(eb, ref);
3741         name_len = btrfs_root_ref_name_len(eb, ref);
3742
3743         if (name_len <= BTRFS_NAME_LEN) {
3744                 len = name_len;
3745                 error = 0;
3746         } else {
3747                 len = BTRFS_NAME_LEN;
3748                 error = REF_ERR_NAME_TOO_LONG;
3749         }
3750         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3751
3752         if (key->type == BTRFS_ROOT_REF_KEY) {
3753                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3754                                  index, namebuf, len, key->type, error);
3755         } else {
3756                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3757                                  index, namebuf, len, key->type, error);
3758         }
3759         return 0;
3760 }
3761
3762 static void free_corrupt_block(struct cache_extent *cache)
3763 {
3764         struct btrfs_corrupt_block *corrupt;
3765
3766         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3767         free(corrupt);
3768 }
3769
3770 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3771
3772 /*
3773  * Repair the btree of the given root.
3774  *
3775  * The fix is to remove the node key in corrupt_blocks cache_tree.
3776  * and rebalance the tree.
3777  * After the fix, the btree should be writeable.
3778  */
3779 static int repair_btree(struct btrfs_root *root,
3780                         struct cache_tree *corrupt_blocks)
3781 {
3782         struct btrfs_trans_handle *trans;
3783         struct btrfs_path path;
3784         struct btrfs_corrupt_block *corrupt;
3785         struct cache_extent *cache;
3786         struct btrfs_key key;
3787         u64 offset;
3788         int level;
3789         int ret = 0;
3790
3791         if (cache_tree_empty(corrupt_blocks))
3792                 return 0;
3793
3794         trans = btrfs_start_transaction(root, 1);
3795         if (IS_ERR(trans)) {
3796                 ret = PTR_ERR(trans);
3797                 fprintf(stderr, "Error starting transaction: %s\n",
3798                         strerror(-ret));
3799                 return ret;
3800         }
3801         btrfs_init_path(&path);
3802         cache = first_cache_extent(corrupt_blocks);
3803         while (cache) {
3804                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3805                                        cache);
3806                 level = corrupt->level;
3807                 path.lowest_level = level;
3808                 key.objectid = corrupt->key.objectid;
3809                 key.type = corrupt->key.type;
3810                 key.offset = corrupt->key.offset;
3811
3812                 /*
3813                  * Here we don't want to do any tree balance, since it may
3814                  * cause a balance with corrupted brother leaf/node,
3815                  * so ins_len set to 0 here.
3816                  * Balance will be done after all corrupt node/leaf is deleted.
3817                  */
3818                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3819                 if (ret < 0)
3820                         goto out;
3821                 offset = btrfs_node_blockptr(path.nodes[level],
3822                                              path.slots[level]);
3823
3824                 /* Remove the ptr */
3825                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3826                 if (ret < 0)
3827                         goto out;
3828                 /*
3829                  * Remove the corresponding extent
3830                  * return value is not concerned.
3831                  */
3832                 btrfs_release_path(&path);
3833                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3834                                         0, root->root_key.objectid,
3835                                         level - 1, 0);
3836                 cache = next_cache_extent(cache);
3837         }
3838
3839         /* Balance the btree using btrfs_search_slot() */
3840         cache = first_cache_extent(corrupt_blocks);
3841         while (cache) {
3842                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3843                                        cache);
3844                 memcpy(&key, &corrupt->key, sizeof(key));
3845                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3846                 if (ret < 0)
3847                         goto out;
3848                 /* return will always >0 since it won't find the item */
3849                 ret = 0;
3850                 btrfs_release_path(&path);
3851                 cache = next_cache_extent(cache);
3852         }
3853 out:
3854         btrfs_commit_transaction(trans, root);
3855         btrfs_release_path(&path);
3856         return ret;
3857 }
3858
3859 static int check_fs_root(struct btrfs_root *root,
3860                          struct cache_tree *root_cache,
3861                          struct walk_control *wc)
3862 {
3863         int ret = 0;
3864         int err = 0;
3865         int wret;
3866         int level;
3867         struct btrfs_path path;
3868         struct shared_node root_node;
3869         struct root_record *rec;
3870         struct btrfs_root_item *root_item = &root->root_item;
3871         struct cache_tree corrupt_blocks;
3872         struct orphan_data_extent *orphan;
3873         struct orphan_data_extent *tmp;
3874         enum btrfs_tree_block_status status;
3875         struct node_refs nrefs;
3876
3877         /*
3878          * Reuse the corrupt_block cache tree to record corrupted tree block
3879          *
3880          * Unlike the usage in extent tree check, here we do it in a per
3881          * fs/subvol tree base.
3882          */
3883         cache_tree_init(&corrupt_blocks);
3884         root->fs_info->corrupt_blocks = &corrupt_blocks;
3885
3886         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3887                 rec = get_root_rec(root_cache, root->root_key.objectid);
3888                 BUG_ON(IS_ERR(rec));
3889                 if (btrfs_root_refs(root_item) > 0)
3890                         rec->found_root_item = 1;
3891         }
3892
3893         btrfs_init_path(&path);
3894         memset(&root_node, 0, sizeof(root_node));
3895         cache_tree_init(&root_node.root_cache);
3896         cache_tree_init(&root_node.inode_cache);
3897         memset(&nrefs, 0, sizeof(nrefs));
3898
3899         /* Move the orphan extent record to corresponding inode_record */
3900         list_for_each_entry_safe(orphan, tmp,
3901                                  &root->orphan_data_extents, list) {
3902                 struct inode_record *inode;
3903
3904                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3905                                       1);
3906                 BUG_ON(IS_ERR(inode));
3907                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3908                 list_move(&orphan->list, &inode->orphan_extents);
3909         }
3910
3911         level = btrfs_header_level(root->node);
3912         memset(wc->nodes, 0, sizeof(wc->nodes));
3913         wc->nodes[level] = &root_node;
3914         wc->active_node = level;
3915         wc->root_level = level;
3916
3917         /* We may not have checked the root block, lets do that now */
3918         if (btrfs_is_leaf(root->node))
3919                 status = btrfs_check_leaf(root, NULL, root->node);
3920         else
3921                 status = btrfs_check_node(root, NULL, root->node);
3922         if (status != BTRFS_TREE_BLOCK_CLEAN)
3923                 return -EIO;
3924
3925         if (btrfs_root_refs(root_item) > 0 ||
3926             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3927                 path.nodes[level] = root->node;
3928                 extent_buffer_get(root->node);
3929                 path.slots[level] = 0;
3930         } else {
3931                 struct btrfs_key key;
3932                 struct btrfs_disk_key found_key;
3933
3934                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3935                 level = root_item->drop_level;
3936                 path.lowest_level = level;
3937                 if (level > btrfs_header_level(root->node) ||
3938                     level >= BTRFS_MAX_LEVEL) {
3939                         error("ignoring invalid drop level: %u", level);
3940                         goto skip_walking;
3941                 }
3942                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3943                 if (wret < 0)
3944                         goto skip_walking;
3945                 btrfs_node_key(path.nodes[level], &found_key,
3946                                 path.slots[level]);
3947                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3948                                         sizeof(found_key)));
3949         }
3950
3951         while (1) {
3952                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3953                 if (wret < 0)
3954                         ret = wret;
3955                 if (wret != 0)
3956                         break;
3957
3958                 wret = walk_up_tree(root, &path, wc, &level);
3959                 if (wret < 0)
3960                         ret = wret;
3961                 if (wret != 0)
3962                         break;
3963         }
3964 skip_walking:
3965         btrfs_release_path(&path);
3966
3967         if (!cache_tree_empty(&corrupt_blocks)) {
3968                 struct cache_extent *cache;
3969                 struct btrfs_corrupt_block *corrupt;
3970
3971                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3972                        root->root_key.objectid);
3973                 cache = first_cache_extent(&corrupt_blocks);
3974                 while (cache) {
3975                         corrupt = container_of(cache,
3976                                                struct btrfs_corrupt_block,
3977                                                cache);
3978                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3979                                cache->start, corrupt->level,
3980                                corrupt->key.objectid, corrupt->key.type,
3981                                corrupt->key.offset);
3982                         cache = next_cache_extent(cache);
3983                 }
3984                 if (repair) {
3985                         printf("Try to repair the btree for root %llu\n",
3986                                root->root_key.objectid);
3987                         ret = repair_btree(root, &corrupt_blocks);
3988                         if (ret < 0)
3989                                 fprintf(stderr, "Failed to repair btree: %s\n",
3990                                         strerror(-ret));
3991                         if (!ret)
3992                                 printf("Btree for root %llu is fixed\n",
3993                                        root->root_key.objectid);
3994                 }
3995         }
3996
3997         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3998         if (err < 0)
3999                 ret = err;
4000
4001         if (root_node.current) {
4002                 root_node.current->checked = 1;
4003                 maybe_free_inode_rec(&root_node.inode_cache,
4004                                 root_node.current);
4005         }
4006
4007         err = check_inode_recs(root, &root_node.inode_cache);
4008         if (!ret)
4009                 ret = err;
4010
4011         free_corrupt_blocks_tree(&corrupt_blocks);
4012         root->fs_info->corrupt_blocks = NULL;
4013         free_orphan_data_extents(&root->orphan_data_extents);
4014         return ret;
4015 }
4016
4017 static int fs_root_objectid(u64 objectid)
4018 {
4019         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4020             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4021                 return 1;
4022         return is_fstree(objectid);
4023 }
4024
4025 static int check_fs_roots(struct btrfs_root *root,
4026                           struct cache_tree *root_cache)
4027 {
4028         struct btrfs_path path;
4029         struct btrfs_key key;
4030         struct walk_control wc;
4031         struct extent_buffer *leaf, *tree_node;
4032         struct btrfs_root *tmp_root;
4033         struct btrfs_root *tree_root = root->fs_info->tree_root;
4034         int ret;
4035         int err = 0;
4036
4037         if (ctx.progress_enabled) {
4038                 ctx.tp = TASK_FS_ROOTS;
4039                 task_start(ctx.info);
4040         }
4041
4042         /*
4043          * Just in case we made any changes to the extent tree that weren't
4044          * reflected into the free space cache yet.
4045          */
4046         if (repair)
4047                 reset_cached_block_groups(root->fs_info);
4048         memset(&wc, 0, sizeof(wc));
4049         cache_tree_init(&wc.shared);
4050         btrfs_init_path(&path);
4051
4052 again:
4053         key.offset = 0;
4054         key.objectid = 0;
4055         key.type = BTRFS_ROOT_ITEM_KEY;
4056         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4057         if (ret < 0) {
4058                 err = 1;
4059                 goto out;
4060         }
4061         tree_node = tree_root->node;
4062         while (1) {
4063                 if (tree_node != tree_root->node) {
4064                         free_root_recs_tree(root_cache);
4065                         btrfs_release_path(&path);
4066                         goto again;
4067                 }
4068                 leaf = path.nodes[0];
4069                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4070                         ret = btrfs_next_leaf(tree_root, &path);
4071                         if (ret) {
4072                                 if (ret < 0)
4073                                         err = 1;
4074                                 break;
4075                         }
4076                         leaf = path.nodes[0];
4077                 }
4078                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4079                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4080                     fs_root_objectid(key.objectid)) {
4081                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4082                                 tmp_root = btrfs_read_fs_root_no_cache(
4083                                                 root->fs_info, &key);
4084                         } else {
4085                                 key.offset = (u64)-1;
4086                                 tmp_root = btrfs_read_fs_root(
4087                                                 root->fs_info, &key);
4088                         }
4089                         if (IS_ERR(tmp_root)) {
4090                                 err = 1;
4091                                 goto next;
4092                         }
4093                         ret = check_fs_root(tmp_root, root_cache, &wc);
4094                         if (ret == -EAGAIN) {
4095                                 free_root_recs_tree(root_cache);
4096                                 btrfs_release_path(&path);
4097                                 goto again;
4098                         }
4099                         if (ret)
4100                                 err = 1;
4101                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4102                                 btrfs_free_fs_root(tmp_root);
4103                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4104                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4105                         process_root_ref(leaf, path.slots[0], &key,
4106                                          root_cache);
4107                 }
4108 next:
4109                 path.slots[0]++;
4110         }
4111 out:
4112         btrfs_release_path(&path);
4113         if (err)
4114                 free_extent_cache_tree(&wc.shared);
4115         if (!cache_tree_empty(&wc.shared))
4116                 fprintf(stderr, "warning line %d\n", __LINE__);
4117
4118         task_stop(ctx.info);
4119
4120         return err;
4121 }
4122
4123 /*
4124  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4125  * INODE_REF/INODE_EXTREF match.
4126  *
4127  * @root:       the root of the fs/file tree
4128  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4129  * @key:        the key of the DIR_ITEM/DIR_INDEX
4130  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4131  *              distinguish root_dir between normal dir/file
4132  * @name:       the name in the INODE_REF/INODE_EXTREF
4133  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4134  * @mode:       the st_mode of INODE_ITEM
4135  *
4136  * Return 0 if no error occurred.
4137  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4138  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4139  * dir/file.
4140  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4141  * not match for normal dir/file.
4142  */
4143 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4144                          struct btrfs_key *key, u64 index, char *name,
4145                          u32 namelen, u32 mode)
4146 {
4147         struct btrfs_path path;
4148         struct extent_buffer *node;
4149         struct btrfs_dir_item *di;
4150         struct btrfs_key location;
4151         char namebuf[BTRFS_NAME_LEN] = {0};
4152         u32 total;
4153         u32 cur = 0;
4154         u32 len;
4155         u32 name_len;
4156         u32 data_len;
4157         u8 filetype;
4158         int slot;
4159         int ret;
4160
4161         btrfs_init_path(&path);
4162         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4163         if (ret < 0) {
4164                 ret = DIR_ITEM_MISSING;
4165                 goto out;
4166         }
4167
4168         /* Process root dir and goto out*/
4169         if (index == 0) {
4170                 if (ret == 0) {
4171                         ret = ROOT_DIR_ERROR;
4172                         error(
4173                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4174                                 root->objectid,
4175                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4176                                         "REF" : "EXTREF",
4177                                 ref_key->objectid, ref_key->offset,
4178                                 key->type == BTRFS_DIR_ITEM_KEY ?
4179                                         "DIR_ITEM" : "DIR_INDEX");
4180                 } else {
4181                         ret = 0;
4182                 }
4183
4184                 goto out;
4185         }
4186
4187         /* Process normal file/dir */
4188         if (ret > 0) {
4189                 ret = DIR_ITEM_MISSING;
4190                 error(
4191                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4192                         root->objectid,
4193                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4194                         ref_key->objectid, ref_key->offset,
4195                         key->type == BTRFS_DIR_ITEM_KEY ?
4196                                 "DIR_ITEM" : "DIR_INDEX",
4197                         key->objectid, key->offset, namelen, name,
4198                         imode_to_type(mode));
4199                 goto out;
4200         }
4201
4202         /* Check whether inode_id/filetype/name match */
4203         node = path.nodes[0];
4204         slot = path.slots[0];
4205         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4206         total = btrfs_item_size_nr(node, slot);
4207         while (cur < total) {
4208                 ret = DIR_ITEM_MISMATCH;
4209                 name_len = btrfs_dir_name_len(node, di);
4210                 data_len = btrfs_dir_data_len(node, di);
4211
4212                 btrfs_dir_item_key_to_cpu(node, di, &location);
4213                 if (location.objectid != ref_key->objectid ||
4214                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4215                     location.offset != 0)
4216                         goto next;
4217
4218                 filetype = btrfs_dir_type(node, di);
4219                 if (imode_to_type(mode) != filetype)
4220                         goto next;
4221
4222                 if (name_len <= BTRFS_NAME_LEN) {
4223                         len = name_len;
4224                 } else {
4225                         len = BTRFS_NAME_LEN;
4226                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4227                         root->objectid,
4228                         key->type == BTRFS_DIR_ITEM_KEY ?
4229                         "DIR_ITEM" : "DIR_INDEX",
4230                         key->objectid, key->offset, name_len);
4231                 }
4232                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4233                 if (len != namelen || strncmp(namebuf, name, len))
4234                         goto next;
4235
4236                 ret = 0;
4237                 goto out;
4238 next:
4239                 len = sizeof(*di) + name_len + data_len;
4240                 di = (struct btrfs_dir_item *)((char *)di + len);
4241                 cur += len;
4242         }
4243         if (ret == DIR_ITEM_MISMATCH)
4244                 error(
4245                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4246                         root->objectid,
4247                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4248                         ref_key->objectid, ref_key->offset,
4249                         key->type == BTRFS_DIR_ITEM_KEY ?
4250                                 "DIR_ITEM" : "DIR_INDEX",
4251                         key->objectid, key->offset, namelen, name,
4252                         imode_to_type(mode));
4253 out:
4254         btrfs_release_path(&path);
4255         return ret;
4256 }
4257
4258 /*
4259  * Traverse the given INODE_REF and call find_dir_item() to find related
4260  * DIR_ITEM/DIR_INDEX.
4261  *
4262  * @root:       the root of the fs/file tree
4263  * @ref_key:    the key of the INODE_REF
4264  * @refs:       the count of INODE_REF
4265  * @mode:       the st_mode of INODE_ITEM
4266  *
4267  * Return 0 if no error occurred.
4268  */
4269 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4270                            struct extent_buffer *node, int slot, u64 *refs,
4271                            int mode)
4272 {
4273         struct btrfs_key key;
4274         struct btrfs_inode_ref *ref;
4275         char namebuf[BTRFS_NAME_LEN] = {0};
4276         u32 total;
4277         u32 cur = 0;
4278         u32 len;
4279         u32 name_len;
4280         u64 index;
4281         int ret, err = 0;
4282
4283         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4284         total = btrfs_item_size_nr(node, slot);
4285
4286 next:
4287         /* Update inode ref count */
4288         (*refs)++;
4289
4290         index = btrfs_inode_ref_index(node, ref);
4291         name_len = btrfs_inode_ref_name_len(node, ref);
4292         if (name_len <= BTRFS_NAME_LEN) {
4293                 len = name_len;
4294         } else {
4295                 len = BTRFS_NAME_LEN;
4296                 warning("root %llu INODE_REF[%llu %llu] name too long",
4297                         root->objectid, ref_key->objectid, ref_key->offset);
4298         }
4299
4300         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4301
4302         /* Check root dir ref name */
4303         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4304                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4305                       root->objectid, ref_key->objectid, ref_key->offset,
4306                       namebuf);
4307                 err |= ROOT_DIR_ERROR;
4308         }
4309
4310         /* Find related DIR_INDEX */
4311         key.objectid = ref_key->offset;
4312         key.type = BTRFS_DIR_INDEX_KEY;
4313         key.offset = index;
4314         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4315         err |= ret;
4316
4317         /* Find related dir_item */
4318         key.objectid = ref_key->offset;
4319         key.type = BTRFS_DIR_ITEM_KEY;
4320         key.offset = btrfs_name_hash(namebuf, len);
4321         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4322         err |= ret;
4323
4324         len = sizeof(*ref) + name_len;
4325         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4326         cur += len;
4327         if (cur < total)
4328                 goto next;
4329
4330         return err;
4331 }
4332
4333 /*
4334  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4335  * DIR_ITEM/DIR_INDEX.
4336  *
4337  * @root:       the root of the fs/file tree
4338  * @ref_key:    the key of the INODE_EXTREF
4339  * @refs:       the count of INODE_EXTREF
4340  * @mode:       the st_mode of INODE_ITEM
4341  *
4342  * Return 0 if no error occurred.
4343  */
4344 static int check_inode_extref(struct btrfs_root *root,
4345                               struct btrfs_key *ref_key,
4346                               struct extent_buffer *node, int slot, u64 *refs,
4347                               int mode)
4348 {
4349         struct btrfs_key key;
4350         struct btrfs_inode_extref *extref;
4351         char namebuf[BTRFS_NAME_LEN] = {0};
4352         u32 total;
4353         u32 cur = 0;
4354         u32 len;
4355         u32 name_len;
4356         u64 index;
4357         u64 parent;
4358         int ret;
4359         int err = 0;
4360
4361         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4362         total = btrfs_item_size_nr(node, slot);
4363
4364 next:
4365         /* update inode ref count */
4366         (*refs)++;
4367         name_len = btrfs_inode_extref_name_len(node, extref);
4368         index = btrfs_inode_extref_index(node, extref);
4369         parent = btrfs_inode_extref_parent(node, extref);
4370         if (name_len <= BTRFS_NAME_LEN) {
4371                 len = name_len;
4372         } else {
4373                 len = BTRFS_NAME_LEN;
4374                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4375                         root->objectid, ref_key->objectid, ref_key->offset);
4376         }
4377         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4378
4379         /* Check root dir ref name */
4380         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4381                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4382                       root->objectid, ref_key->objectid, ref_key->offset,
4383                       namebuf);
4384                 err |= ROOT_DIR_ERROR;
4385         }
4386
4387         /* find related dir_index */
4388         key.objectid = parent;
4389         key.type = BTRFS_DIR_INDEX_KEY;
4390         key.offset = index;
4391         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4392         err |= ret;
4393
4394         /* find related dir_item */
4395         key.objectid = parent;
4396         key.type = BTRFS_DIR_ITEM_KEY;
4397         key.offset = btrfs_name_hash(namebuf, len);
4398         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4399         err |= ret;
4400
4401         len = sizeof(*extref) + name_len;
4402         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4403         cur += len;
4404
4405         if (cur < total)
4406                 goto next;
4407
4408         return err;
4409 }
4410
4411 /*
4412  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4413  * DIR_ITEM/DIR_INDEX match.
4414  *
4415  * @root:       the root of the fs/file tree
4416  * @key:        the key of the INODE_REF/INODE_EXTREF
4417  * @name:       the name in the INODE_REF/INODE_EXTREF
4418  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4419  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4420  * to (u64)-1
4421  * @ext_ref:    the EXTENDED_IREF feature
4422  *
4423  * Return 0 if no error occurred.
4424  * Return >0 for error bitmap
4425  */
4426 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4427                           char *name, int namelen, u64 index,
4428                           unsigned int ext_ref)
4429 {
4430         struct btrfs_path path;
4431         struct btrfs_inode_ref *ref;
4432         struct btrfs_inode_extref *extref;
4433         struct extent_buffer *node;
4434         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4435         u32 total;
4436         u32 cur = 0;
4437         u32 len;
4438         u32 ref_namelen;
4439         u64 ref_index;
4440         u64 parent;
4441         u64 dir_id;
4442         int slot;
4443         int ret;
4444
4445         btrfs_init_path(&path);
4446         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4447         if (ret) {
4448                 ret = INODE_REF_MISSING;
4449                 goto extref;
4450         }
4451
4452         node = path.nodes[0];
4453         slot = path.slots[0];
4454
4455         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4456         total = btrfs_item_size_nr(node, slot);
4457
4458         /* Iterate all entry of INODE_REF */
4459         while (cur < total) {
4460                 ret = INODE_REF_MISSING;
4461
4462                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4463                 ref_index = btrfs_inode_ref_index(node, ref);
4464                 if (index != (u64)-1 && index != ref_index)
4465                         goto next_ref;
4466
4467                 if (ref_namelen <= BTRFS_NAME_LEN) {
4468                         len = ref_namelen;
4469                 } else {
4470                         len = BTRFS_NAME_LEN;
4471                         warning("root %llu INODE %s[%llu %llu] name too long",
4472                                 root->objectid,
4473                                 key->type == BTRFS_INODE_REF_KEY ?
4474                                         "REF" : "EXTREF",
4475                                 key->objectid, key->offset);
4476                 }
4477                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4478                                    len);
4479
4480                 if (len != namelen || strncmp(ref_namebuf, name, len))
4481                         goto next_ref;
4482
4483                 ret = 0;
4484                 goto out;
4485 next_ref:
4486                 len = sizeof(*ref) + ref_namelen;
4487                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4488                 cur += len;
4489         }
4490
4491 extref:
4492         /* Skip if not support EXTENDED_IREF feature */
4493         if (!ext_ref)
4494                 goto out;
4495
4496         btrfs_release_path(&path);
4497         btrfs_init_path(&path);
4498
4499         dir_id = key->offset;
4500         key->type = BTRFS_INODE_EXTREF_KEY;
4501         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4502
4503         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4504         if (ret) {
4505                 ret = INODE_REF_MISSING;
4506                 goto out;
4507         }
4508
4509         node = path.nodes[0];
4510         slot = path.slots[0];
4511
4512         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4513         cur = 0;
4514         total = btrfs_item_size_nr(node, slot);
4515
4516         /* Iterate all entry of INODE_EXTREF */
4517         while (cur < total) {
4518                 ret = INODE_REF_MISSING;
4519
4520                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4521                 ref_index = btrfs_inode_extref_index(node, extref);
4522                 parent = btrfs_inode_extref_parent(node, extref);
4523                 if (index != (u64)-1 && index != ref_index)
4524                         goto next_extref;
4525
4526                 if (parent != dir_id)
4527                         goto next_extref;
4528
4529                 if (ref_namelen <= BTRFS_NAME_LEN) {
4530                         len = ref_namelen;
4531                 } else {
4532                         len = BTRFS_NAME_LEN;
4533                         warning("root %llu INODE %s[%llu %llu] name too long",
4534                                 root->objectid,
4535                                 key->type == BTRFS_INODE_REF_KEY ?
4536                                         "REF" : "EXTREF",
4537                                 key->objectid, key->offset);
4538                 }
4539                 read_extent_buffer(node, ref_namebuf,
4540                                    (unsigned long)(extref + 1), len);
4541
4542                 if (len != namelen || strncmp(ref_namebuf, name, len))
4543                         goto next_extref;
4544
4545                 ret = 0;
4546                 goto out;
4547
4548 next_extref:
4549                 len = sizeof(*extref) + ref_namelen;
4550                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4551                 cur += len;
4552
4553         }
4554 out:
4555         btrfs_release_path(&path);
4556         return ret;
4557 }
4558
4559 /*
4560  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4561  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4562  *
4563  * @root:       the root of the fs/file tree
4564  * @key:        the key of the DIR_ITEM/DIR_INDEX
4565  * @size:       the st_size of the INODE_ITEM
4566  * @ext_ref:    the EXTENDED_IREF feature
4567  *
4568  * Return 0 if no error occurred.
4569  */
4570 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4571                           struct extent_buffer *node, int slot, u64 *size,
4572                           unsigned int ext_ref)
4573 {
4574         struct btrfs_dir_item *di;
4575         struct btrfs_inode_item *ii;
4576         struct btrfs_path path;
4577         struct btrfs_key location;
4578         char namebuf[BTRFS_NAME_LEN] = {0};
4579         u32 total;
4580         u32 cur = 0;
4581         u32 len;
4582         u32 name_len;
4583         u32 data_len;
4584         u8 filetype;
4585         u32 mode;
4586         u64 index;
4587         int ret;
4588         int err = 0;
4589
4590         /*
4591          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4592          * ignore index check.
4593          */
4594         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4595
4596         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4597         total = btrfs_item_size_nr(node, slot);
4598
4599         while (cur < total) {
4600                 data_len = btrfs_dir_data_len(node, di);
4601                 if (data_len)
4602                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4603                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4604                               "DIR_ITEM" : "DIR_INDEX",
4605                               key->objectid, key->offset, data_len);
4606
4607                 name_len = btrfs_dir_name_len(node, di);
4608                 if (name_len <= BTRFS_NAME_LEN) {
4609                         len = name_len;
4610                 } else {
4611                         len = BTRFS_NAME_LEN;
4612                         warning("root %llu %s[%llu %llu] name too long",
4613                                 root->objectid,
4614                                 key->type == BTRFS_DIR_ITEM_KEY ?
4615                                 "DIR_ITEM" : "DIR_INDEX",
4616                                 key->objectid, key->offset);
4617                 }
4618                 (*size) += name_len;
4619
4620                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4621                 filetype = btrfs_dir_type(node, di);
4622
4623                 btrfs_init_path(&path);
4624                 btrfs_dir_item_key_to_cpu(node, di, &location);
4625
4626                 /* Ignore related ROOT_ITEM check */
4627                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4628                         goto next;
4629
4630                 /* Check relative INODE_ITEM(existence/filetype) */
4631                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4632                 if (ret) {
4633                         err |= INODE_ITEM_MISSING;
4634                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4635                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4636                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4637                               key->offset, location.objectid, name_len,
4638                               namebuf, filetype);
4639                         goto next;
4640                 }
4641
4642                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4643                                     struct btrfs_inode_item);
4644                 mode = btrfs_inode_mode(path.nodes[0], ii);
4645
4646                 if (imode_to_type(mode) != filetype) {
4647                         err |= INODE_ITEM_MISMATCH;
4648                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4649                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4651                               key->offset, name_len, namebuf, filetype);
4652                 }
4653
4654                 /* Check relative INODE_REF/INODE_EXTREF */
4655                 location.type = BTRFS_INODE_REF_KEY;
4656                 location.offset = key->objectid;
4657                 ret = find_inode_ref(root, &location, namebuf, len,
4658                                        index, ext_ref);
4659                 err |= ret;
4660                 if (ret & INODE_REF_MISSING)
4661                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4662                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4663                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4664                               key->offset, name_len, namebuf, filetype);
4665
4666 next:
4667                 btrfs_release_path(&path);
4668                 len = sizeof(*di) + name_len + data_len;
4669                 di = (struct btrfs_dir_item *)((char *)di + len);
4670                 cur += len;
4671
4672                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4673                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4674                               root->objectid, key->objectid, key->offset);
4675                         break;
4676                 }
4677         }
4678
4679         return err;
4680 }
4681
4682 /*
4683  * Check file extent datasum/hole, update the size of the file extents,
4684  * check and update the last offset of the file extent.
4685  *
4686  * @root:       the root of fs/file tree.
4687  * @fkey:       the key of the file extent.
4688  * @nodatasum:  INODE_NODATASUM feature.
4689  * @size:       the sum of all EXTENT_DATA items size for this inode.
4690  * @end:        the offset of the last extent.
4691  *
4692  * Return 0 if no error occurred.
4693  */
4694 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4695                              struct extent_buffer *node, int slot,
4696                              unsigned int nodatasum, u64 *size, u64 *end)
4697 {
4698         struct btrfs_file_extent_item *fi;
4699         u64 disk_bytenr;
4700         u64 disk_num_bytes;
4701         u64 extent_num_bytes;
4702         u64 extent_offset;
4703         u64 csum_found;         /* In byte size, sectorsize aligned */
4704         u64 search_start;       /* Logical range start we search for csum */
4705         u64 search_len;         /* Logical range len we search for csum */
4706         unsigned int extent_type;
4707         unsigned int is_hole;
4708         int compressed = 0;
4709         int ret;
4710         int err = 0;
4711
4712         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4713
4714         /* Check inline extent */
4715         extent_type = btrfs_file_extent_type(node, fi);
4716         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4717                 struct btrfs_item *e = btrfs_item_nr(slot);
4718                 u32 item_inline_len;
4719
4720                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4721                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4722                 compressed = btrfs_file_extent_compression(node, fi);
4723                 if (extent_num_bytes == 0) {
4724                         error(
4725                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4726                                 root->objectid, fkey->objectid, fkey->offset);
4727                         err |= FILE_EXTENT_ERROR;
4728                 }
4729                 if (!compressed && extent_num_bytes != item_inline_len) {
4730                         error(
4731                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4732                                 root->objectid, fkey->objectid, fkey->offset,
4733                                 extent_num_bytes, item_inline_len);
4734                         err |= FILE_EXTENT_ERROR;
4735                 }
4736                 *size += extent_num_bytes;
4737                 return err;
4738         }
4739
4740         /* Check extent type */
4741         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4742                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4743                 err |= FILE_EXTENT_ERROR;
4744                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4745                       root->objectid, fkey->objectid, fkey->offset);
4746                 return err;
4747         }
4748
4749         /* Check REG_EXTENT/PREALLOC_EXTENT */
4750         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4751         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4752         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4753         extent_offset = btrfs_file_extent_offset(node, fi);
4754         compressed = btrfs_file_extent_compression(node, fi);
4755         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4756
4757         /*
4758          * Check EXTENT_DATA csum
4759          *
4760          * For plain (uncompressed) extent, we should only check the range
4761          * we're referring to, as it's possible that part of prealloc extent
4762          * has been written, and has csum:
4763          *
4764          * |<--- Original large preallocated extent A ---->|
4765          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4766          *      No csum                         Has csum
4767          *
4768          * For compressed extent, we should check the whole range.
4769          */
4770         if (!compressed) {
4771                 search_start = disk_bytenr + extent_offset;
4772                 search_len = extent_num_bytes;
4773         } else {
4774                 search_start = disk_bytenr;
4775                 search_len = disk_num_bytes;
4776         }
4777         ret = count_csum_range(root, search_start, search_len, &csum_found);
4778         if (csum_found > 0 && nodatasum) {
4779                 err |= ODD_CSUM_ITEM;
4780                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4781                       root->objectid, fkey->objectid, fkey->offset);
4782         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4783                    !is_hole && (ret < 0 || csum_found < search_len)) {
4784                 err |= CSUM_ITEM_MISSING;
4785                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4786                       root->objectid, fkey->objectid, fkey->offset,
4787                       csum_found, search_len);
4788         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4789                 err |= ODD_CSUM_ITEM;
4790                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4791                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4792         }
4793
4794         /* Check EXTENT_DATA hole */
4795         if (no_holes && is_hole) {
4796                 err |= FILE_EXTENT_ERROR;
4797                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4798                       root->objectid, fkey->objectid, fkey->offset);
4799         } else if (!no_holes && *end != fkey->offset) {
4800                 err |= FILE_EXTENT_ERROR;
4801                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4802                       root->objectid, fkey->objectid, fkey->offset);
4803         }
4804
4805         *end += extent_num_bytes;
4806         if (!is_hole)
4807                 *size += extent_num_bytes;
4808
4809         return err;
4810 }
4811
4812 /*
4813  * Check INODE_ITEM and related ITEMs (the same inode number)
4814  * 1. check link count
4815  * 2. check inode ref/extref
4816  * 3. check dir item/index
4817  *
4818  * @ext_ref:    the EXTENDED_IREF feature
4819  *
4820  * Return 0 if no error occurred.
4821  * Return >0 for error or hit the traversal is done(by error bitmap)
4822  */
4823 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4824                             unsigned int ext_ref)
4825 {
4826         struct extent_buffer *node;
4827         struct btrfs_inode_item *ii;
4828         struct btrfs_key key;
4829         u64 inode_id;
4830         u32 mode;
4831         u64 nlink;
4832         u64 nbytes;
4833         u64 isize;
4834         u64 size = 0;
4835         u64 refs = 0;
4836         u64 extent_end = 0;
4837         u64 extent_size = 0;
4838         unsigned int dir;
4839         unsigned int nodatasum;
4840         int slot;
4841         int ret;
4842         int err = 0;
4843
4844         node = path->nodes[0];
4845         slot = path->slots[0];
4846
4847         btrfs_item_key_to_cpu(node, &key, slot);
4848         inode_id = key.objectid;
4849
4850         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4851                 ret = btrfs_next_item(root, path);
4852                 if (ret > 0)
4853                         err |= LAST_ITEM;
4854                 return err;
4855         }
4856
4857         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4858         isize = btrfs_inode_size(node, ii);
4859         nbytes = btrfs_inode_nbytes(node, ii);
4860         mode = btrfs_inode_mode(node, ii);
4861         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4862         nlink = btrfs_inode_nlink(node, ii);
4863         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4864
4865         while (1) {
4866                 ret = btrfs_next_item(root, path);
4867                 if (ret < 0) {
4868                         /* out will fill 'err' rusing current statistics */
4869                         goto out;
4870                 } else if (ret > 0) {
4871                         err |= LAST_ITEM;
4872                         goto out;
4873                 }
4874
4875                 node = path->nodes[0];
4876                 slot = path->slots[0];
4877                 btrfs_item_key_to_cpu(node, &key, slot);
4878                 if (key.objectid != inode_id)
4879                         goto out;
4880
4881                 switch (key.type) {
4882                 case BTRFS_INODE_REF_KEY:
4883                         ret = check_inode_ref(root, &key, node, slot, &refs,
4884                                               mode);
4885                         err |= ret;
4886                         break;
4887                 case BTRFS_INODE_EXTREF_KEY:
4888                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4889                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4890                                         root->objectid, key.objectid,
4891                                         key.offset);
4892                         ret = check_inode_extref(root, &key, node, slot, &refs,
4893                                                  mode);
4894                         err |= ret;
4895                         break;
4896                 case BTRFS_DIR_ITEM_KEY:
4897                 case BTRFS_DIR_INDEX_KEY:
4898                         if (!dir) {
4899                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4900                                         root->objectid, inode_id,
4901                                         imode_to_type(mode), key.objectid,
4902                                         key.offset);
4903                         }
4904                         ret = check_dir_item(root, &key, node, slot, &size,
4905                                              ext_ref);
4906                         err |= ret;
4907                         break;
4908                 case BTRFS_EXTENT_DATA_KEY:
4909                         if (dir) {
4910                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4911                                         root->objectid, inode_id, key.objectid,
4912                                         key.offset);
4913                         }
4914                         ret = check_file_extent(root, &key, node, slot,
4915                                                 nodatasum, &extent_size,
4916                                                 &extent_end);
4917                         err |= ret;
4918                         break;
4919                 case BTRFS_XATTR_ITEM_KEY:
4920                         break;
4921                 default:
4922                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4923                               key.objectid, key.type, key.offset);
4924                 }
4925         }
4926
4927 out:
4928         /* verify INODE_ITEM nlink/isize/nbytes */
4929         if (dir) {
4930                 if (nlink != 1) {
4931                         err |= LINK_COUNT_ERROR;
4932                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4933                               root->objectid, inode_id, nlink);
4934                 }
4935
4936                 /*
4937                  * Just a warning, as dir inode nbytes is just an
4938                  * instructive value.
4939                  */
4940                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4941                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4942                                 root->objectid, inode_id, root->nodesize);
4943                 }
4944
4945                 if (isize != size) {
4946                         err |= ISIZE_ERROR;
4947                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4948                               root->objectid, inode_id, isize, size);
4949                 }
4950         } else {
4951                 if (nlink != refs) {
4952                         err |= LINK_COUNT_ERROR;
4953                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4954                               root->objectid, inode_id, nlink, refs);
4955                 } else if (!nlink) {
4956                         err |= ORPHAN_ITEM;
4957                 }
4958
4959                 if (!nbytes && !no_holes && extent_end < isize) {
4960                         err |= NBYTES_ERROR;
4961                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4962                               root->objectid, inode_id, isize);
4963                 }
4964
4965                 if (nbytes != extent_size) {
4966                         err |= NBYTES_ERROR;
4967                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4968                               root->objectid, inode_id, nbytes, extent_size);
4969                 }
4970         }
4971
4972         return err;
4973 }
4974
4975 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4976 {
4977         struct btrfs_path path;
4978         struct btrfs_key key;
4979         int err = 0;
4980         int ret;
4981
4982         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4983         key.type = BTRFS_INODE_ITEM_KEY;
4984         key.offset = 0;
4985
4986         /* For root being dropped, we don't need to check first inode */
4987         if (btrfs_root_refs(&root->root_item) == 0 &&
4988             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4989             key.objectid)
4990                 return 0;
4991
4992         btrfs_init_path(&path);
4993
4994         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4995         if (ret < 0)
4996                 goto out;
4997         if (ret > 0) {
4998                 ret = 0;
4999                 err |= INODE_ITEM_MISSING;
5000                 error("first inode item of root %llu is missing",
5001                       root->objectid);
5002         }
5003
5004         err |= check_inode_item(root, &path, ext_ref);
5005         err &= ~LAST_ITEM;
5006         if (err && !ret)
5007                 ret = -EIO;
5008 out:
5009         btrfs_release_path(&path);
5010         return ret;
5011 }
5012
5013 /*
5014  * Iterate all item on the tree and call check_inode_item() to check.
5015  *
5016  * @root:       the root of the tree to be checked.
5017  * @ext_ref:    the EXTENDED_IREF feature
5018  *
5019  * Return 0 if no error found.
5020  * Return <0 for error.
5021  */
5022 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5023 {
5024         struct btrfs_path path;
5025         struct node_refs nrefs;
5026         struct btrfs_root_item *root_item = &root->root_item;
5027         int ret, wret;
5028         int level;
5029
5030         /*
5031          * We need to manually check the first inode item(256)
5032          * As the following traversal function will only start from
5033          * the first inode item in the leaf, if inode item(256) is missing
5034          * we will just skip it forever.
5035          */
5036         ret = check_fs_first_inode(root, ext_ref);
5037         if (ret < 0)
5038                 return ret;
5039
5040         memset(&nrefs, 0, sizeof(nrefs));
5041         level = btrfs_header_level(root->node);
5042         btrfs_init_path(&path);
5043
5044         if (btrfs_root_refs(root_item) > 0 ||
5045             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5046                 path.nodes[level] = root->node;
5047                 path.slots[level] = 0;
5048                 extent_buffer_get(root->node);
5049         } else {
5050                 struct btrfs_key key;
5051
5052                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5053                 level = root_item->drop_level;
5054                 path.lowest_level = level;
5055                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5056                 if (ret < 0)
5057                         goto out;
5058                 ret = 0;
5059         }
5060
5061         while (1) {
5062                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5063                 if (wret < 0)
5064                         ret = wret;
5065                 if (wret != 0)
5066                         break;
5067
5068                 wret = walk_up_tree_v2(root, &path, &level);
5069                 if (wret < 0)
5070                         ret = wret;
5071                 if (wret != 0)
5072                         break;
5073         }
5074
5075 out:
5076         btrfs_release_path(&path);
5077         return ret;
5078 }
5079
5080 /*
5081  * Find the relative ref for root_ref and root_backref.
5082  *
5083  * @root:       the root of the root tree.
5084  * @ref_key:    the key of the root ref.
5085  *
5086  * Return 0 if no error occurred.
5087  */
5088 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5089                           struct extent_buffer *node, int slot)
5090 {
5091         struct btrfs_path path;
5092         struct btrfs_key key;
5093         struct btrfs_root_ref *ref;
5094         struct btrfs_root_ref *backref;
5095         char ref_name[BTRFS_NAME_LEN] = {0};
5096         char backref_name[BTRFS_NAME_LEN] = {0};
5097         u64 ref_dirid;
5098         u64 ref_seq;
5099         u32 ref_namelen;
5100         u64 backref_dirid;
5101         u64 backref_seq;
5102         u32 backref_namelen;
5103         u32 len;
5104         int ret;
5105         int err = 0;
5106
5107         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5108         ref_dirid = btrfs_root_ref_dirid(node, ref);
5109         ref_seq = btrfs_root_ref_sequence(node, ref);
5110         ref_namelen = btrfs_root_ref_name_len(node, ref);
5111
5112         if (ref_namelen <= BTRFS_NAME_LEN) {
5113                 len = ref_namelen;
5114         } else {
5115                 len = BTRFS_NAME_LEN;
5116                 warning("%s[%llu %llu] ref_name too long",
5117                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5118                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5119                         ref_key->offset);
5120         }
5121         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5122
5123         /* Find relative root_ref */
5124         key.objectid = ref_key->offset;
5125         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5126         key.offset = ref_key->objectid;
5127
5128         btrfs_init_path(&path);
5129         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5130         if (ret) {
5131                 err |= ROOT_REF_MISSING;
5132                 error("%s[%llu %llu] couldn't find relative ref",
5133                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5134                       "ROOT_REF" : "ROOT_BACKREF",
5135                       ref_key->objectid, ref_key->offset);
5136                 goto out;
5137         }
5138
5139         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5140                                  struct btrfs_root_ref);
5141         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5142         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5143         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5144
5145         if (backref_namelen <= BTRFS_NAME_LEN) {
5146                 len = backref_namelen;
5147         } else {
5148                 len = BTRFS_NAME_LEN;
5149                 warning("%s[%llu %llu] ref_name too long",
5150                         key.type == BTRFS_ROOT_REF_KEY ?
5151                         "ROOT_REF" : "ROOT_BACKREF",
5152                         key.objectid, key.offset);
5153         }
5154         read_extent_buffer(path.nodes[0], backref_name,
5155                            (unsigned long)(backref + 1), len);
5156
5157         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5158             ref_namelen != backref_namelen ||
5159             strncmp(ref_name, backref_name, len)) {
5160                 err |= ROOT_REF_MISMATCH;
5161                 error("%s[%llu %llu] mismatch relative ref",
5162                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5163                       "ROOT_REF" : "ROOT_BACKREF",
5164                       ref_key->objectid, ref_key->offset);
5165         }
5166 out:
5167         btrfs_release_path(&path);
5168         return err;
5169 }
5170
5171 /*
5172  * Check all fs/file tree in low_memory mode.
5173  *
5174  * 1. for fs tree root item, call check_fs_root_v2()
5175  * 2. for fs tree root ref/backref, call check_root_ref()
5176  *
5177  * Return 0 if no error occurred.
5178  */
5179 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5180 {
5181         struct btrfs_root *tree_root = fs_info->tree_root;
5182         struct btrfs_root *cur_root = NULL;
5183         struct btrfs_path path;
5184         struct btrfs_key key;
5185         struct extent_buffer *node;
5186         unsigned int ext_ref;
5187         int slot;
5188         int ret;
5189         int err = 0;
5190
5191         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5192
5193         btrfs_init_path(&path);
5194         key.objectid = BTRFS_FS_TREE_OBJECTID;
5195         key.offset = 0;
5196         key.type = BTRFS_ROOT_ITEM_KEY;
5197
5198         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5199         if (ret < 0) {
5200                 err = ret;
5201                 goto out;
5202         } else if (ret > 0) {
5203                 err = -ENOENT;
5204                 goto out;
5205         }
5206
5207         while (1) {
5208                 node = path.nodes[0];
5209                 slot = path.slots[0];
5210                 btrfs_item_key_to_cpu(node, &key, slot);
5211                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5212                         goto out;
5213                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5214                     fs_root_objectid(key.objectid)) {
5215                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5216                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5217                                                                        &key);
5218                         } else {
5219                                 key.offset = (u64)-1;
5220                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5221                         }
5222
5223                         if (IS_ERR(cur_root)) {
5224                                 error("Fail to read fs/subvol tree: %lld",
5225                                       key.objectid);
5226                                 err = -EIO;
5227                                 goto next;
5228                         }
5229
5230                         ret = check_fs_root_v2(cur_root, ext_ref);
5231                         err |= ret;
5232
5233                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5234                                 btrfs_free_fs_root(cur_root);
5235                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5236                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5237                         ret = check_root_ref(tree_root, &key, node, slot);
5238                         err |= ret;
5239                 }
5240 next:
5241                 ret = btrfs_next_item(tree_root, &path);
5242                 if (ret > 0)
5243                         goto out;
5244                 if (ret < 0) {
5245                         err = ret;
5246                         goto out;
5247                 }
5248         }
5249
5250 out:
5251         btrfs_release_path(&path);
5252         return err;
5253 }
5254
5255 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5256 {
5257         struct list_head *cur = rec->backrefs.next;
5258         struct extent_backref *back;
5259         struct tree_backref *tback;
5260         struct data_backref *dback;
5261         u64 found = 0;
5262         int err = 0;
5263
5264         while(cur != &rec->backrefs) {
5265                 back = to_extent_backref(cur);
5266                 cur = cur->next;
5267                 if (!back->found_extent_tree) {
5268                         err = 1;
5269                         if (!print_errs)
5270                                 goto out;
5271                         if (back->is_data) {
5272                                 dback = to_data_backref(back);
5273                                 fprintf(stderr, "Backref %llu %s %llu"
5274                                         " owner %llu offset %llu num_refs %lu"
5275                                         " not found in extent tree\n",
5276                                         (unsigned long long)rec->start,
5277                                         back->full_backref ?
5278                                         "parent" : "root",
5279                                         back->full_backref ?
5280                                         (unsigned long long)dback->parent:
5281                                         (unsigned long long)dback->root,
5282                                         (unsigned long long)dback->owner,
5283                                         (unsigned long long)dback->offset,
5284                                         (unsigned long)dback->num_refs);
5285                         } else {
5286                                 tback = to_tree_backref(back);
5287                                 fprintf(stderr, "Backref %llu parent %llu"
5288                                         " root %llu not found in extent tree\n",
5289                                         (unsigned long long)rec->start,
5290                                         (unsigned long long)tback->parent,
5291                                         (unsigned long long)tback->root);
5292                         }
5293                 }
5294                 if (!back->is_data && !back->found_ref) {
5295                         err = 1;
5296                         if (!print_errs)
5297                                 goto out;
5298                         tback = to_tree_backref(back);
5299                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5300                                 (unsigned long long)rec->start,
5301                                 back->full_backref ? "parent" : "root",
5302                                 back->full_backref ?
5303                                 (unsigned long long)tback->parent :
5304                                 (unsigned long long)tback->root, back);
5305                 }
5306                 if (back->is_data) {
5307                         dback = to_data_backref(back);
5308                         if (dback->found_ref != dback->num_refs) {
5309                                 err = 1;
5310                                 if (!print_errs)
5311                                         goto out;
5312                                 fprintf(stderr, "Incorrect local backref count"
5313                                         " on %llu %s %llu owner %llu"
5314                                         " offset %llu found %u wanted %u back %p\n",
5315                                         (unsigned long long)rec->start,
5316                                         back->full_backref ?
5317                                         "parent" : "root",
5318                                         back->full_backref ?
5319                                         (unsigned long long)dback->parent:
5320                                         (unsigned long long)dback->root,
5321                                         (unsigned long long)dback->owner,
5322                                         (unsigned long long)dback->offset,
5323                                         dback->found_ref, dback->num_refs, back);
5324                         }
5325                         if (dback->disk_bytenr != rec->start) {
5326                                 err = 1;
5327                                 if (!print_errs)
5328                                         goto out;
5329                                 fprintf(stderr, "Backref disk bytenr does not"
5330                                         " match extent record, bytenr=%llu, "
5331                                         "ref bytenr=%llu\n",
5332                                         (unsigned long long)rec->start,
5333                                         (unsigned long long)dback->disk_bytenr);
5334                         }
5335
5336                         if (dback->bytes != rec->nr) {
5337                                 err = 1;
5338                                 if (!print_errs)
5339                                         goto out;
5340                                 fprintf(stderr, "Backref bytes do not match "
5341                                         "extent backref, bytenr=%llu, ref "
5342                                         "bytes=%llu, backref bytes=%llu\n",
5343                                         (unsigned long long)rec->start,
5344                                         (unsigned long long)rec->nr,
5345                                         (unsigned long long)dback->bytes);
5346                         }
5347                 }
5348                 if (!back->is_data) {
5349                         found += 1;
5350                 } else {
5351                         dback = to_data_backref(back);
5352                         found += dback->found_ref;
5353                 }
5354         }
5355         if (found != rec->refs) {
5356                 err = 1;
5357                 if (!print_errs)
5358                         goto out;
5359                 fprintf(stderr, "Incorrect global backref count "
5360                         "on %llu found %llu wanted %llu\n",
5361                         (unsigned long long)rec->start,
5362                         (unsigned long long)found,
5363                         (unsigned long long)rec->refs);
5364         }
5365 out:
5366         return err;
5367 }
5368
5369 static int free_all_extent_backrefs(struct extent_record *rec)
5370 {
5371         struct extent_backref *back;
5372         struct list_head *cur;
5373         while (!list_empty(&rec->backrefs)) {
5374                 cur = rec->backrefs.next;
5375                 back = to_extent_backref(cur);
5376                 list_del(cur);
5377                 free(back);
5378         }
5379         return 0;
5380 }
5381
5382 static void free_extent_record_cache(struct cache_tree *extent_cache)
5383 {
5384         struct cache_extent *cache;
5385         struct extent_record *rec;
5386
5387         while (1) {
5388                 cache = first_cache_extent(extent_cache);
5389                 if (!cache)
5390                         break;
5391                 rec = container_of(cache, struct extent_record, cache);
5392                 remove_cache_extent(extent_cache, cache);
5393                 free_all_extent_backrefs(rec);
5394                 free(rec);
5395         }
5396 }
5397
5398 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5399                                  struct extent_record *rec)
5400 {
5401         if (rec->content_checked && rec->owner_ref_checked &&
5402             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5403             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5404             !rec->bad_full_backref && !rec->crossing_stripes &&
5405             !rec->wrong_chunk_type) {
5406                 remove_cache_extent(extent_cache, &rec->cache);
5407                 free_all_extent_backrefs(rec);
5408                 list_del_init(&rec->list);
5409                 free(rec);
5410         }
5411         return 0;
5412 }
5413
5414 static int check_owner_ref(struct btrfs_root *root,
5415                             struct extent_record *rec,
5416                             struct extent_buffer *buf)
5417 {
5418         struct extent_backref *node;
5419         struct tree_backref *back;
5420         struct btrfs_root *ref_root;
5421         struct btrfs_key key;
5422         struct btrfs_path path;
5423         struct extent_buffer *parent;
5424         int level;
5425         int found = 0;
5426         int ret;
5427
5428         list_for_each_entry(node, &rec->backrefs, list) {
5429                 if (node->is_data)
5430                         continue;
5431                 if (!node->found_ref)
5432                         continue;
5433                 if (node->full_backref)
5434                         continue;
5435                 back = to_tree_backref(node);
5436                 if (btrfs_header_owner(buf) == back->root)
5437                         return 0;
5438         }
5439         BUG_ON(rec->is_root);
5440
5441         /* try to find the block by search corresponding fs tree */
5442         key.objectid = btrfs_header_owner(buf);
5443         key.type = BTRFS_ROOT_ITEM_KEY;
5444         key.offset = (u64)-1;
5445
5446         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5447         if (IS_ERR(ref_root))
5448                 return 1;
5449
5450         level = btrfs_header_level(buf);
5451         if (level == 0)
5452                 btrfs_item_key_to_cpu(buf, &key, 0);
5453         else
5454                 btrfs_node_key_to_cpu(buf, &key, 0);
5455
5456         btrfs_init_path(&path);
5457         path.lowest_level = level + 1;
5458         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5459         if (ret < 0)
5460                 return 0;
5461
5462         parent = path.nodes[level + 1];
5463         if (parent && buf->start == btrfs_node_blockptr(parent,
5464                                                         path.slots[level + 1]))
5465                 found = 1;
5466
5467         btrfs_release_path(&path);
5468         return found ? 0 : 1;
5469 }
5470
5471 static int is_extent_tree_record(struct extent_record *rec)
5472 {
5473         struct list_head *cur = rec->backrefs.next;
5474         struct extent_backref *node;
5475         struct tree_backref *back;
5476         int is_extent = 0;
5477
5478         while(cur != &rec->backrefs) {
5479                 node = to_extent_backref(cur);
5480                 cur = cur->next;
5481                 if (node->is_data)
5482                         return 0;
5483                 back = to_tree_backref(node);
5484                 if (node->full_backref)
5485                         return 0;
5486                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5487                         is_extent = 1;
5488         }
5489         return is_extent;
5490 }
5491
5492
5493 static int record_bad_block_io(struct btrfs_fs_info *info,
5494                                struct cache_tree *extent_cache,
5495                                u64 start, u64 len)
5496 {
5497         struct extent_record *rec;
5498         struct cache_extent *cache;
5499         struct btrfs_key key;
5500
5501         cache = lookup_cache_extent(extent_cache, start, len);
5502         if (!cache)
5503                 return 0;
5504
5505         rec = container_of(cache, struct extent_record, cache);
5506         if (!is_extent_tree_record(rec))
5507                 return 0;
5508
5509         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5510         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5511 }
5512
5513 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5514                        struct extent_buffer *buf, int slot)
5515 {
5516         if (btrfs_header_level(buf)) {
5517                 struct btrfs_key_ptr ptr1, ptr2;
5518
5519                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5520                                    sizeof(struct btrfs_key_ptr));
5521                 read_extent_buffer(buf, &ptr2,
5522                                    btrfs_node_key_ptr_offset(slot + 1),
5523                                    sizeof(struct btrfs_key_ptr));
5524                 write_extent_buffer(buf, &ptr1,
5525                                     btrfs_node_key_ptr_offset(slot + 1),
5526                                     sizeof(struct btrfs_key_ptr));
5527                 write_extent_buffer(buf, &ptr2,
5528                                     btrfs_node_key_ptr_offset(slot),
5529                                     sizeof(struct btrfs_key_ptr));
5530                 if (slot == 0) {
5531                         struct btrfs_disk_key key;
5532                         btrfs_node_key(buf, &key, 0);
5533                         btrfs_fixup_low_keys(root, path, &key,
5534                                              btrfs_header_level(buf) + 1);
5535                 }
5536         } else {
5537                 struct btrfs_item *item1, *item2;
5538                 struct btrfs_key k1, k2;
5539                 char *item1_data, *item2_data;
5540                 u32 item1_offset, item2_offset, item1_size, item2_size;
5541
5542                 item1 = btrfs_item_nr(slot);
5543                 item2 = btrfs_item_nr(slot + 1);
5544                 btrfs_item_key_to_cpu(buf, &k1, slot);
5545                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5546                 item1_offset = btrfs_item_offset(buf, item1);
5547                 item2_offset = btrfs_item_offset(buf, item2);
5548                 item1_size = btrfs_item_size(buf, item1);
5549                 item2_size = btrfs_item_size(buf, item2);
5550
5551                 item1_data = malloc(item1_size);
5552                 if (!item1_data)
5553                         return -ENOMEM;
5554                 item2_data = malloc(item2_size);
5555                 if (!item2_data) {
5556                         free(item1_data);
5557                         return -ENOMEM;
5558                 }
5559
5560                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5561                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5562
5563                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5564                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5565                 free(item1_data);
5566                 free(item2_data);
5567
5568                 btrfs_set_item_offset(buf, item1, item2_offset);
5569                 btrfs_set_item_offset(buf, item2, item1_offset);
5570                 btrfs_set_item_size(buf, item1, item2_size);
5571                 btrfs_set_item_size(buf, item2, item1_size);
5572
5573                 path->slots[0] = slot;
5574                 btrfs_set_item_key_unsafe(root, path, &k2);
5575                 path->slots[0] = slot + 1;
5576                 btrfs_set_item_key_unsafe(root, path, &k1);
5577         }
5578         return 0;
5579 }
5580
5581 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5582 {
5583         struct extent_buffer *buf;
5584         struct btrfs_key k1, k2;
5585         int i;
5586         int level = path->lowest_level;
5587         int ret = -EIO;
5588
5589         buf = path->nodes[level];
5590         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5591                 if (level) {
5592                         btrfs_node_key_to_cpu(buf, &k1, i);
5593                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5594                 } else {
5595                         btrfs_item_key_to_cpu(buf, &k1, i);
5596                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5597                 }
5598                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5599                         continue;
5600                 ret = swap_values(root, path, buf, i);
5601                 if (ret)
5602                         break;
5603                 btrfs_mark_buffer_dirty(buf);
5604                 i = 0;
5605         }
5606         return ret;
5607 }
5608
5609 static int delete_bogus_item(struct btrfs_root *root,
5610                              struct btrfs_path *path,
5611                              struct extent_buffer *buf, int slot)
5612 {
5613         struct btrfs_key key;
5614         int nritems = btrfs_header_nritems(buf);
5615
5616         btrfs_item_key_to_cpu(buf, &key, slot);
5617
5618         /* These are all the keys we can deal with missing. */
5619         if (key.type != BTRFS_DIR_INDEX_KEY &&
5620             key.type != BTRFS_EXTENT_ITEM_KEY &&
5621             key.type != BTRFS_METADATA_ITEM_KEY &&
5622             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5623             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5624                 return -1;
5625
5626         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5627                (unsigned long long)key.objectid, key.type,
5628                (unsigned long long)key.offset, slot, buf->start);
5629         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5630                               btrfs_item_nr_offset(slot + 1),
5631                               sizeof(struct btrfs_item) *
5632                               (nritems - slot - 1));
5633         btrfs_set_header_nritems(buf, nritems - 1);
5634         if (slot == 0) {
5635                 struct btrfs_disk_key disk_key;
5636
5637                 btrfs_item_key(buf, &disk_key, 0);
5638                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5639         }
5640         btrfs_mark_buffer_dirty(buf);
5641         return 0;
5642 }
5643
5644 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5645 {
5646         struct extent_buffer *buf;
5647         int i;
5648         int ret = 0;
5649
5650         /* We should only get this for leaves */
5651         BUG_ON(path->lowest_level);
5652         buf = path->nodes[0];
5653 again:
5654         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5655                 unsigned int shift = 0, offset;
5656
5657                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5658                     BTRFS_LEAF_DATA_SIZE(root)) {
5659                         if (btrfs_item_end_nr(buf, i) >
5660                             BTRFS_LEAF_DATA_SIZE(root)) {
5661                                 ret = delete_bogus_item(root, path, buf, i);
5662                                 if (!ret)
5663                                         goto again;
5664                                 fprintf(stderr, "item is off the end of the "
5665                                         "leaf, can't fix\n");
5666                                 ret = -EIO;
5667                                 break;
5668                         }
5669                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5670                                 btrfs_item_end_nr(buf, i);
5671                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5672                            btrfs_item_offset_nr(buf, i - 1)) {
5673                         if (btrfs_item_end_nr(buf, i) >
5674                             btrfs_item_offset_nr(buf, i - 1)) {
5675                                 ret = delete_bogus_item(root, path, buf, i);
5676                                 if (!ret)
5677                                         goto again;
5678                                 fprintf(stderr, "items overlap, can't fix\n");
5679                                 ret = -EIO;
5680                                 break;
5681                         }
5682                         shift = btrfs_item_offset_nr(buf, i - 1) -
5683                                 btrfs_item_end_nr(buf, i);
5684                 }
5685                 if (!shift)
5686                         continue;
5687
5688                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5689                        i, shift, (unsigned long long)buf->start);
5690                 offset = btrfs_item_offset_nr(buf, i);
5691                 memmove_extent_buffer(buf,
5692                                       btrfs_leaf_data(buf) + offset + shift,
5693                                       btrfs_leaf_data(buf) + offset,
5694                                       btrfs_item_size_nr(buf, i));
5695                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5696                                       offset + shift);
5697                 btrfs_mark_buffer_dirty(buf);
5698         }
5699
5700         /*
5701          * We may have moved things, in which case we want to exit so we don't
5702          * write those changes out.  Once we have proper abort functionality in
5703          * progs this can be changed to something nicer.
5704          */
5705         BUG_ON(ret);
5706         return ret;
5707 }
5708
5709 /*
5710  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5711  * then just return -EIO.
5712  */
5713 static int try_to_fix_bad_block(struct btrfs_root *root,
5714                                 struct extent_buffer *buf,
5715                                 enum btrfs_tree_block_status status)
5716 {
5717         struct btrfs_trans_handle *trans;
5718         struct ulist *roots;
5719         struct ulist_node *node;
5720         struct btrfs_root *search_root;
5721         struct btrfs_path path;
5722         struct ulist_iterator iter;
5723         struct btrfs_key root_key, key;
5724         int ret;
5725
5726         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5727             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5728                 return -EIO;
5729
5730         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5731         if (ret)
5732                 return -EIO;
5733
5734         btrfs_init_path(&path);
5735         ULIST_ITER_INIT(&iter);
5736         while ((node = ulist_next(roots, &iter))) {
5737                 root_key.objectid = node->val;
5738                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5739                 root_key.offset = (u64)-1;
5740
5741                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5742                 if (IS_ERR(root)) {
5743                         ret = -EIO;
5744                         break;
5745                 }
5746
5747
5748                 trans = btrfs_start_transaction(search_root, 0);
5749                 if (IS_ERR(trans)) {
5750                         ret = PTR_ERR(trans);
5751                         break;
5752                 }
5753
5754                 path.lowest_level = btrfs_header_level(buf);
5755                 path.skip_check_block = 1;
5756                 if (path.lowest_level)
5757                         btrfs_node_key_to_cpu(buf, &key, 0);
5758                 else
5759                         btrfs_item_key_to_cpu(buf, &key, 0);
5760                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5761                 if (ret) {
5762                         ret = -EIO;
5763                         btrfs_commit_transaction(trans, search_root);
5764                         break;
5765                 }
5766                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5767                         ret = fix_key_order(search_root, &path);
5768                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5769                         ret = fix_item_offset(search_root, &path);
5770                 if (ret) {
5771                         btrfs_commit_transaction(trans, search_root);
5772                         break;
5773                 }
5774                 btrfs_release_path(&path);
5775                 btrfs_commit_transaction(trans, search_root);
5776         }
5777         ulist_free(roots);
5778         btrfs_release_path(&path);
5779         return ret;
5780 }
5781
5782 static int check_block(struct btrfs_root *root,
5783                        struct cache_tree *extent_cache,
5784                        struct extent_buffer *buf, u64 flags)
5785 {
5786         struct extent_record *rec;
5787         struct cache_extent *cache;
5788         struct btrfs_key key;
5789         enum btrfs_tree_block_status status;
5790         int ret = 0;
5791         int level;
5792
5793         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5794         if (!cache)
5795                 return 1;
5796         rec = container_of(cache, struct extent_record, cache);
5797         rec->generation = btrfs_header_generation(buf);
5798
5799         level = btrfs_header_level(buf);
5800         if (btrfs_header_nritems(buf) > 0) {
5801
5802                 if (level == 0)
5803                         btrfs_item_key_to_cpu(buf, &key, 0);
5804                 else
5805                         btrfs_node_key_to_cpu(buf, &key, 0);
5806
5807                 rec->info_objectid = key.objectid;
5808         }
5809         rec->info_level = level;
5810
5811         if (btrfs_is_leaf(buf))
5812                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5813         else
5814                 status = btrfs_check_node(root, &rec->parent_key, buf);
5815
5816         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5817                 if (repair)
5818                         status = try_to_fix_bad_block(root, buf, status);
5819                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5820                         ret = -EIO;
5821                         fprintf(stderr, "bad block %llu\n",
5822                                 (unsigned long long)buf->start);
5823                 } else {
5824                         /*
5825                          * Signal to callers we need to start the scan over
5826                          * again since we'll have cowed blocks.
5827                          */
5828                         ret = -EAGAIN;
5829                 }
5830         } else {
5831                 rec->content_checked = 1;
5832                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5833                         rec->owner_ref_checked = 1;
5834                 else {
5835                         ret = check_owner_ref(root, rec, buf);
5836                         if (!ret)
5837                                 rec->owner_ref_checked = 1;
5838                 }
5839         }
5840         if (!ret)
5841                 maybe_free_extent_rec(extent_cache, rec);
5842         return ret;
5843 }
5844
5845 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5846                                                 u64 parent, u64 root)
5847 {
5848         struct list_head *cur = rec->backrefs.next;
5849         struct extent_backref *node;
5850         struct tree_backref *back;
5851
5852         while(cur != &rec->backrefs) {
5853                 node = to_extent_backref(cur);
5854                 cur = cur->next;
5855                 if (node->is_data)
5856                         continue;
5857                 back = to_tree_backref(node);
5858                 if (parent > 0) {
5859                         if (!node->full_backref)
5860                                 continue;
5861                         if (parent == back->parent)
5862                                 return back;
5863                 } else {
5864                         if (node->full_backref)
5865                                 continue;
5866                         if (back->root == root)
5867                                 return back;
5868                 }
5869         }
5870         return NULL;
5871 }
5872
5873 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5874                                                 u64 parent, u64 root)
5875 {
5876         struct tree_backref *ref = malloc(sizeof(*ref));
5877
5878         if (!ref)
5879                 return NULL;
5880         memset(&ref->node, 0, sizeof(ref->node));
5881         if (parent > 0) {
5882                 ref->parent = parent;
5883                 ref->node.full_backref = 1;
5884         } else {
5885                 ref->root = root;
5886                 ref->node.full_backref = 0;
5887         }
5888         list_add_tail(&ref->node.list, &rec->backrefs);
5889
5890         return ref;
5891 }
5892
5893 static struct data_backref *find_data_backref(struct extent_record *rec,
5894                                                 u64 parent, u64 root,
5895                                                 u64 owner, u64 offset,
5896                                                 int found_ref,
5897                                                 u64 disk_bytenr, u64 bytes)
5898 {
5899         struct list_head *cur = rec->backrefs.next;
5900         struct extent_backref *node;
5901         struct data_backref *back;
5902
5903         while(cur != &rec->backrefs) {
5904                 node = to_extent_backref(cur);
5905                 cur = cur->next;
5906                 if (!node->is_data)
5907                         continue;
5908                 back = to_data_backref(node);
5909                 if (parent > 0) {
5910                         if (!node->full_backref)
5911                                 continue;
5912                         if (parent == back->parent)
5913                                 return back;
5914                 } else {
5915                         if (node->full_backref)
5916                                 continue;
5917                         if (back->root == root && back->owner == owner &&
5918                             back->offset == offset) {
5919                                 if (found_ref && node->found_ref &&
5920                                     (back->bytes != bytes ||
5921                                     back->disk_bytenr != disk_bytenr))
5922                                         continue;
5923                                 return back;
5924                         }
5925                 }
5926         }
5927         return NULL;
5928 }
5929
5930 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5931                                                 u64 parent, u64 root,
5932                                                 u64 owner, u64 offset,
5933                                                 u64 max_size)
5934 {
5935         struct data_backref *ref = malloc(sizeof(*ref));
5936
5937         if (!ref)
5938                 return NULL;
5939         memset(&ref->node, 0, sizeof(ref->node));
5940         ref->node.is_data = 1;
5941
5942         if (parent > 0) {
5943                 ref->parent = parent;
5944                 ref->owner = 0;
5945                 ref->offset = 0;
5946                 ref->node.full_backref = 1;
5947         } else {
5948                 ref->root = root;
5949                 ref->owner = owner;
5950                 ref->offset = offset;
5951                 ref->node.full_backref = 0;
5952         }
5953         ref->bytes = max_size;
5954         ref->found_ref = 0;
5955         ref->num_refs = 0;
5956         list_add_tail(&ref->node.list, &rec->backrefs);
5957         if (max_size > rec->max_size)
5958                 rec->max_size = max_size;
5959         return ref;
5960 }
5961
5962 /* Check if the type of extent matches with its chunk */
5963 static void check_extent_type(struct extent_record *rec)
5964 {
5965         struct btrfs_block_group_cache *bg_cache;
5966
5967         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5968         if (!bg_cache)
5969                 return;
5970
5971         /* data extent, check chunk directly*/
5972         if (!rec->metadata) {
5973                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5974                         rec->wrong_chunk_type = 1;
5975                 return;
5976         }
5977
5978         /* metadata extent, check the obvious case first */
5979         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5980                                  BTRFS_BLOCK_GROUP_METADATA))) {
5981                 rec->wrong_chunk_type = 1;
5982                 return;
5983         }
5984
5985         /*
5986          * Check SYSTEM extent, as it's also marked as metadata, we can only
5987          * make sure it's a SYSTEM extent by its backref
5988          */
5989         if (!list_empty(&rec->backrefs)) {
5990                 struct extent_backref *node;
5991                 struct tree_backref *tback;
5992                 u64 bg_type;
5993
5994                 node = to_extent_backref(rec->backrefs.next);
5995                 if (node->is_data) {
5996                         /* tree block shouldn't have data backref */
5997                         rec->wrong_chunk_type = 1;
5998                         return;
5999                 }
6000                 tback = container_of(node, struct tree_backref, node);
6001
6002                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6003                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6004                 else
6005                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6006                 if (!(bg_cache->flags & bg_type))
6007                         rec->wrong_chunk_type = 1;
6008         }
6009 }
6010
6011 /*
6012  * Allocate a new extent record, fill default values from @tmpl and insert int
6013  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6014  * the cache, otherwise it fails.
6015  */
6016 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6017                 struct extent_record *tmpl)
6018 {
6019         struct extent_record *rec;
6020         int ret = 0;
6021
6022         rec = malloc(sizeof(*rec));
6023         if (!rec)
6024                 return -ENOMEM;
6025         rec->start = tmpl->start;
6026         rec->max_size = tmpl->max_size;
6027         rec->nr = max(tmpl->nr, tmpl->max_size);
6028         rec->found_rec = tmpl->found_rec;
6029         rec->content_checked = tmpl->content_checked;
6030         rec->owner_ref_checked = tmpl->owner_ref_checked;
6031         rec->num_duplicates = 0;
6032         rec->metadata = tmpl->metadata;
6033         rec->flag_block_full_backref = FLAG_UNSET;
6034         rec->bad_full_backref = 0;
6035         rec->crossing_stripes = 0;
6036         rec->wrong_chunk_type = 0;
6037         rec->is_root = tmpl->is_root;
6038         rec->refs = tmpl->refs;
6039         rec->extent_item_refs = tmpl->extent_item_refs;
6040         rec->parent_generation = tmpl->parent_generation;
6041         INIT_LIST_HEAD(&rec->backrefs);
6042         INIT_LIST_HEAD(&rec->dups);
6043         INIT_LIST_HEAD(&rec->list);
6044         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6045         rec->cache.start = tmpl->start;
6046         rec->cache.size = tmpl->nr;
6047         ret = insert_cache_extent(extent_cache, &rec->cache);
6048         if (ret) {
6049                 free(rec);
6050                 return ret;
6051         }
6052         bytes_used += rec->nr;
6053
6054         if (tmpl->metadata)
6055                 rec->crossing_stripes = check_crossing_stripes(global_info,
6056                                 rec->start, global_info->tree_root->nodesize);
6057         check_extent_type(rec);
6058         return ret;
6059 }
6060
6061 /*
6062  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6063  * some are hints:
6064  * - refs              - if found, increase refs
6065  * - is_root           - if found, set
6066  * - content_checked   - if found, set
6067  * - owner_ref_checked - if found, set
6068  *
6069  * If not found, create a new one, initialize and insert.
6070  */
6071 static int add_extent_rec(struct cache_tree *extent_cache,
6072                 struct extent_record *tmpl)
6073 {
6074         struct extent_record *rec;
6075         struct cache_extent *cache;
6076         int ret = 0;
6077         int dup = 0;
6078
6079         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6080         if (cache) {
6081                 rec = container_of(cache, struct extent_record, cache);
6082                 if (tmpl->refs)
6083                         rec->refs++;
6084                 if (rec->nr == 1)
6085                         rec->nr = max(tmpl->nr, tmpl->max_size);
6086
6087                 /*
6088                  * We need to make sure to reset nr to whatever the extent
6089                  * record says was the real size, this way we can compare it to
6090                  * the backrefs.
6091                  */
6092                 if (tmpl->found_rec) {
6093                         if (tmpl->start != rec->start || rec->found_rec) {
6094                                 struct extent_record *tmp;
6095
6096                                 dup = 1;
6097                                 if (list_empty(&rec->list))
6098                                         list_add_tail(&rec->list,
6099                                                       &duplicate_extents);
6100
6101                                 /*
6102                                  * We have to do this song and dance in case we
6103                                  * find an extent record that falls inside of
6104                                  * our current extent record but does not have
6105                                  * the same objectid.
6106                                  */
6107                                 tmp = malloc(sizeof(*tmp));
6108                                 if (!tmp)
6109                                         return -ENOMEM;
6110                                 tmp->start = tmpl->start;
6111                                 tmp->max_size = tmpl->max_size;
6112                                 tmp->nr = tmpl->nr;
6113                                 tmp->found_rec = 1;
6114                                 tmp->metadata = tmpl->metadata;
6115                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6116                                 INIT_LIST_HEAD(&tmp->list);
6117                                 list_add_tail(&tmp->list, &rec->dups);
6118                                 rec->num_duplicates++;
6119                         } else {
6120                                 rec->nr = tmpl->nr;
6121                                 rec->found_rec = 1;
6122                         }
6123                 }
6124
6125                 if (tmpl->extent_item_refs && !dup) {
6126                         if (rec->extent_item_refs) {
6127                                 fprintf(stderr, "block %llu rec "
6128                                         "extent_item_refs %llu, passed %llu\n",
6129                                         (unsigned long long)tmpl->start,
6130                                         (unsigned long long)
6131                                                         rec->extent_item_refs,
6132                                         (unsigned long long)tmpl->extent_item_refs);
6133                         }
6134                         rec->extent_item_refs = tmpl->extent_item_refs;
6135                 }
6136                 if (tmpl->is_root)
6137                         rec->is_root = 1;
6138                 if (tmpl->content_checked)
6139                         rec->content_checked = 1;
6140                 if (tmpl->owner_ref_checked)
6141                         rec->owner_ref_checked = 1;
6142                 memcpy(&rec->parent_key, &tmpl->parent_key,
6143                                 sizeof(tmpl->parent_key));
6144                 if (tmpl->parent_generation)
6145                         rec->parent_generation = tmpl->parent_generation;
6146                 if (rec->max_size < tmpl->max_size)
6147                         rec->max_size = tmpl->max_size;
6148
6149                 /*
6150                  * A metadata extent can't cross stripe_len boundary, otherwise
6151                  * kernel scrub won't be able to handle it.
6152                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6153                  * it.
6154                  */
6155                 if (tmpl->metadata)
6156                         rec->crossing_stripes = check_crossing_stripes(
6157                                         global_info, rec->start,
6158                                         global_info->tree_root->nodesize);
6159                 check_extent_type(rec);
6160                 maybe_free_extent_rec(extent_cache, rec);
6161                 return ret;
6162         }
6163
6164         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6165
6166         return ret;
6167 }
6168
6169 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6170                             u64 parent, u64 root, int found_ref)
6171 {
6172         struct extent_record *rec;
6173         struct tree_backref *back;
6174         struct cache_extent *cache;
6175         int ret;
6176
6177         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6178         if (!cache) {
6179                 struct extent_record tmpl;
6180
6181                 memset(&tmpl, 0, sizeof(tmpl));
6182                 tmpl.start = bytenr;
6183                 tmpl.nr = 1;
6184                 tmpl.metadata = 1;
6185
6186                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6187                 if (ret)
6188                         return ret;
6189
6190                 /* really a bug in cache_extent implement now */
6191                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6192                 if (!cache)
6193                         return -ENOENT;
6194         }
6195
6196         rec = container_of(cache, struct extent_record, cache);
6197         if (rec->start != bytenr) {
6198                 /*
6199                  * Several cause, from unaligned bytenr to over lapping extents
6200                  */
6201                 return -EEXIST;
6202         }
6203
6204         back = find_tree_backref(rec, parent, root);
6205         if (!back) {
6206                 back = alloc_tree_backref(rec, parent, root);
6207                 if (!back)
6208                         return -ENOMEM;
6209         }
6210
6211         if (found_ref) {
6212                 if (back->node.found_ref) {
6213                         fprintf(stderr, "Extent back ref already exists "
6214                                 "for %llu parent %llu root %llu \n",
6215                                 (unsigned long long)bytenr,
6216                                 (unsigned long long)parent,
6217                                 (unsigned long long)root);
6218                 }
6219                 back->node.found_ref = 1;
6220         } else {
6221                 if (back->node.found_extent_tree) {
6222                         fprintf(stderr, "Extent back ref already exists "
6223                                 "for %llu parent %llu root %llu \n",
6224                                 (unsigned long long)bytenr,
6225                                 (unsigned long long)parent,
6226                                 (unsigned long long)root);
6227                 }
6228                 back->node.found_extent_tree = 1;
6229         }
6230         check_extent_type(rec);
6231         maybe_free_extent_rec(extent_cache, rec);
6232         return 0;
6233 }
6234
6235 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6236                             u64 parent, u64 root, u64 owner, u64 offset,
6237                             u32 num_refs, int found_ref, u64 max_size)
6238 {
6239         struct extent_record *rec;
6240         struct data_backref *back;
6241         struct cache_extent *cache;
6242         int ret;
6243
6244         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6245         if (!cache) {
6246                 struct extent_record tmpl;
6247
6248                 memset(&tmpl, 0, sizeof(tmpl));
6249                 tmpl.start = bytenr;
6250                 tmpl.nr = 1;
6251                 tmpl.max_size = max_size;
6252
6253                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6254                 if (ret)
6255                         return ret;
6256
6257                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6258                 if (!cache)
6259                         abort();
6260         }
6261
6262         rec = container_of(cache, struct extent_record, cache);
6263         if (rec->max_size < max_size)
6264                 rec->max_size = max_size;
6265
6266         /*
6267          * If found_ref is set then max_size is the real size and must match the
6268          * existing refs.  So if we have already found a ref then we need to
6269          * make sure that this ref matches the existing one, otherwise we need
6270          * to add a new backref so we can notice that the backrefs don't match
6271          * and we need to figure out who is telling the truth.  This is to
6272          * account for that awful fsync bug I introduced where we'd end up with
6273          * a btrfs_file_extent_item that would have its length include multiple
6274          * prealloc extents or point inside of a prealloc extent.
6275          */
6276         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6277                                  bytenr, max_size);
6278         if (!back) {
6279                 back = alloc_data_backref(rec, parent, root, owner, offset,
6280                                           max_size);
6281                 BUG_ON(!back);
6282         }
6283
6284         if (found_ref) {
6285                 BUG_ON(num_refs != 1);
6286                 if (back->node.found_ref)
6287                         BUG_ON(back->bytes != max_size);
6288                 back->node.found_ref = 1;
6289                 back->found_ref += 1;
6290                 back->bytes = max_size;
6291                 back->disk_bytenr = bytenr;
6292                 rec->refs += 1;
6293                 rec->content_checked = 1;
6294                 rec->owner_ref_checked = 1;
6295         } else {
6296                 if (back->node.found_extent_tree) {
6297                         fprintf(stderr, "Extent back ref already exists "
6298                                 "for %llu parent %llu root %llu "
6299                                 "owner %llu offset %llu num_refs %lu\n",
6300                                 (unsigned long long)bytenr,
6301                                 (unsigned long long)parent,
6302                                 (unsigned long long)root,
6303                                 (unsigned long long)owner,
6304                                 (unsigned long long)offset,
6305                                 (unsigned long)num_refs);
6306                 }
6307                 back->num_refs = num_refs;
6308                 back->node.found_extent_tree = 1;
6309         }
6310         maybe_free_extent_rec(extent_cache, rec);
6311         return 0;
6312 }
6313
6314 static int add_pending(struct cache_tree *pending,
6315                        struct cache_tree *seen, u64 bytenr, u32 size)
6316 {
6317         int ret;
6318         ret = add_cache_extent(seen, bytenr, size);
6319         if (ret)
6320                 return ret;
6321         add_cache_extent(pending, bytenr, size);
6322         return 0;
6323 }
6324
6325 static int pick_next_pending(struct cache_tree *pending,
6326                         struct cache_tree *reada,
6327                         struct cache_tree *nodes,
6328                         u64 last, struct block_info *bits, int bits_nr,
6329                         int *reada_bits)
6330 {
6331         unsigned long node_start = last;
6332         struct cache_extent *cache;
6333         int ret;
6334
6335         cache = search_cache_extent(reada, 0);
6336         if (cache) {
6337                 bits[0].start = cache->start;
6338                 bits[0].size = cache->size;
6339                 *reada_bits = 1;
6340                 return 1;
6341         }
6342         *reada_bits = 0;
6343         if (node_start > 32768)
6344                 node_start -= 32768;
6345
6346         cache = search_cache_extent(nodes, node_start);
6347         if (!cache)
6348                 cache = search_cache_extent(nodes, 0);
6349
6350         if (!cache) {
6351                  cache = search_cache_extent(pending, 0);
6352                  if (!cache)
6353                          return 0;
6354                  ret = 0;
6355                  do {
6356                          bits[ret].start = cache->start;
6357                          bits[ret].size = cache->size;
6358                          cache = next_cache_extent(cache);
6359                          ret++;
6360                  } while (cache && ret < bits_nr);
6361                  return ret;
6362         }
6363
6364         ret = 0;
6365         do {
6366                 bits[ret].start = cache->start;
6367                 bits[ret].size = cache->size;
6368                 cache = next_cache_extent(cache);
6369                 ret++;
6370         } while (cache && ret < bits_nr);
6371
6372         if (bits_nr - ret > 8) {
6373                 u64 lookup = bits[0].start + bits[0].size;
6374                 struct cache_extent *next;
6375                 next = search_cache_extent(pending, lookup);
6376                 while(next) {
6377                         if (next->start - lookup > 32768)
6378                                 break;
6379                         bits[ret].start = next->start;
6380                         bits[ret].size = next->size;
6381                         lookup = next->start + next->size;
6382                         ret++;
6383                         if (ret == bits_nr)
6384                                 break;
6385                         next = next_cache_extent(next);
6386                         if (!next)
6387                                 break;
6388                 }
6389         }
6390         return ret;
6391 }
6392
6393 static void free_chunk_record(struct cache_extent *cache)
6394 {
6395         struct chunk_record *rec;
6396
6397         rec = container_of(cache, struct chunk_record, cache);
6398         list_del_init(&rec->list);
6399         list_del_init(&rec->dextents);
6400         free(rec);
6401 }
6402
6403 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6404 {
6405         cache_tree_free_extents(chunk_cache, free_chunk_record);
6406 }
6407
6408 static void free_device_record(struct rb_node *node)
6409 {
6410         struct device_record *rec;
6411
6412         rec = container_of(node, struct device_record, node);
6413         free(rec);
6414 }
6415
6416 FREE_RB_BASED_TREE(device_cache, free_device_record);
6417
6418 int insert_block_group_record(struct block_group_tree *tree,
6419                               struct block_group_record *bg_rec)
6420 {
6421         int ret;
6422
6423         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6424         if (ret)
6425                 return ret;
6426
6427         list_add_tail(&bg_rec->list, &tree->block_groups);
6428         return 0;
6429 }
6430
6431 static void free_block_group_record(struct cache_extent *cache)
6432 {
6433         struct block_group_record *rec;
6434
6435         rec = container_of(cache, struct block_group_record, cache);
6436         list_del_init(&rec->list);
6437         free(rec);
6438 }
6439
6440 void free_block_group_tree(struct block_group_tree *tree)
6441 {
6442         cache_tree_free_extents(&tree->tree, free_block_group_record);
6443 }
6444
6445 int insert_device_extent_record(struct device_extent_tree *tree,
6446                                 struct device_extent_record *de_rec)
6447 {
6448         int ret;
6449
6450         /*
6451          * Device extent is a bit different from the other extents, because
6452          * the extents which belong to the different devices may have the
6453          * same start and size, so we need use the special extent cache
6454          * search/insert functions.
6455          */
6456         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6457         if (ret)
6458                 return ret;
6459
6460         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6461         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6462         return 0;
6463 }
6464
6465 static void free_device_extent_record(struct cache_extent *cache)
6466 {
6467         struct device_extent_record *rec;
6468
6469         rec = container_of(cache, struct device_extent_record, cache);
6470         if (!list_empty(&rec->chunk_list))
6471                 list_del_init(&rec->chunk_list);
6472         if (!list_empty(&rec->device_list))
6473                 list_del_init(&rec->device_list);
6474         free(rec);
6475 }
6476
6477 void free_device_extent_tree(struct device_extent_tree *tree)
6478 {
6479         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6480 }
6481
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * The item key supplies the extent bytenr (objectid) and the parent (offset).
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref, anything else as a data backref carrying the v0 ref count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
6502
6503 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6504                                             struct btrfs_key *key,
6505                                             int slot)
6506 {
6507         struct btrfs_chunk *ptr;
6508         struct chunk_record *rec;
6509         int num_stripes, i;
6510
6511         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6512         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6513
6514         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6515         if (!rec) {
6516                 fprintf(stderr, "memory allocation failed\n");
6517                 exit(-1);
6518         }
6519
6520         INIT_LIST_HEAD(&rec->list);
6521         INIT_LIST_HEAD(&rec->dextents);
6522         rec->bg_rec = NULL;
6523
6524         rec->cache.start = key->offset;
6525         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6526
6527         rec->generation = btrfs_header_generation(leaf);
6528
6529         rec->objectid = key->objectid;
6530         rec->type = key->type;
6531         rec->offset = key->offset;
6532
6533         rec->length = rec->cache.size;
6534         rec->owner = btrfs_chunk_owner(leaf, ptr);
6535         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6536         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6537         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6538         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6539         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6540         rec->num_stripes = num_stripes;
6541         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6542
6543         for (i = 0; i < rec->num_stripes; ++i) {
6544                 rec->stripes[i].devid =
6545                         btrfs_stripe_devid_nr(leaf, ptr, i);
6546                 rec->stripes[i].offset =
6547                         btrfs_stripe_offset_nr(leaf, ptr, i);
6548                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6549                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6550                                 BTRFS_UUID_SIZE);
6551         }
6552
6553         return rec;
6554 }
6555
6556 static int process_chunk_item(struct cache_tree *chunk_cache,
6557                               struct btrfs_key *key, struct extent_buffer *eb,
6558                               int slot)
6559 {
6560         struct chunk_record *rec;
6561         struct btrfs_chunk *chunk;
6562         int ret = 0;
6563
6564         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6565         /*
6566          * Do extra check for this chunk item,
6567          *
6568          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6569          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6570          * and owner<->key_type check.
6571          */
6572         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6573                                       key->offset);
6574         if (ret < 0) {
6575                 error("chunk(%llu, %llu) is not valid, ignore it",
6576                       key->offset, btrfs_chunk_length(eb, chunk));
6577                 return 0;
6578         }
6579         rec = btrfs_new_chunk_record(eb, key, slot);
6580         ret = insert_cache_extent(chunk_cache, &rec->cache);
6581         if (ret) {
6582                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6583                         rec->offset, rec->length);
6584                 free(rec);
6585         }
6586
6587         return ret;
6588 }
6589
6590 static int process_device_item(struct rb_root *dev_cache,
6591                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6592 {
6593         struct btrfs_dev_item *ptr;
6594         struct device_record *rec;
6595         int ret = 0;
6596
6597         ptr = btrfs_item_ptr(eb,
6598                 slot, struct btrfs_dev_item);
6599
6600         rec = malloc(sizeof(*rec));
6601         if (!rec) {
6602                 fprintf(stderr, "memory allocation failed\n");
6603                 return -ENOMEM;
6604         }
6605
6606         rec->devid = key->offset;
6607         rec->generation = btrfs_header_generation(eb);
6608
6609         rec->objectid = key->objectid;
6610         rec->type = key->type;
6611         rec->offset = key->offset;
6612
6613         rec->devid = btrfs_device_id(eb, ptr);
6614         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6615         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6616
6617         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6618         if (ret) {
6619                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6620                 free(rec);
6621         }
6622
6623         return ret;
6624 }
6625
6626 struct block_group_record *
6627 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6628                              int slot)
6629 {
6630         struct btrfs_block_group_item *ptr;
6631         struct block_group_record *rec;
6632
6633         rec = calloc(1, sizeof(*rec));
6634         if (!rec) {
6635                 fprintf(stderr, "memory allocation failed\n");
6636                 exit(-1);
6637         }
6638
6639         rec->cache.start = key->objectid;
6640         rec->cache.size = key->offset;
6641
6642         rec->generation = btrfs_header_generation(leaf);
6643
6644         rec->objectid = key->objectid;
6645         rec->type = key->type;
6646         rec->offset = key->offset;
6647
6648         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6649         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6650
6651         INIT_LIST_HEAD(&rec->list);
6652
6653         return rec;
6654 }
6655
6656 static int process_block_group_item(struct block_group_tree *block_group_cache,
6657                                     struct btrfs_key *key,
6658                                     struct extent_buffer *eb, int slot)
6659 {
6660         struct block_group_record *rec;
6661         int ret = 0;
6662
6663         rec = btrfs_new_block_group_record(eb, key, slot);
6664         ret = insert_block_group_record(block_group_cache, rec);
6665         if (ret) {
6666                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6667                         rec->objectid, rec->offset);
6668                 free(rec);
6669         }
6670
6671         return ret;
6672 }
6673
6674 struct device_extent_record *
6675 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6676                                struct btrfs_key *key, int slot)
6677 {
6678         struct device_extent_record *rec;
6679         struct btrfs_dev_extent *ptr;
6680
6681         rec = calloc(1, sizeof(*rec));
6682         if (!rec) {
6683                 fprintf(stderr, "memory allocation failed\n");
6684                 exit(-1);
6685         }
6686
6687         rec->cache.objectid = key->objectid;
6688         rec->cache.start = key->offset;
6689
6690         rec->generation = btrfs_header_generation(leaf);
6691
6692         rec->objectid = key->objectid;
6693         rec->type = key->type;
6694         rec->offset = key->offset;
6695
6696         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6697         rec->chunk_objecteid =
6698                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6699         rec->chunk_offset =
6700                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6701         rec->length = btrfs_dev_extent_length(leaf, ptr);
6702         rec->cache.size = rec->length;
6703
6704         INIT_LIST_HEAD(&rec->chunk_list);
6705         INIT_LIST_HEAD(&rec->device_list);
6706
6707         return rec;
6708 }
6709
6710 static int
6711 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6712                            struct btrfs_key *key, struct extent_buffer *eb,
6713                            int slot)
6714 {
6715         struct device_extent_record *rec;
6716         int ret;
6717
6718         rec = btrfs_new_device_extent_record(eb, key, slot);
6719         ret = insert_device_extent_record(dev_extent_cache, rec);
6720         if (ret) {
6721                 fprintf(stderr,
6722                         "Device extent[%llu, %llu, %llu] existed.\n",
6723                         rec->objectid, rec->offset, rec->length);
6724                 free(rec);
6725         }
6726
6727         return ret;
6728 }
6729
6730 static int process_extent_item(struct btrfs_root *root,
6731                                struct cache_tree *extent_cache,
6732                                struct extent_buffer *eb, int slot)
6733 {
6734         struct btrfs_extent_item *ei;
6735         struct btrfs_extent_inline_ref *iref;
6736         struct btrfs_extent_data_ref *dref;
6737         struct btrfs_shared_data_ref *sref;
6738         struct btrfs_key key;
6739         struct extent_record tmpl;
6740         unsigned long end;
6741         unsigned long ptr;
6742         int ret;
6743         int type;
6744         u32 item_size = btrfs_item_size_nr(eb, slot);
6745         u64 refs = 0;
6746         u64 offset;
6747         u64 num_bytes;
6748         int metadata = 0;
6749
6750         btrfs_item_key_to_cpu(eb, &key, slot);
6751
6752         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6753                 metadata = 1;
6754                 num_bytes = root->nodesize;
6755         } else {
6756                 num_bytes = key.offset;
6757         }
6758
6759         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6760                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6761                       key.objectid, root->sectorsize);
6762                 return -EIO;
6763         }
6764         if (item_size < sizeof(*ei)) {
6765 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6766                 struct btrfs_extent_item_v0 *ei0;
6767                 BUG_ON(item_size != sizeof(*ei0));
6768                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6769                 refs = btrfs_extent_refs_v0(eb, ei0);
6770 #else
6771                 BUG();
6772 #endif
6773                 memset(&tmpl, 0, sizeof(tmpl));
6774                 tmpl.start = key.objectid;
6775                 tmpl.nr = num_bytes;
6776                 tmpl.extent_item_refs = refs;
6777                 tmpl.metadata = metadata;
6778                 tmpl.found_rec = 1;
6779                 tmpl.max_size = num_bytes;
6780
6781                 return add_extent_rec(extent_cache, &tmpl);
6782         }
6783
6784         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6785         refs = btrfs_extent_refs(eb, ei);
6786         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6787                 metadata = 1;
6788         else
6789                 metadata = 0;
6790         if (metadata && num_bytes != root->nodesize) {
6791                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6792                       num_bytes, root->nodesize);
6793                 return -EIO;
6794         }
6795         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6796                 error("ignore invalid data extent, length %llu is not aligned to %u",
6797                       num_bytes, root->sectorsize);
6798                 return -EIO;
6799         }
6800
6801         memset(&tmpl, 0, sizeof(tmpl));
6802         tmpl.start = key.objectid;
6803         tmpl.nr = num_bytes;
6804         tmpl.extent_item_refs = refs;
6805         tmpl.metadata = metadata;
6806         tmpl.found_rec = 1;
6807         tmpl.max_size = num_bytes;
6808         add_extent_rec(extent_cache, &tmpl);
6809
6810         ptr = (unsigned long)(ei + 1);
6811         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6812             key.type == BTRFS_EXTENT_ITEM_KEY)
6813                 ptr += sizeof(struct btrfs_tree_block_info);
6814
6815         end = (unsigned long)ei + item_size;
6816         while (ptr < end) {
6817                 iref = (struct btrfs_extent_inline_ref *)ptr;
6818                 type = btrfs_extent_inline_ref_type(eb, iref);
6819                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6820                 switch (type) {
6821                 case BTRFS_TREE_BLOCK_REF_KEY:
6822                         ret = add_tree_backref(extent_cache, key.objectid,
6823                                         0, offset, 0);
6824                         if (ret < 0)
6825                                 error("add_tree_backref failed: %s",
6826                                       strerror(-ret));
6827                         break;
6828                 case BTRFS_SHARED_BLOCK_REF_KEY:
6829                         ret = add_tree_backref(extent_cache, key.objectid,
6830                                         offset, 0, 0);
6831                         if (ret < 0)
6832                                 error("add_tree_backref failed: %s",
6833                                       strerror(-ret));
6834                         break;
6835                 case BTRFS_EXTENT_DATA_REF_KEY:
6836                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6837                         add_data_backref(extent_cache, key.objectid, 0,
6838                                         btrfs_extent_data_ref_root(eb, dref),
6839                                         btrfs_extent_data_ref_objectid(eb,
6840                                                                        dref),
6841                                         btrfs_extent_data_ref_offset(eb, dref),
6842                                         btrfs_extent_data_ref_count(eb, dref),
6843                                         0, num_bytes);
6844                         break;
6845                 case BTRFS_SHARED_DATA_REF_KEY:
6846                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6847                         add_data_backref(extent_cache, key.objectid, offset,
6848                                         0, 0, 0,
6849                                         btrfs_shared_data_ref_count(eb, sref),
6850                                         0, num_bytes);
6851                         break;
6852                 default:
6853                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6854                                 key.objectid, key.type, num_bytes);
6855                         goto out;
6856                 }
6857                 ptr += btrfs_extent_inline_ref_size(type);
6858         }
6859         WARN_ON(ptr > end);
6860 out:
6861         return 0;
6862 }
6863
6864 static int check_cache_range(struct btrfs_root *root,
6865                              struct btrfs_block_group_cache *cache,
6866                              u64 offset, u64 bytes)
6867 {
6868         struct btrfs_free_space *entry;
6869         u64 *logical;
6870         u64 bytenr;
6871         int stripe_len;
6872         int i, nr, ret;
6873
6874         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6875                 bytenr = btrfs_sb_offset(i);
6876                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6877                                        cache->key.objectid, bytenr, 0,
6878                                        &logical, &nr, &stripe_len);
6879                 if (ret)
6880                         return ret;
6881
6882                 while (nr--) {
6883                         if (logical[nr] + stripe_len <= offset)
6884                                 continue;
6885                         if (offset + bytes <= logical[nr])
6886                                 continue;
6887                         if (logical[nr] == offset) {
6888                                 if (stripe_len >= bytes) {
6889                                         free(logical);
6890                                         return 0;
6891                                 }
6892                                 bytes -= stripe_len;
6893                                 offset += stripe_len;
6894                         } else if (logical[nr] < offset) {
6895                                 if (logical[nr] + stripe_len >=
6896                                     offset + bytes) {
6897                                         free(logical);
6898                                         return 0;
6899                                 }
6900                                 bytes = (offset + bytes) -
6901                                         (logical[nr] + stripe_len);
6902                                 offset = logical[nr] + stripe_len;
6903                         } else {
6904                                 /*
6905                                  * Could be tricky, the super may land in the
6906                                  * middle of the area we're checking.  First
6907                                  * check the easiest case, it's at the end.
6908                                  */
6909                                 if (logical[nr] + stripe_len >=
6910                                     bytes + offset) {
6911                                         bytes = logical[nr] - offset;
6912                                         continue;
6913                                 }
6914
6915                                 /* Check the left side */
6916                                 ret = check_cache_range(root, cache,
6917                                                         offset,
6918                                                         logical[nr] - offset);
6919                                 if (ret) {
6920                                         free(logical);
6921                                         return ret;
6922                                 }
6923
6924                                 /* Now we continue with the right side */
6925                                 bytes = (offset + bytes) -
6926                                         (logical[nr] + stripe_len);
6927                                 offset = logical[nr] + stripe_len;
6928                         }
6929                 }
6930
6931                 free(logical);
6932         }
6933
6934         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6935         if (!entry) {
6936                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6937                         offset, offset+bytes);
6938                 return -EINVAL;
6939         }
6940
6941         if (entry->offset != offset) {
6942                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6943                         entry->offset);
6944                 return -EINVAL;
6945         }
6946
6947         if (entry->bytes != bytes) {
6948                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6949                         bytes, entry->bytes, offset);
6950                 return -EINVAL;
6951         }
6952
6953         unlink_free_space(cache->free_space_ctl, entry);
6954         free(entry);
6955         return 0;
6956 }
6957
6958 static int verify_space_cache(struct btrfs_root *root,
6959                               struct btrfs_block_group_cache *cache)
6960 {
6961         struct btrfs_path path;
6962         struct extent_buffer *leaf;
6963         struct btrfs_key key;
6964         u64 last;
6965         int ret = 0;
6966
6967         root = root->fs_info->extent_root;
6968
6969         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6970
6971         btrfs_init_path(&path);
6972         key.objectid = last;
6973         key.offset = 0;
6974         key.type = BTRFS_EXTENT_ITEM_KEY;
6975         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6976         if (ret < 0)
6977                 goto out;
6978         ret = 0;
6979         while (1) {
6980                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6981                         ret = btrfs_next_leaf(root, &path);
6982                         if (ret < 0)
6983                                 goto out;
6984                         if (ret > 0) {
6985                                 ret = 0;
6986                                 break;
6987                         }
6988                 }
6989                 leaf = path.nodes[0];
6990                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6991                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6992                         break;
6993                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6994                     key.type != BTRFS_METADATA_ITEM_KEY) {
6995                         path.slots[0]++;
6996                         continue;
6997                 }
6998
6999                 if (last == key.objectid) {
7000                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7001                                 last = key.objectid + key.offset;
7002                         else
7003                                 last = key.objectid + root->nodesize;
7004                         path.slots[0]++;
7005                         continue;
7006                 }
7007
7008                 ret = check_cache_range(root, cache, last,
7009                                         key.objectid - last);
7010                 if (ret)
7011                         break;
7012                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7013                         last = key.objectid + key.offset;
7014                 else
7015                         last = key.objectid + root->nodesize;
7016                 path.slots[0]++;
7017         }
7018
7019         if (last < cache->key.objectid + cache->key.offset)
7020                 ret = check_cache_range(root, cache, last,
7021                                         cache->key.objectid +
7022                                         cache->key.offset - last);
7023
7024 out:
7025         btrfs_release_path(&path);
7026
7027         if (!ret &&
7028             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7029                 fprintf(stderr, "There are still entries left in the space "
7030                         "cache\n");
7031                 ret = -EINVAL;
7032         }
7033
7034         return ret;
7035 }
7036
7037 static int check_space_cache(struct btrfs_root *root)
7038 {
7039         struct btrfs_block_group_cache *cache;
7040         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7041         int ret;
7042         int error = 0;
7043
7044         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7045             btrfs_super_generation(root->fs_info->super_copy) !=
7046             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7047                 printf("cache and super generation don't match, space cache "
7048                        "will be invalidated\n");
7049                 return 0;
7050         }
7051
7052         if (ctx.progress_enabled) {
7053                 ctx.tp = TASK_FREE_SPACE;
7054                 task_start(ctx.info);
7055         }
7056
7057         while (1) {
7058                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7059                 if (!cache)
7060                         break;
7061
7062                 start = cache->key.objectid + cache->key.offset;
7063                 if (!cache->free_space_ctl) {
7064                         if (btrfs_init_free_space_ctl(cache,
7065                                                       root->sectorsize)) {
7066                                 ret = -ENOMEM;
7067                                 break;
7068                         }
7069                 } else {
7070                         btrfs_remove_free_space_cache(cache);
7071                 }
7072
7073                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7074                         ret = exclude_super_stripes(root, cache);
7075                         if (ret) {
7076                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7077                                         strerror(-ret));
7078                                 error++;
7079                                 continue;
7080                         }
7081                         ret = load_free_space_tree(root->fs_info, cache);
7082                         free_excluded_extents(root, cache);
7083                         if (ret < 0) {
7084                                 fprintf(stderr, "could not load free space tree: %s\n",
7085                                         strerror(-ret));
7086                                 error++;
7087                                 continue;
7088                         }
7089                         error += ret;
7090                 } else {
7091                         ret = load_free_space_cache(root->fs_info, cache);
7092                         if (!ret)
7093                                 continue;
7094                 }
7095
7096                 ret = verify_space_cache(root, cache);
7097                 if (ret) {
7098                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7099                                 cache->key.objectid);
7100                         error++;
7101                 }
7102         }
7103
7104         task_stop(ctx.info);
7105
7106         return error ? -EINVAL : 0;
7107 }
7108
7109 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7110                         u64 num_bytes, unsigned long leaf_offset,
7111                         struct extent_buffer *eb) {
7112
7113         u64 offset = 0;
7114         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7115         char *data;
7116         unsigned long csum_offset;
7117         u32 csum;
7118         u32 csum_expected;
7119         u64 read_len;
7120         u64 data_checked = 0;
7121         u64 tmp;
7122         int ret = 0;
7123         int mirror;
7124         int num_copies;
7125
7126         if (num_bytes % root->sectorsize)
7127                 return -EINVAL;
7128
7129         data = malloc(num_bytes);
7130         if (!data)
7131                 return -ENOMEM;
7132
7133         while (offset < num_bytes) {
7134                 mirror = 0;
7135 again:
7136                 read_len = num_bytes - offset;
7137                 /* read as much space once a time */
7138                 ret = read_extent_data(root, data + offset,
7139                                 bytenr + offset, &read_len, mirror);
7140                 if (ret)
7141                         goto out;
7142                 data_checked = 0;
7143                 /* verify every 4k data's checksum */
7144                 while (data_checked < read_len) {
7145                         csum = ~(u32)0;
7146                         tmp = offset + data_checked;
7147
7148                         csum = btrfs_csum_data((char *)data + tmp,
7149                                                csum, root->sectorsize);
7150                         btrfs_csum_final(csum, (u8 *)&csum);
7151
7152                         csum_offset = leaf_offset +
7153                                  tmp / root->sectorsize * csum_size;
7154                         read_extent_buffer(eb, (char *)&csum_expected,
7155                                            csum_offset, csum_size);
7156                         /* try another mirror */
7157                         if (csum != csum_expected) {
7158                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7159                                                 mirror, bytenr + tmp,
7160                                                 csum, csum_expected);
7161                                 num_copies = btrfs_num_copies(
7162                                                 &root->fs_info->mapping_tree,
7163                                                 bytenr, num_bytes);
7164                                 if (mirror < num_copies - 1) {
7165                                         mirror += 1;
7166                                         goto again;
7167                                 }
7168                         }
7169                         data_checked += root->sectorsize;
7170                 }
7171                 offset += read_len;
7172         }
7173 out:
7174         free(data);
7175         return ret;
7176 }
7177
7178 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7179                                u64 num_bytes)
7180 {
7181         struct btrfs_path path;
7182         struct extent_buffer *leaf;
7183         struct btrfs_key key;
7184         int ret;
7185
7186         btrfs_init_path(&path);
7187         key.objectid = bytenr;
7188         key.type = BTRFS_EXTENT_ITEM_KEY;
7189         key.offset = (u64)-1;
7190
7191 again:
7192         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7193                                 0, 0);
7194         if (ret < 0) {
7195                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7196                 btrfs_release_path(&path);
7197                 return ret;
7198         } else if (ret) {
7199                 if (path.slots[0] > 0) {
7200                         path.slots[0]--;
7201                 } else {
7202                         ret = btrfs_prev_leaf(root, &path);
7203                         if (ret < 0) {
7204                                 goto out;
7205                         } else if (ret > 0) {
7206                                 ret = 0;
7207                                 goto out;
7208                         }
7209                 }
7210         }
7211
7212         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7213
7214         /*
7215          * Block group items come before extent items if they have the same
7216          * bytenr, so walk back one more just in case.  Dear future traveller,
7217          * first congrats on mastering time travel.  Now if it's not too much
7218          * trouble could you go back to 2006 and tell Chris to make the
7219          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7220          * EXTENT_ITEM_KEY please?
7221          */
7222         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7223                 if (path.slots[0] > 0) {
7224                         path.slots[0]--;
7225                 } else {
7226                         ret = btrfs_prev_leaf(root, &path);
7227                         if (ret < 0) {
7228                                 goto out;
7229                         } else if (ret > 0) {
7230                                 ret = 0;
7231                                 goto out;
7232                         }
7233                 }
7234                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7235         }
7236
7237         while (num_bytes) {
7238                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7239                         ret = btrfs_next_leaf(root, &path);
7240                         if (ret < 0) {
7241                                 fprintf(stderr, "Error going to next leaf "
7242                                         "%d\n", ret);
7243                                 btrfs_release_path(&path);
7244                                 return ret;
7245                         } else if (ret) {
7246                                 break;
7247                         }
7248                 }
7249                 leaf = path.nodes[0];
7250                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7251                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7252                         path.slots[0]++;
7253                         continue;
7254                 }
7255                 if (key.objectid + key.offset < bytenr) {
7256                         path.slots[0]++;
7257                         continue;
7258                 }
7259                 if (key.objectid > bytenr + num_bytes)
7260                         break;
7261
7262                 if (key.objectid == bytenr) {
7263                         if (key.offset >= num_bytes) {
7264                                 num_bytes = 0;
7265                                 break;
7266                         }
7267                         num_bytes -= key.offset;
7268                         bytenr += key.offset;
7269                 } else if (key.objectid < bytenr) {
7270                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7271                                 num_bytes = 0;
7272                                 break;
7273                         }
7274                         num_bytes = (bytenr + num_bytes) -
7275                                 (key.objectid + key.offset);
7276                         bytenr = key.objectid + key.offset;
7277                 } else {
7278                         if (key.objectid + key.offset < bytenr + num_bytes) {
7279                                 u64 new_start = key.objectid + key.offset;
7280                                 u64 new_bytes = bytenr + num_bytes - new_start;
7281
7282                                 /*
7283                                  * Weird case, the extent is in the middle of
7284                                  * our range, we'll have to search one side
7285                                  * and then the other.  Not sure if this happens
7286                                  * in real life, but no harm in coding it up
7287                                  * anyway just in case.
7288                                  */
7289                                 btrfs_release_path(&path);
7290                                 ret = check_extent_exists(root, new_start,
7291                                                           new_bytes);
7292                                 if (ret) {
7293                                         fprintf(stderr, "Right section didn't "
7294                                                 "have a record\n");
7295                                         break;
7296                                 }
7297                                 num_bytes = key.objectid - bytenr;
7298                                 goto again;
7299                         }
7300                         num_bytes = key.objectid - bytenr;
7301                 }
7302                 path.slots[0]++;
7303         }
7304         ret = 0;
7305
7306 out:
7307         if (num_bytes && !ret) {
7308                 fprintf(stderr, "There are no extents for csum range "
7309                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7310                 ret = 1;
7311         }
7312
7313         btrfs_release_path(&path);
7314         return ret;
7315 }
7316
7317 static int check_csums(struct btrfs_root *root)
7318 {
7319         struct btrfs_path path;
7320         struct extent_buffer *leaf;
7321         struct btrfs_key key;
7322         u64 offset = 0, num_bytes = 0;
7323         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7324         int errors = 0;
7325         int ret;
7326         u64 data_len;
7327         unsigned long leaf_offset;
7328
7329         root = root->fs_info->csum_root;
7330         if (!extent_buffer_uptodate(root->node)) {
7331                 fprintf(stderr, "No valid csum tree found\n");
7332                 return -ENOENT;
7333         }
7334
7335         btrfs_init_path(&path);
7336         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7337         key.type = BTRFS_EXTENT_CSUM_KEY;
7338         key.offset = 0;
7339         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7340         if (ret < 0) {
7341                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7342                 btrfs_release_path(&path);
7343                 return ret;
7344         }
7345
7346         if (ret > 0 && path.slots[0])
7347                 path.slots[0]--;
7348         ret = 0;
7349
7350         while (1) {
7351                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7352                         ret = btrfs_next_leaf(root, &path);
7353                         if (ret < 0) {
7354                                 fprintf(stderr, "Error going to next leaf "
7355                                         "%d\n", ret);
7356                                 break;
7357                         }
7358                         if (ret)
7359                                 break;
7360                 }
7361                 leaf = path.nodes[0];
7362
7363                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7364                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7365                         path.slots[0]++;
7366                         continue;
7367                 }
7368
7369                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7370                               csum_size) * root->sectorsize;
7371                 if (!check_data_csum)
7372                         goto skip_csum_check;
7373                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7374                 ret = check_extent_csums(root, key.offset, data_len,
7375                                          leaf_offset, leaf);
7376                 if (ret)
7377                         break;
7378 skip_csum_check:
7379                 if (!num_bytes) {
7380                         offset = key.offset;
7381                 } else if (key.offset != offset + num_bytes) {
7382                         ret = check_extent_exists(root, offset, num_bytes);
7383                         if (ret) {
7384                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7385                                         "there is no extent record\n",
7386                                         offset, offset+num_bytes);
7387                                 errors++;
7388                         }
7389                         offset = key.offset;
7390                         num_bytes = 0;
7391                 }
7392                 num_bytes += data_len;
7393                 path.slots[0]++;
7394         }
7395
7396         btrfs_release_path(&path);
7397         return errors;
7398 }
7399
7400 static int is_dropped_key(struct btrfs_key *key,
7401                           struct btrfs_key *drop_key) {
7402         if (key->objectid < drop_key->objectid)
7403                 return 1;
7404         else if (key->objectid == drop_key->objectid) {
7405                 if (key->type < drop_key->type)
7406                         return 1;
7407                 else if (key->type == drop_key->type) {
7408                         if (key->offset < drop_key->offset)
7409                                 return 1;
7410                 }
7411         }
7412         return 0;
7413 }
7414
7415 /*
7416  * Here are the rules for FULL_BACKREF.
7417  *
7418  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7419  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7420  *      FULL_BACKREF set.
7421  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7422  *    if it happened after the relocation occurred since we'll have dropped the
7423  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7424  *    have no real way to know for sure.
7425  *
7426  * We process the blocks one root at a time, and we start from the lowest root
7427  * objectid and go to the highest.  So we can just lookup the owner backref for
7428  * the record and if we don't find it then we know it doesn't exist and we have
7429  * a FULL BACKREF.
7430  *
7431  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7432  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7433  * be set or not and then we can check later once we've gathered all the refs.
7434  */
7435 static int calc_extent_flag(struct cache_tree *extent_cache,
7436                            struct extent_buffer *buf,
7437                            struct root_item_record *ri,
7438                            u64 *flags)
7439 {
7440         struct extent_record *rec;
7441         struct cache_extent *cache;
7442         struct tree_backref *tback;
7443         u64 owner = 0;
7444
7445         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7446         /* we have added this extent before */
7447         if (!cache)
7448                 return -ENOENT;
7449
7450         rec = container_of(cache, struct extent_record, cache);
7451
7452         /*
7453          * Except file/reloc tree, we can not have
7454          * FULL BACKREF MODE
7455          */
7456         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7457                 goto normal;
7458         /*
7459          * root node
7460          */
7461         if (buf->start == ri->bytenr)
7462                 goto normal;
7463
7464         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7465                 goto full_backref;
7466
7467         owner = btrfs_header_owner(buf);
7468         if (owner == ri->objectid)
7469                 goto normal;
7470
7471         tback = find_tree_backref(rec, 0, owner);
7472         if (!tback)
7473                 goto full_backref;
7474 normal:
7475         *flags = 0;
7476         if (rec->flag_block_full_backref != FLAG_UNSET &&
7477             rec->flag_block_full_backref != 0)
7478                 rec->bad_full_backref = 1;
7479         return 0;
7480 full_backref:
7481         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7482         if (rec->flag_block_full_backref != FLAG_UNSET &&
7483             rec->flag_block_full_backref != 1)
7484                 rec->bad_full_backref = 1;
7485         return 0;
7486 }
7487
7488 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7489 {
7490         fprintf(stderr, "Invalid key type(");
7491         print_key_type(stderr, 0, key_type);
7492         fprintf(stderr, ") found in root(");
7493         print_objectid(stderr, rootid, 0);
7494         fprintf(stderr, ")\n");
7495 }
7496
7497 /*
7498  * Check if the key is valid with its extent buffer.
7499  *
7500  * This is a early check in case invalid key exists in a extent buffer
7501  * This is not comprehensive yet, but should prevent wrong key/item passed
7502  * further
7503  */
7504 static int check_type_with_root(u64 rootid, u8 key_type)
7505 {
7506         switch (key_type) {
7507         /* Only valid in chunk tree */
7508         case BTRFS_DEV_ITEM_KEY:
7509         case BTRFS_CHUNK_ITEM_KEY:
7510                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7511                         goto err;
7512                 break;
7513         /* valid in csum and log tree */
7514         case BTRFS_CSUM_TREE_OBJECTID:
7515                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7516                       is_fstree(rootid)))
7517                         goto err;
7518                 break;
7519         case BTRFS_EXTENT_ITEM_KEY:
7520         case BTRFS_METADATA_ITEM_KEY:
7521         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7522                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7523                         goto err;
7524                 break;
7525         case BTRFS_ROOT_ITEM_KEY:
7526                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7527                         goto err;
7528                 break;
7529         case BTRFS_DEV_EXTENT_KEY:
7530                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7531                         goto err;
7532                 break;
7533         }
7534         return 0;
7535 err:
7536         report_mismatch_key_root(key_type, rootid);
7537         return -EINVAL;
7538 }
7539
7540 static int run_next_block(struct btrfs_root *root,
7541                           struct block_info *bits,
7542                           int bits_nr,
7543                           u64 *last,
7544                           struct cache_tree *pending,
7545                           struct cache_tree *seen,
7546                           struct cache_tree *reada,
7547                           struct cache_tree *nodes,
7548                           struct cache_tree *extent_cache,
7549                           struct cache_tree *chunk_cache,
7550                           struct rb_root *dev_cache,
7551                           struct block_group_tree *block_group_cache,
7552                           struct device_extent_tree *dev_extent_cache,
7553                           struct root_item_record *ri)
7554 {
7555         struct extent_buffer *buf;
7556         struct extent_record *rec = NULL;
7557         u64 bytenr;
7558         u32 size;
7559         u64 parent;
7560         u64 owner;
7561         u64 flags;
7562         u64 ptr;
7563         u64 gen = 0;
7564         int ret = 0;
7565         int i;
7566         int nritems;
7567         struct btrfs_key key;
7568         struct cache_extent *cache;
7569         int reada_bits;
7570
7571         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7572                                     bits_nr, &reada_bits);
7573         if (nritems == 0)
7574                 return 1;
7575
7576         if (!reada_bits) {
7577                 for(i = 0; i < nritems; i++) {
7578                         ret = add_cache_extent(reada, bits[i].start,
7579                                                bits[i].size);
7580                         if (ret == -EEXIST)
7581                                 continue;
7582
7583                         /* fixme, get the parent transid */
7584                         readahead_tree_block(root, bits[i].start,
7585                                              bits[i].size, 0);
7586                 }
7587         }
7588         *last = bits[0].start;
7589         bytenr = bits[0].start;
7590         size = bits[0].size;
7591
7592         cache = lookup_cache_extent(pending, bytenr, size);
7593         if (cache) {
7594                 remove_cache_extent(pending, cache);
7595                 free(cache);
7596         }
7597         cache = lookup_cache_extent(reada, bytenr, size);
7598         if (cache) {
7599                 remove_cache_extent(reada, cache);
7600                 free(cache);
7601         }
7602         cache = lookup_cache_extent(nodes, bytenr, size);
7603         if (cache) {
7604                 remove_cache_extent(nodes, cache);
7605                 free(cache);
7606         }
7607         cache = lookup_cache_extent(extent_cache, bytenr, size);
7608         if (cache) {
7609                 rec = container_of(cache, struct extent_record, cache);
7610                 gen = rec->parent_generation;
7611         }
7612
7613         /* fixme, get the real parent transid */
7614         buf = read_tree_block(root, bytenr, size, gen);
7615         if (!extent_buffer_uptodate(buf)) {
7616                 record_bad_block_io(root->fs_info,
7617                                     extent_cache, bytenr, size);
7618                 goto out;
7619         }
7620
7621         nritems = btrfs_header_nritems(buf);
7622
7623         flags = 0;
7624         if (!init_extent_tree) {
7625                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7626                                        btrfs_header_level(buf), 1, NULL,
7627                                        &flags);
7628                 if (ret < 0) {
7629                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7630                         if (ret < 0) {
7631                                 fprintf(stderr, "Couldn't calc extent flags\n");
7632                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7633                         }
7634                 }
7635         } else {
7636                 flags = 0;
7637                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7638                 if (ret < 0) {
7639                         fprintf(stderr, "Couldn't calc extent flags\n");
7640                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7641                 }
7642         }
7643
7644         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7645                 if (ri != NULL &&
7646                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7647                     ri->objectid == btrfs_header_owner(buf)) {
7648                         /*
7649                          * Ok we got to this block from it's original owner and
7650                          * we have FULL_BACKREF set.  Relocation can leave
7651                          * converted blocks over so this is altogether possible,
7652                          * however it's not possible if the generation > the
7653                          * last snapshot, so check for this case.
7654                          */
7655                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7656                             btrfs_header_generation(buf) > ri->last_snapshot) {
7657                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7658                                 rec->bad_full_backref = 1;
7659                         }
7660                 }
7661         } else {
7662                 if (ri != NULL &&
7663                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7664                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7665                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7666                         rec->bad_full_backref = 1;
7667                 }
7668         }
7669
7670         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7671                 rec->flag_block_full_backref = 1;
7672                 parent = bytenr;
7673                 owner = 0;
7674         } else {
7675                 rec->flag_block_full_backref = 0;
7676                 parent = 0;
7677                 owner = btrfs_header_owner(buf);
7678         }
7679
7680         ret = check_block(root, extent_cache, buf, flags);
7681         if (ret)
7682                 goto out;
7683
7684         if (btrfs_is_leaf(buf)) {
7685                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7686                 for (i = 0; i < nritems; i++) {
7687                         struct btrfs_file_extent_item *fi;
7688                         btrfs_item_key_to_cpu(buf, &key, i);
7689                         /*
7690                          * Check key type against the leaf owner.
7691                          * Could filter quite a lot of early error if
7692                          * owner is correct
7693                          */
7694                         if (check_type_with_root(btrfs_header_owner(buf),
7695                                                  key.type)) {
7696                                 fprintf(stderr, "ignoring invalid key\n");
7697                                 continue;
7698                         }
7699                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7700                                 process_extent_item(root, extent_cache, buf,
7701                                                     i);
7702                                 continue;
7703                         }
7704                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7705                                 process_extent_item(root, extent_cache, buf,
7706                                                     i);
7707                                 continue;
7708                         }
7709                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7710                                 total_csum_bytes +=
7711                                         btrfs_item_size_nr(buf, i);
7712                                 continue;
7713                         }
7714                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7715                                 process_chunk_item(chunk_cache, &key, buf, i);
7716                                 continue;
7717                         }
7718                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7719                                 process_device_item(dev_cache, &key, buf, i);
7720                                 continue;
7721                         }
7722                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7723                                 process_block_group_item(block_group_cache,
7724                                         &key, buf, i);
7725                                 continue;
7726                         }
7727                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7728                                 process_device_extent_item(dev_extent_cache,
7729                                         &key, buf, i);
7730                                 continue;
7731
7732                         }
7733                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7734 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7735                                 process_extent_ref_v0(extent_cache, buf, i);
7736 #else
7737                                 BUG();
7738 #endif
7739                                 continue;
7740                         }
7741
7742                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7743                                 ret = add_tree_backref(extent_cache,
7744                                                 key.objectid, 0, key.offset, 0);
7745                                 if (ret < 0)
7746                                         error("add_tree_backref failed: %s",
7747                                               strerror(-ret));
7748                                 continue;
7749                         }
7750                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7751                                 ret = add_tree_backref(extent_cache,
7752                                                 key.objectid, key.offset, 0, 0);
7753                                 if (ret < 0)
7754                                         error("add_tree_backref failed: %s",
7755                                               strerror(-ret));
7756                                 continue;
7757                         }
7758                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7759                                 struct btrfs_extent_data_ref *ref;
7760                                 ref = btrfs_item_ptr(buf, i,
7761                                                 struct btrfs_extent_data_ref);
7762                                 add_data_backref(extent_cache,
7763                                         key.objectid, 0,
7764                                         btrfs_extent_data_ref_root(buf, ref),
7765                                         btrfs_extent_data_ref_objectid(buf,
7766                                                                        ref),
7767                                         btrfs_extent_data_ref_offset(buf, ref),
7768                                         btrfs_extent_data_ref_count(buf, ref),
7769                                         0, root->sectorsize);
7770                                 continue;
7771                         }
7772                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7773                                 struct btrfs_shared_data_ref *ref;
7774                                 ref = btrfs_item_ptr(buf, i,
7775                                                 struct btrfs_shared_data_ref);
7776                                 add_data_backref(extent_cache,
7777                                         key.objectid, key.offset, 0, 0, 0,
7778                                         btrfs_shared_data_ref_count(buf, ref),
7779                                         0, root->sectorsize);
7780                                 continue;
7781                         }
7782                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7783                                 struct bad_item *bad;
7784
7785                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7786                                         continue;
7787                                 if (!owner)
7788                                         continue;
7789                                 bad = malloc(sizeof(struct bad_item));
7790                                 if (!bad)
7791                                         continue;
7792                                 INIT_LIST_HEAD(&bad->list);
7793                                 memcpy(&bad->key, &key,
7794                                        sizeof(struct btrfs_key));
7795                                 bad->root_id = owner;
7796                                 list_add_tail(&bad->list, &delete_items);
7797                                 continue;
7798                         }
7799                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7800                                 continue;
7801                         fi = btrfs_item_ptr(buf, i,
7802                                             struct btrfs_file_extent_item);
7803                         if (btrfs_file_extent_type(buf, fi) ==
7804                             BTRFS_FILE_EXTENT_INLINE)
7805                                 continue;
7806                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7807                                 continue;
7808
7809                         data_bytes_allocated +=
7810                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7811                         if (data_bytes_allocated < root->sectorsize) {
7812                                 abort();
7813                         }
7814                         data_bytes_referenced +=
7815                                 btrfs_file_extent_num_bytes(buf, fi);
7816                         add_data_backref(extent_cache,
7817                                 btrfs_file_extent_disk_bytenr(buf, fi),
7818                                 parent, owner, key.objectid, key.offset -
7819                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7820                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7821                 }
7822         } else {
7823                 int level;
7824                 struct btrfs_key first_key;
7825
7826                 first_key.objectid = 0;
7827
7828                 if (nritems > 0)
7829                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7830                 level = btrfs_header_level(buf);
7831                 for (i = 0; i < nritems; i++) {
7832                         struct extent_record tmpl;
7833
7834                         ptr = btrfs_node_blockptr(buf, i);
7835                         size = root->nodesize;
7836                         btrfs_node_key_to_cpu(buf, &key, i);
7837                         if (ri != NULL) {
7838                                 if ((level == ri->drop_level)
7839                                     && is_dropped_key(&key, &ri->drop_key)) {
7840                                         continue;
7841                                 }
7842                         }
7843
7844                         memset(&tmpl, 0, sizeof(tmpl));
7845                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7846                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7847                         tmpl.start = ptr;
7848                         tmpl.nr = size;
7849                         tmpl.refs = 1;
7850                         tmpl.metadata = 1;
7851                         tmpl.max_size = size;
7852                         ret = add_extent_rec(extent_cache, &tmpl);
7853                         if (ret < 0)
7854                                 goto out;
7855
7856                         ret = add_tree_backref(extent_cache, ptr, parent,
7857                                         owner, 1);
7858                         if (ret < 0) {
7859                                 error("add_tree_backref failed: %s",
7860                                       strerror(-ret));
7861                                 continue;
7862                         }
7863
7864                         if (level > 1) {
7865                                 add_pending(nodes, seen, ptr, size);
7866                         } else {
7867                                 add_pending(pending, seen, ptr, size);
7868                         }
7869                 }
7870                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7871                                       nritems) * sizeof(struct btrfs_key_ptr);
7872         }
7873         total_btree_bytes += buf->len;
7874         if (fs_root_objectid(btrfs_header_owner(buf)))
7875                 total_fs_tree_bytes += buf->len;
7876         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7877                 total_extent_tree_bytes += buf->len;
7878         if (!found_old_backref &&
7879             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7880             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7881             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7882                 found_old_backref = 1;
7883 out:
7884         free_extent_buffer(buf);
7885         return ret;
7886 }
7887
7888 static int add_root_to_pending(struct extent_buffer *buf,
7889                                struct cache_tree *extent_cache,
7890                                struct cache_tree *pending,
7891                                struct cache_tree *seen,
7892                                struct cache_tree *nodes,
7893                                u64 objectid)
7894 {
7895         struct extent_record tmpl;
7896         int ret;
7897
7898         if (btrfs_header_level(buf) > 0)
7899                 add_pending(nodes, seen, buf->start, buf->len);
7900         else
7901                 add_pending(pending, seen, buf->start, buf->len);
7902
7903         memset(&tmpl, 0, sizeof(tmpl));
7904         tmpl.start = buf->start;
7905         tmpl.nr = buf->len;
7906         tmpl.is_root = 1;
7907         tmpl.refs = 1;
7908         tmpl.metadata = 1;
7909         tmpl.max_size = buf->len;
7910         add_extent_rec(extent_cache, &tmpl);
7911
7912         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7913             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7914                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7915                                 0, 1);
7916         else
7917                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7918                                 1);
7919         return ret;
7920 }
7921
7922 /* as we fix the tree, we might be deleting blocks that
7923  * we're tracking for repair.  This hook makes sure we
7924  * remove any backrefs for blocks as we are fixing them.
7925  */
7926 static int free_extent_hook(struct btrfs_trans_handle *trans,
7927                             struct btrfs_root *root,
7928                             u64 bytenr, u64 num_bytes, u64 parent,
7929                             u64 root_objectid, u64 owner, u64 offset,
7930                             int refs_to_drop)
7931 {
7932         struct extent_record *rec;
7933         struct cache_extent *cache;
7934         int is_data;
7935         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7936
7937         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7938         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7939         if (!cache)
7940                 return 0;
7941
7942         rec = container_of(cache, struct extent_record, cache);
7943         if (is_data) {
7944                 struct data_backref *back;
7945                 back = find_data_backref(rec, parent, root_objectid, owner,
7946                                          offset, 1, bytenr, num_bytes);
7947                 if (!back)
7948                         goto out;
7949                 if (back->node.found_ref) {
7950                         back->found_ref -= refs_to_drop;
7951                         if (rec->refs)
7952                                 rec->refs -= refs_to_drop;
7953                 }
7954                 if (back->node.found_extent_tree) {
7955                         back->num_refs -= refs_to_drop;
7956                         if (rec->extent_item_refs)
7957                                 rec->extent_item_refs -= refs_to_drop;
7958                 }
7959                 if (back->found_ref == 0)
7960                         back->node.found_ref = 0;
7961                 if (back->num_refs == 0)
7962                         back->node.found_extent_tree = 0;
7963
7964                 if (!back->node.found_extent_tree && back->node.found_ref) {
7965                         list_del(&back->node.list);
7966                         free(back);
7967                 }
7968         } else {
7969                 struct tree_backref *back;
7970                 back = find_tree_backref(rec, parent, root_objectid);
7971                 if (!back)
7972                         goto out;
7973                 if (back->node.found_ref) {
7974                         if (rec->refs)
7975                                 rec->refs--;
7976                         back->node.found_ref = 0;
7977                 }
7978                 if (back->node.found_extent_tree) {
7979                         if (rec->extent_item_refs)
7980                                 rec->extent_item_refs--;
7981                         back->node.found_extent_tree = 0;
7982                 }
7983                 if (!back->node.found_extent_tree && back->node.found_ref) {
7984                         list_del(&back->node.list);
7985                         free(back);
7986                 }
7987         }
7988         maybe_free_extent_rec(extent_cache, rec);
7989 out:
7990         return 0;
7991 }
7992
7993 static int delete_extent_records(struct btrfs_trans_handle *trans,
7994                                  struct btrfs_root *root,
7995                                  struct btrfs_path *path,
7996                                  u64 bytenr)
7997 {
7998         struct btrfs_key key;
7999         struct btrfs_key found_key;
8000         struct extent_buffer *leaf;
8001         int ret;
8002         int slot;
8003
8004
8005         key.objectid = bytenr;
8006         key.type = (u8)-1;
8007         key.offset = (u64)-1;
8008
8009         while(1) {
8010                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8011                                         &key, path, 0, 1);
8012                 if (ret < 0)
8013                         break;
8014
8015                 if (ret > 0) {
8016                         ret = 0;
8017                         if (path->slots[0] == 0)
8018                                 break;
8019                         path->slots[0]--;
8020                 }
8021                 ret = 0;
8022
8023                 leaf = path->nodes[0];
8024                 slot = path->slots[0];
8025
8026                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8027                 if (found_key.objectid != bytenr)
8028                         break;
8029
8030                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8031                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8032                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8033                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8034                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8035                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8036                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8037                         btrfs_release_path(path);
8038                         if (found_key.type == 0) {
8039                                 if (found_key.offset == 0)
8040                                         break;
8041                                 key.offset = found_key.offset - 1;
8042                                 key.type = found_key.type;
8043                         }
8044                         key.type = found_key.type - 1;
8045                         key.offset = (u64)-1;
8046                         continue;
8047                 }
8048
8049                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8050                         found_key.objectid, found_key.type, found_key.offset);
8051
8052                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8053                 if (ret)
8054                         break;
8055                 btrfs_release_path(path);
8056
8057                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8058                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8059                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8060                                 found_key.offset : root->nodesize;
8061
8062                         ret = btrfs_update_block_group(trans, root, bytenr,
8063                                                        bytes, 0, 0);
8064                         if (ret)
8065                                 break;
8066                 }
8067         }
8068
8069         btrfs_release_path(path);
8070         return ret;
8071 }
8072
8073 /*
8074  * for a single backref, this will allocate a new extent
8075  * and add the backref to it.
8076  */
8077 static int record_extent(struct btrfs_trans_handle *trans,
8078                          struct btrfs_fs_info *info,
8079                          struct btrfs_path *path,
8080                          struct extent_record *rec,
8081                          struct extent_backref *back,
8082                          int allocated, u64 flags)
8083 {
8084         int ret = 0;
8085         struct btrfs_root *extent_root = info->extent_root;
8086         struct extent_buffer *leaf;
8087         struct btrfs_key ins_key;
8088         struct btrfs_extent_item *ei;
8089         struct data_backref *dback;
8090         struct btrfs_tree_block_info *bi;
8091
8092         if (!back->is_data)
8093                 rec->max_size = max_t(u64, rec->max_size,
8094                                     info->extent_root->nodesize);
8095
8096         if (!allocated) {
8097                 u32 item_size = sizeof(*ei);
8098
8099                 if (!back->is_data)
8100                         item_size += sizeof(*bi);
8101
8102                 ins_key.objectid = rec->start;
8103                 ins_key.offset = rec->max_size;
8104                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8105
8106                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8107                                         &ins_key, item_size);
8108                 if (ret)
8109                         goto fail;
8110
8111                 leaf = path->nodes[0];
8112                 ei = btrfs_item_ptr(leaf, path->slots[0],
8113                                     struct btrfs_extent_item);
8114
8115                 btrfs_set_extent_refs(leaf, ei, 0);
8116                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8117
8118                 if (back->is_data) {
8119                         btrfs_set_extent_flags(leaf, ei,
8120                                                BTRFS_EXTENT_FLAG_DATA);
8121                 } else {
8122                         struct btrfs_disk_key copy_key;;
8123
8124                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8125                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8126                                              sizeof(*bi));
8127
8128                         btrfs_set_disk_key_objectid(&copy_key,
8129                                                     rec->info_objectid);
8130                         btrfs_set_disk_key_type(&copy_key, 0);
8131                         btrfs_set_disk_key_offset(&copy_key, 0);
8132
8133                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8134                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8135
8136                         btrfs_set_extent_flags(leaf, ei,
8137                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8138                 }
8139
8140                 btrfs_mark_buffer_dirty(leaf);
8141                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8142                                                rec->max_size, 1, 0);
8143                 if (ret)
8144                         goto fail;
8145                 btrfs_release_path(path);
8146         }
8147
8148         if (back->is_data) {
8149                 u64 parent;
8150                 int i;
8151
8152                 dback = to_data_backref(back);
8153                 if (back->full_backref)
8154                         parent = dback->parent;
8155                 else
8156                         parent = 0;
8157
8158                 for (i = 0; i < dback->found_ref; i++) {
8159                         /* if parent != 0, we're doing a full backref
8160                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8161                          * just makes the backref allocator create a data
8162                          * backref
8163                          */
8164                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8165                                                    rec->start, rec->max_size,
8166                                                    parent,
8167                                                    dback->root,
8168                                                    parent ?
8169                                                    BTRFS_FIRST_FREE_OBJECTID :
8170                                                    dback->owner,
8171                                                    dback->offset);
8172                         if (ret)
8173                                 break;
8174                 }
8175                 fprintf(stderr, "adding new data backref"
8176                                 " on %llu %s %llu owner %llu"
8177                                 " offset %llu found %d\n",
8178                                 (unsigned long long)rec->start,
8179                                 back->full_backref ?
8180                                 "parent" : "root",
8181                                 back->full_backref ?
8182                                 (unsigned long long)parent :
8183                                 (unsigned long long)dback->root,
8184                                 (unsigned long long)dback->owner,
8185                                 (unsigned long long)dback->offset,
8186                                 dback->found_ref);
8187         } else {
8188                 u64 parent;
8189                 struct tree_backref *tback;
8190
8191                 tback = to_tree_backref(back);
8192                 if (back->full_backref)
8193                         parent = tback->parent;
8194                 else
8195                         parent = 0;
8196
8197                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8198                                            rec->start, rec->max_size,
8199                                            parent, tback->root, 0, 0);
8200                 fprintf(stderr, "adding new tree backref on "
8201                         "start %llu len %llu parent %llu root %llu\n",
8202                         rec->start, rec->max_size, parent, tback->root);
8203         }
8204 fail:
8205         btrfs_release_path(path);
8206         return ret;
8207 }
8208
8209 static struct extent_entry *find_entry(struct list_head *entries,
8210                                        u64 bytenr, u64 bytes)
8211 {
8212         struct extent_entry *entry = NULL;
8213
8214         list_for_each_entry(entry, entries, list) {
8215                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8216                         return entry;
8217         }
8218
8219         return NULL;
8220 }
8221
8222 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8223 {
8224         struct extent_entry *entry, *best = NULL, *prev = NULL;
8225
8226         list_for_each_entry(entry, entries, list) {
8227                 /*
8228                  * If there are as many broken entries as entries then we know
8229                  * not to trust this particular entry.
8230                  */
8231                 if (entry->broken == entry->count)
8232                         continue;
8233
8234                 /*
8235                  * Special case, when there are only two entries and 'best' is
8236                  * the first one
8237                  */
8238                 if (!prev) {
8239                         best = entry;
8240                         prev = entry;
8241                         continue;
8242                 }
8243
8244                 /*
8245                  * If our current entry == best then we can't be sure our best
8246                  * is really the best, so we need to keep searching.
8247                  */
8248                 if (best && best->count == entry->count) {
8249                         prev = entry;
8250                         best = NULL;
8251                         continue;
8252                 }
8253
8254                 /* Prev == entry, not good enough, have to keep searching */
8255                 if (!prev->broken && prev->count == entry->count)
8256                         continue;
8257
8258                 if (!best)
8259                         best = (prev->count > entry->count) ? prev : entry;
8260                 else if (best->count < entry->count)
8261                         best = entry;
8262                 prev = entry;
8263         }
8264
8265         return best;
8266 }
8267
8268 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8269                       struct data_backref *dback, struct extent_entry *entry)
8270 {
8271         struct btrfs_trans_handle *trans;
8272         struct btrfs_root *root;
8273         struct btrfs_file_extent_item *fi;
8274         struct extent_buffer *leaf;
8275         struct btrfs_key key;
8276         u64 bytenr, bytes;
8277         int ret, err;
8278
8279         key.objectid = dback->root;
8280         key.type = BTRFS_ROOT_ITEM_KEY;
8281         key.offset = (u64)-1;
8282         root = btrfs_read_fs_root(info, &key);
8283         if (IS_ERR(root)) {
8284                 fprintf(stderr, "Couldn't find root for our ref\n");
8285                 return -EINVAL;
8286         }
8287
8288         /*
8289          * The backref points to the original offset of the extent if it was
8290          * split, so we need to search down to the offset we have and then walk
8291          * forward until we find the backref we're looking for.
8292          */
8293         key.objectid = dback->owner;
8294         key.type = BTRFS_EXTENT_DATA_KEY;
8295         key.offset = dback->offset;
8296         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8297         if (ret < 0) {
8298                 fprintf(stderr, "Error looking up ref %d\n", ret);
8299                 return ret;
8300         }
8301
8302         while (1) {
8303                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8304                         ret = btrfs_next_leaf(root, path);
8305                         if (ret) {
8306                                 fprintf(stderr, "Couldn't find our ref, next\n");
8307                                 return -EINVAL;
8308                         }
8309                 }
8310                 leaf = path->nodes[0];
8311                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8312                 if (key.objectid != dback->owner ||
8313                     key.type != BTRFS_EXTENT_DATA_KEY) {
8314                         fprintf(stderr, "Couldn't find our ref, search\n");
8315                         return -EINVAL;
8316                 }
8317                 fi = btrfs_item_ptr(leaf, path->slots[0],
8318                                     struct btrfs_file_extent_item);
8319                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8320                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8321
8322                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8323                         break;
8324                 path->slots[0]++;
8325         }
8326
8327         btrfs_release_path(path);
8328
8329         trans = btrfs_start_transaction(root, 1);
8330         if (IS_ERR(trans))
8331                 return PTR_ERR(trans);
8332
8333         /*
8334          * Ok we have the key of the file extent we want to fix, now we can cow
8335          * down to the thing and fix it.
8336          */
8337         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8338         if (ret < 0) {
8339                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8340                         key.objectid, key.type, key.offset, ret);
8341                 goto out;
8342         }
8343         if (ret > 0) {
8344                 fprintf(stderr, "Well that's odd, we just found this key "
8345                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8346                         key.offset);
8347                 ret = -EINVAL;
8348                 goto out;
8349         }
8350         leaf = path->nodes[0];
8351         fi = btrfs_item_ptr(leaf, path->slots[0],
8352                             struct btrfs_file_extent_item);
8353
8354         if (btrfs_file_extent_compression(leaf, fi) &&
8355             dback->disk_bytenr != entry->bytenr) {
8356                 fprintf(stderr, "Ref doesn't match the record start and is "
8357                         "compressed, please take a btrfs-image of this file "
8358                         "system and send it to a btrfs developer so they can "
8359                         "complete this functionality for bytenr %Lu\n",
8360                         dback->disk_bytenr);
8361                 ret = -EINVAL;
8362                 goto out;
8363         }
8364
8365         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8366                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8367         } else if (dback->disk_bytenr > entry->bytenr) {
8368                 u64 off_diff, offset;
8369
8370                 off_diff = dback->disk_bytenr - entry->bytenr;
8371                 offset = btrfs_file_extent_offset(leaf, fi);
8372                 if (dback->disk_bytenr + offset +
8373                     btrfs_file_extent_num_bytes(leaf, fi) >
8374                     entry->bytenr + entry->bytes) {
8375                         fprintf(stderr, "Ref is past the entry end, please "
8376                                 "take a btrfs-image of this file system and "
8377                                 "send it to a btrfs developer, ref %Lu\n",
8378                                 dback->disk_bytenr);
8379                         ret = -EINVAL;
8380                         goto out;
8381                 }
8382                 offset += off_diff;
8383                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8384                 btrfs_set_file_extent_offset(leaf, fi, offset);
8385         } else if (dback->disk_bytenr < entry->bytenr) {
8386                 u64 offset;
8387
8388                 offset = btrfs_file_extent_offset(leaf, fi);
8389                 if (dback->disk_bytenr + offset < entry->bytenr) {
8390                         fprintf(stderr, "Ref is before the entry start, please"
8391                                 " take a btrfs-image of this file system and "
8392                                 "send it to a btrfs developer, ref %Lu\n",
8393                                 dback->disk_bytenr);
8394                         ret = -EINVAL;
8395                         goto out;
8396                 }
8397
8398                 offset += dback->disk_bytenr;
8399                 offset -= entry->bytenr;
8400                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8401                 btrfs_set_file_extent_offset(leaf, fi, offset);
8402         }
8403
8404         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8405
8406         /*
8407          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8408          * only do this if we aren't using compression, otherwise it's a
8409          * trickier case.
8410          */
8411         if (!btrfs_file_extent_compression(leaf, fi))
8412                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8413         else
8414                 printf("ram bytes may be wrong?\n");
8415         btrfs_mark_buffer_dirty(leaf);
8416 out:
8417         err = btrfs_commit_transaction(trans, root);
8418         btrfs_release_path(path);
8419         return ret ? ret : err;
8420 }
8421
8422 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8423                            struct extent_record *rec)
8424 {
8425         struct extent_backref *back;
8426         struct data_backref *dback;
8427         struct extent_entry *entry, *best = NULL;
8428         LIST_HEAD(entries);
8429         int nr_entries = 0;
8430         int broken_entries = 0;
8431         int ret = 0;
8432         short mismatch = 0;
8433
8434         /*
8435          * Metadata is easy and the backrefs should always agree on bytenr and
8436          * size, if not we've got bigger issues.
8437          */
8438         if (rec->metadata)
8439                 return 0;
8440
8441         list_for_each_entry(back, &rec->backrefs, list) {
8442                 if (back->full_backref || !back->is_data)
8443                         continue;
8444
8445                 dback = to_data_backref(back);
8446
8447                 /*
8448                  * We only pay attention to backrefs that we found a real
8449                  * backref for.
8450                  */
8451                 if (dback->found_ref == 0)
8452                         continue;
8453
8454                 /*
8455                  * For now we only catch when the bytes don't match, not the
8456                  * bytenr.  We can easily do this at the same time, but I want
8457                  * to have a fs image to test on before we just add repair
8458                  * functionality willy-nilly so we know we won't screw up the
8459                  * repair.
8460                  */
8461
8462                 entry = find_entry(&entries, dback->disk_bytenr,
8463                                    dback->bytes);
8464                 if (!entry) {
8465                         entry = malloc(sizeof(struct extent_entry));
8466                         if (!entry) {
8467                                 ret = -ENOMEM;
8468                                 goto out;
8469                         }
8470                         memset(entry, 0, sizeof(*entry));
8471                         entry->bytenr = dback->disk_bytenr;
8472                         entry->bytes = dback->bytes;
8473                         list_add_tail(&entry->list, &entries);
8474                         nr_entries++;
8475                 }
8476
8477                 /*
8478                  * If we only have on entry we may think the entries agree when
8479                  * in reality they don't so we have to do some extra checking.
8480                  */
8481                 if (dback->disk_bytenr != rec->start ||
8482                     dback->bytes != rec->nr || back->broken)
8483                         mismatch = 1;
8484
8485                 if (back->broken) {
8486                         entry->broken++;
8487                         broken_entries++;
8488                 }
8489
8490                 entry->count++;
8491         }
8492
8493         /* Yay all the backrefs agree, carry on good sir */
8494         if (nr_entries <= 1 && !mismatch)
8495                 goto out;
8496
8497         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8498                 "%Lu\n", rec->start);
8499
8500         /*
8501          * First we want to see if the backrefs can agree amongst themselves who
8502          * is right, so figure out which one of the entries has the highest
8503          * count.
8504          */
8505         best = find_most_right_entry(&entries);
8506
8507         /*
8508          * Ok so we may have an even split between what the backrefs think, so
8509          * this is where we use the extent ref to see what it thinks.
8510          */
8511         if (!best) {
8512                 entry = find_entry(&entries, rec->start, rec->nr);
8513                 if (!entry && (!broken_entries || !rec->found_rec)) {
8514                         fprintf(stderr, "Backrefs don't agree with each other "
8515                                 "and extent record doesn't agree with anybody,"
8516                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8517                                 rec->start, rec->nr);
8518                         ret = -EINVAL;
8519                         goto out;
8520                 } else if (!entry) {
8521                         /*
8522                          * Ok our backrefs were broken, we'll assume this is the
8523                          * correct value and add an entry for this range.
8524                          */
8525                         entry = malloc(sizeof(struct extent_entry));
8526                         if (!entry) {
8527                                 ret = -ENOMEM;
8528                                 goto out;
8529                         }
8530                         memset(entry, 0, sizeof(*entry));
8531                         entry->bytenr = rec->start;
8532                         entry->bytes = rec->nr;
8533                         list_add_tail(&entry->list, &entries);
8534                         nr_entries++;
8535                 }
8536                 entry->count++;
8537                 best = find_most_right_entry(&entries);
8538                 if (!best) {
8539                         fprintf(stderr, "Backrefs and extent record evenly "
8540                                 "split on who is right, this is going to "
8541                                 "require user input to fix bytenr %Lu bytes "
8542                                 "%Lu\n", rec->start, rec->nr);
8543                         ret = -EINVAL;
8544                         goto out;
8545                 }
8546         }
8547
8548         /*
8549          * I don't think this can happen currently as we'll abort() if we catch
8550          * this case higher up, but in case somebody removes that we still can't
8551          * deal with it properly here yet, so just bail out of that's the case.
8552          */
8553         if (best->bytenr != rec->start) {
8554                 fprintf(stderr, "Extent start and backref starts don't match, "
8555                         "please use btrfs-image on this file system and send "
8556                         "it to a btrfs developer so they can make fsck fix "
8557                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8558                         rec->start, rec->nr);
8559                 ret = -EINVAL;
8560                 goto out;
8561         }
8562
8563         /*
8564          * Ok great we all agreed on an extent record, let's go find the real
8565          * references and fix up the ones that don't match.
8566          */
8567         list_for_each_entry(back, &rec->backrefs, list) {
8568                 if (back->full_backref || !back->is_data)
8569                         continue;
8570
8571                 dback = to_data_backref(back);
8572
8573                 /*
8574                  * Still ignoring backrefs that don't have a real ref attached
8575                  * to them.
8576                  */
8577                 if (dback->found_ref == 0)
8578                         continue;
8579
8580                 if (dback->bytes == best->bytes &&
8581                     dback->disk_bytenr == best->bytenr)
8582                         continue;
8583
8584                 ret = repair_ref(info, path, dback, best);
8585                 if (ret)
8586                         goto out;
8587         }
8588
8589         /*
8590          * Ok we messed with the actual refs, which means we need to drop our
8591          * entire cache and go back and rescan.  I know this is a huge pain and
8592          * adds a lot of extra work, but it's the only way to be safe.  Once all
8593          * the backrefs agree we may not need to do anything to the extent
8594          * record itself.
8595          */
8596         ret = -EAGAIN;
8597 out:
8598         while (!list_empty(&entries)) {
8599                 entry = list_entry(entries.next, struct extent_entry, list);
8600                 list_del_init(&entry->list);
8601                 free(entry);
8602         }
8603         return ret;
8604 }
8605
8606 static int process_duplicates(struct cache_tree *extent_cache,
8607                               struct extent_record *rec)
8608 {
8609         struct extent_record *good, *tmp;
8610         struct cache_extent *cache;
8611         int ret;
8612
8613         /*
8614          * If we found a extent record for this extent then return, or if we
8615          * have more than one duplicate we are likely going to need to delete
8616          * something.
8617          */
8618         if (rec->found_rec || rec->num_duplicates > 1)
8619                 return 0;
8620
8621         /* Shouldn't happen but just in case */
8622         BUG_ON(!rec->num_duplicates);
8623
8624         /*
8625          * So this happens if we end up with a backref that doesn't match the
8626          * actual extent entry.  So either the backref is bad or the extent
8627          * entry is bad.  Either way we want to have the extent_record actually
8628          * reflect what we found in the extent_tree, so we need to take the
8629          * duplicate out and use that as the extent_record since the only way we
8630          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8631          */
8632         remove_cache_extent(extent_cache, &rec->cache);
8633
8634         good = to_extent_record(rec->dups.next);
8635         list_del_init(&good->list);
8636         INIT_LIST_HEAD(&good->backrefs);
8637         INIT_LIST_HEAD(&good->dups);
8638         good->cache.start = good->start;
8639         good->cache.size = good->nr;
8640         good->content_checked = 0;
8641         good->owner_ref_checked = 0;
8642         good->num_duplicates = 0;
8643         good->refs = rec->refs;
8644         list_splice_init(&rec->backrefs, &good->backrefs);
8645         while (1) {
8646                 cache = lookup_cache_extent(extent_cache, good->start,
8647                                             good->nr);
8648                 if (!cache)
8649                         break;
8650                 tmp = container_of(cache, struct extent_record, cache);
8651
8652                 /*
8653                  * If we find another overlapping extent and it's found_rec is
8654                  * set then it's a duplicate and we need to try and delete
8655                  * something.
8656                  */
8657                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8658                         if (list_empty(&good->list))
8659                                 list_add_tail(&good->list,
8660                                               &duplicate_extents);
8661                         good->num_duplicates += tmp->num_duplicates + 1;
8662                         list_splice_init(&tmp->dups, &good->dups);
8663                         list_del_init(&tmp->list);
8664                         list_add_tail(&tmp->list, &good->dups);
8665                         remove_cache_extent(extent_cache, &tmp->cache);
8666                         continue;
8667                 }
8668
8669                 /*
8670                  * Ok we have another non extent item backed extent rec, so lets
8671                  * just add it to this extent and carry on like we did above.
8672                  */
8673                 good->refs += tmp->refs;
8674                 list_splice_init(&tmp->backrefs, &good->backrefs);
8675                 remove_cache_extent(extent_cache, &tmp->cache);
8676                 free(tmp);
8677         }
8678         ret = insert_cache_extent(extent_cache, &good->cache);
8679         BUG_ON(ret);
8680         free(rec);
8681         return good->num_duplicates ? 0 : 1;
8682 }
8683
8684 static int delete_duplicate_records(struct btrfs_root *root,
8685                                     struct extent_record *rec)
8686 {
8687         struct btrfs_trans_handle *trans;
8688         LIST_HEAD(delete_list);
8689         struct btrfs_path path;
8690         struct extent_record *tmp, *good, *n;
8691         int nr_del = 0;
8692         int ret = 0, err;
8693         struct btrfs_key key;
8694
8695         btrfs_init_path(&path);
8696
8697         good = rec;
8698         /* Find the record that covers all of the duplicates. */
8699         list_for_each_entry(tmp, &rec->dups, list) {
8700                 if (good->start < tmp->start)
8701                         continue;
8702                 if (good->nr > tmp->nr)
8703                         continue;
8704
8705                 if (tmp->start + tmp->nr < good->start + good->nr) {
8706                         fprintf(stderr, "Ok we have overlapping extents that "
8707                                 "aren't completely covered by each other, this "
8708                                 "is going to require more careful thought.  "
8709                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8710                                 tmp->start, tmp->nr, good->start, good->nr);
8711                         abort();
8712                 }
8713                 good = tmp;
8714         }
8715
8716         if (good != rec)
8717                 list_add_tail(&rec->list, &delete_list);
8718
8719         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8720                 if (tmp == good)
8721                         continue;
8722                 list_move_tail(&tmp->list, &delete_list);
8723         }
8724
8725         root = root->fs_info->extent_root;
8726         trans = btrfs_start_transaction(root, 1);
8727         if (IS_ERR(trans)) {
8728                 ret = PTR_ERR(trans);
8729                 goto out;
8730         }
8731
8732         list_for_each_entry(tmp, &delete_list, list) {
8733                 if (tmp->found_rec == 0)
8734                         continue;
8735                 key.objectid = tmp->start;
8736                 key.type = BTRFS_EXTENT_ITEM_KEY;
8737                 key.offset = tmp->nr;
8738
8739                 /* Shouldn't happen but just in case */
8740                 if (tmp->metadata) {
8741                         fprintf(stderr, "Well this shouldn't happen, extent "
8742                                 "record overlaps but is metadata? "
8743                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8744                         abort();
8745                 }
8746
8747                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8748                 if (ret) {
8749                         if (ret > 0)
8750                                 ret = -EINVAL;
8751                         break;
8752                 }
8753                 ret = btrfs_del_item(trans, root, &path);
8754                 if (ret)
8755                         break;
8756                 btrfs_release_path(&path);
8757                 nr_del++;
8758         }
8759         err = btrfs_commit_transaction(trans, root);
8760         if (err && !ret)
8761                 ret = err;
8762 out:
8763         while (!list_empty(&delete_list)) {
8764                 tmp = to_extent_record(delete_list.next);
8765                 list_del_init(&tmp->list);
8766                 if (tmp == rec)
8767                         continue;
8768                 free(tmp);
8769         }
8770
8771         while (!list_empty(&rec->dups)) {
8772                 tmp = to_extent_record(rec->dups.next);
8773                 list_del_init(&tmp->list);
8774                 free(tmp);
8775         }
8776
8777         btrfs_release_path(&path);
8778
8779         if (!ret && !nr_del)
8780                 rec->num_duplicates = 0;
8781
8782         return ret ? ret : nr_del;
8783 }
8784
8785 static int find_possible_backrefs(struct btrfs_fs_info *info,
8786                                   struct btrfs_path *path,
8787                                   struct cache_tree *extent_cache,
8788                                   struct extent_record *rec)
8789 {
8790         struct btrfs_root *root;
8791         struct extent_backref *back;
8792         struct data_backref *dback;
8793         struct cache_extent *cache;
8794         struct btrfs_file_extent_item *fi;
8795         struct btrfs_key key;
8796         u64 bytenr, bytes;
8797         int ret;
8798
8799         list_for_each_entry(back, &rec->backrefs, list) {
8800                 /* Don't care about full backrefs (poor unloved backrefs) */
8801                 if (back->full_backref || !back->is_data)
8802                         continue;
8803
8804                 dback = to_data_backref(back);
8805
8806                 /* We found this one, we don't need to do a lookup */
8807                 if (dback->found_ref)
8808                         continue;
8809
8810                 key.objectid = dback->root;
8811                 key.type = BTRFS_ROOT_ITEM_KEY;
8812                 key.offset = (u64)-1;
8813
8814                 root = btrfs_read_fs_root(info, &key);
8815
8816                 /* No root, definitely a bad ref, skip */
8817                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8818                         continue;
8819                 /* Other err, exit */
8820                 if (IS_ERR(root))
8821                         return PTR_ERR(root);
8822
8823                 key.objectid = dback->owner;
8824                 key.type = BTRFS_EXTENT_DATA_KEY;
8825                 key.offset = dback->offset;
8826                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8827                 if (ret) {
8828                         btrfs_release_path(path);
8829                         if (ret < 0)
8830                                 return ret;
8831                         /* Didn't find it, we can carry on */
8832                         ret = 0;
8833                         continue;
8834                 }
8835
8836                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8837                                     struct btrfs_file_extent_item);
8838                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8839                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8840                 btrfs_release_path(path);
8841                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8842                 if (cache) {
8843                         struct extent_record *tmp;
8844                         tmp = container_of(cache, struct extent_record, cache);
8845
8846                         /*
8847                          * If we found an extent record for the bytenr for this
8848                          * particular backref then we can't add it to our
8849                          * current extent record.  We only want to add backrefs
8850                          * that don't have a corresponding extent item in the
8851                          * extent tree since they likely belong to this record
8852                          * and we need to fix it if it doesn't match bytenrs.
8853                          */
8854                         if  (tmp->found_rec)
8855                                 continue;
8856                 }
8857
8858                 dback->found_ref += 1;
8859                 dback->disk_bytenr = bytenr;
8860                 dback->bytes = bytes;
8861
8862                 /*
8863                  * Set this so the verify backref code knows not to trust the
8864                  * values in this backref.
8865                  */
8866                 back->broken = 1;
8867         }
8868
8869         return 0;
8870 }
8871
8872 /*
8873  * Record orphan data ref into corresponding root.
8874  *
8875  * Return 0 if the extent item contains data ref and recorded.
8876  * Return 1 if the extent item contains no useful data ref
8877  *   On that case, it may contains only shared_dataref or metadata backref
8878  *   or the file extent exists(this should be handled by the extent bytenr
8879  *   recovery routine)
8880  * Return <0 if something goes wrong.
8881  */
8882 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8883                                       struct extent_record *rec)
8884 {
8885         struct btrfs_key key;
8886         struct btrfs_root *dest_root;
8887         struct extent_backref *back;
8888         struct data_backref *dback;
8889         struct orphan_data_extent *orphan;
8890         struct btrfs_path path;
8891         int recorded_data_ref = 0;
8892         int ret = 0;
8893
8894         if (rec->metadata)
8895                 return 1;
8896         btrfs_init_path(&path);
8897         list_for_each_entry(back, &rec->backrefs, list) {
8898                 if (back->full_backref || !back->is_data ||
8899                     !back->found_extent_tree)
8900                         continue;
8901                 dback = to_data_backref(back);
8902                 if (dback->found_ref)
8903                         continue;
8904                 key.objectid = dback->root;
8905                 key.type = BTRFS_ROOT_ITEM_KEY;
8906                 key.offset = (u64)-1;
8907
8908                 dest_root = btrfs_read_fs_root(fs_info, &key);
8909
8910                 /* For non-exist root we just skip it */
8911                 if (IS_ERR(dest_root) || !dest_root)
8912                         continue;
8913
8914                 key.objectid = dback->owner;
8915                 key.type = BTRFS_EXTENT_DATA_KEY;
8916                 key.offset = dback->offset;
8917
8918                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8919                 btrfs_release_path(&path);
8920                 /*
8921                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8922                  * we need to record it for inode/file extent rebuild.
8923                  * For ret > 0, we record it only for file extent rebuild.
8924                  * For ret == 0, the file extent exists but only bytenr
8925                  * mismatch, let the original bytenr fix routine to handle,
8926                  * don't record it.
8927                  */
8928                 if (ret == 0)
8929                         continue;
8930                 ret = 0;
8931                 orphan = malloc(sizeof(*orphan));
8932                 if (!orphan) {
8933                         ret = -ENOMEM;
8934                         goto out;
8935                 }
8936                 INIT_LIST_HEAD(&orphan->list);
8937                 orphan->root = dback->root;
8938                 orphan->objectid = dback->owner;
8939                 orphan->offset = dback->offset;
8940                 orphan->disk_bytenr = rec->cache.start;
8941                 orphan->disk_len = rec->cache.size;
8942                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8943                 recorded_data_ref = 1;
8944         }
8945 out:
8946         btrfs_release_path(&path);
8947         if (!ret)
8948                 return !recorded_data_ref;
8949         else
8950                 return ret;
8951 }
8952
8953 /*
8954  * when an incorrect extent item is found, this will delete
8955  * all of the existing entries for it and recreate them
8956  * based on what the tree scan found.
8957  */
8958 static int fixup_extent_refs(struct btrfs_fs_info *info,
8959                              struct cache_tree *extent_cache,
8960                              struct extent_record *rec)
8961 {
8962         struct btrfs_trans_handle *trans = NULL;
8963         int ret;
8964         struct btrfs_path path;
8965         struct list_head *cur = rec->backrefs.next;
8966         struct cache_extent *cache;
8967         struct extent_backref *back;
8968         int allocated = 0;
8969         u64 flags = 0;
8970
8971         if (rec->flag_block_full_backref)
8972                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8973
8974         btrfs_init_path(&path);
8975         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8976                 /*
8977                  * Sometimes the backrefs themselves are so broken they don't
8978                  * get attached to any meaningful rec, so first go back and
8979                  * check any of our backrefs that we couldn't find and throw
8980                  * them into the list if we find the backref so that
8981                  * verify_backrefs can figure out what to do.
8982                  */
8983                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8984                 if (ret < 0)
8985                         goto out;
8986         }
8987
8988         /* step one, make sure all of the backrefs agree */
8989         ret = verify_backrefs(info, &path, rec);
8990         if (ret < 0)
8991                 goto out;
8992
8993         trans = btrfs_start_transaction(info->extent_root, 1);
8994         if (IS_ERR(trans)) {
8995                 ret = PTR_ERR(trans);
8996                 goto out;
8997         }
8998
8999         /* step two, delete all the existing records */
9000         ret = delete_extent_records(trans, info->extent_root, &path,
9001                                     rec->start);
9002
9003         if (ret < 0)
9004                 goto out;
9005
9006         /* was this block corrupt?  If so, don't add references to it */
9007         cache = lookup_cache_extent(info->corrupt_blocks,
9008                                     rec->start, rec->max_size);
9009         if (cache) {
9010                 ret = 0;
9011                 goto out;
9012         }
9013
9014         /* step three, recreate all the refs we did find */
9015         while(cur != &rec->backrefs) {
9016                 back = to_extent_backref(cur);
9017                 cur = cur->next;
9018
9019                 /*
9020                  * if we didn't find any references, don't create a
9021                  * new extent record
9022                  */
9023                 if (!back->found_ref)
9024                         continue;
9025
9026                 rec->bad_full_backref = 0;
9027                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9028                 allocated = 1;
9029
9030                 if (ret)
9031                         goto out;
9032         }
9033 out:
9034         if (trans) {
9035                 int err = btrfs_commit_transaction(trans, info->extent_root);
9036                 if (!ret)
9037                         ret = err;
9038         }
9039
9040         if (!ret)
9041                 fprintf(stderr, "Repaired extent references for %llu\n",
9042                                 (unsigned long long)rec->start);
9043
9044         btrfs_release_path(&path);
9045         return ret;
9046 }
9047
9048 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9049                               struct extent_record *rec)
9050 {
9051         struct btrfs_trans_handle *trans;
9052         struct btrfs_root *root = fs_info->extent_root;
9053         struct btrfs_path path;
9054         struct btrfs_extent_item *ei;
9055         struct btrfs_key key;
9056         u64 flags;
9057         int ret = 0;
9058
9059         key.objectid = rec->start;
9060         if (rec->metadata) {
9061                 key.type = BTRFS_METADATA_ITEM_KEY;
9062                 key.offset = rec->info_level;
9063         } else {
9064                 key.type = BTRFS_EXTENT_ITEM_KEY;
9065                 key.offset = rec->max_size;
9066         }
9067
9068         trans = btrfs_start_transaction(root, 0);
9069         if (IS_ERR(trans))
9070                 return PTR_ERR(trans);
9071
9072         btrfs_init_path(&path);
9073         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9074         if (ret < 0) {
9075                 btrfs_release_path(&path);
9076                 btrfs_commit_transaction(trans, root);
9077                 return ret;
9078         } else if (ret) {
9079                 fprintf(stderr, "Didn't find extent for %llu\n",
9080                         (unsigned long long)rec->start);
9081                 btrfs_release_path(&path);
9082                 btrfs_commit_transaction(trans, root);
9083                 return -ENOENT;
9084         }
9085
9086         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9087                             struct btrfs_extent_item);
9088         flags = btrfs_extent_flags(path.nodes[0], ei);
9089         if (rec->flag_block_full_backref) {
9090                 fprintf(stderr, "setting full backref on %llu\n",
9091                         (unsigned long long)key.objectid);
9092                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9093         } else {
9094                 fprintf(stderr, "clearing full backref on %llu\n",
9095                         (unsigned long long)key.objectid);
9096                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9097         }
9098         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9099         btrfs_mark_buffer_dirty(path.nodes[0]);
9100         btrfs_release_path(&path);
9101         ret = btrfs_commit_transaction(trans, root);
9102         if (!ret)
9103                 fprintf(stderr, "Repaired extent flags for %llu\n",
9104                                 (unsigned long long)rec->start);
9105
9106         return ret;
9107 }
9108
9109 /* right now we only prune from the extent allocation tree */
9110 static int prune_one_block(struct btrfs_trans_handle *trans,
9111                            struct btrfs_fs_info *info,
9112                            struct btrfs_corrupt_block *corrupt)
9113 {
9114         int ret;
9115         struct btrfs_path path;
9116         struct extent_buffer *eb;
9117         u64 found;
9118         int slot;
9119         int nritems;
9120         int level = corrupt->level + 1;
9121
9122         btrfs_init_path(&path);
9123 again:
9124         /* we want to stop at the parent to our busted block */
9125         path.lowest_level = level;
9126
9127         ret = btrfs_search_slot(trans, info->extent_root,
9128                                 &corrupt->key, &path, -1, 1);
9129
9130         if (ret < 0)
9131                 goto out;
9132
9133         eb = path.nodes[level];
9134         if (!eb) {
9135                 ret = -ENOENT;
9136                 goto out;
9137         }
9138
9139         /*
9140          * hopefully the search gave us the block we want to prune,
9141          * lets try that first
9142          */
9143         slot = path.slots[level];
9144         found =  btrfs_node_blockptr(eb, slot);
9145         if (found == corrupt->cache.start)
9146                 goto del_ptr;
9147
9148         nritems = btrfs_header_nritems(eb);
9149
9150         /* the search failed, lets scan this node and hope we find it */
9151         for (slot = 0; slot < nritems; slot++) {
9152                 found =  btrfs_node_blockptr(eb, slot);
9153                 if (found == corrupt->cache.start)
9154                         goto del_ptr;
9155         }
9156         /*
9157          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9158          * to this block
9159          */
9160         if (eb == info->extent_root->node) {
9161                 ret = -ENOENT;
9162                 goto out;
9163         } else {
9164                 level++;
9165                 btrfs_release_path(&path);
9166                 goto again;
9167         }
9168
9169 del_ptr:
9170         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9171         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9172
9173 out:
9174         btrfs_release_path(&path);
9175         return ret;
9176 }
9177
9178 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9179 {
9180         struct btrfs_trans_handle *trans = NULL;
9181         struct cache_extent *cache;
9182         struct btrfs_corrupt_block *corrupt;
9183
9184         while (1) {
9185                 cache = search_cache_extent(info->corrupt_blocks, 0);
9186                 if (!cache)
9187                         break;
9188                 if (!trans) {
9189                         trans = btrfs_start_transaction(info->extent_root, 1);
9190                         if (IS_ERR(trans))
9191                                 return PTR_ERR(trans);
9192                 }
9193                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9194                 prune_one_block(trans, info, corrupt);
9195                 remove_cache_extent(info->corrupt_blocks, cache);
9196         }
9197         if (trans)
9198                 return btrfs_commit_transaction(trans, info->extent_root);
9199         return 0;
9200 }
9201
9202 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9203 {
9204         struct btrfs_block_group_cache *cache;
9205         u64 start, end;
9206         int ret;
9207
9208         while (1) {
9209                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9210                                             &start, &end, EXTENT_DIRTY);
9211                 if (ret)
9212                         break;
9213                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9214         }
9215
9216         start = 0;
9217         while (1) {
9218                 cache = btrfs_lookup_first_block_group(fs_info, start);
9219                 if (!cache)
9220                         break;
9221                 if (cache->cached)
9222                         cache->cached = 0;
9223                 start = cache->key.objectid + cache->key.offset;
9224         }
9225 }
9226
9227 static int check_extent_refs(struct btrfs_root *root,
9228                              struct cache_tree *extent_cache)
9229 {
9230         struct extent_record *rec;
9231         struct cache_extent *cache;
9232         int ret = 0;
9233         int had_dups = 0;
9234
9235         if (repair) {
9236                 /*
9237                  * if we're doing a repair, we have to make sure
9238                  * we don't allocate from the problem extents.
9239                  * In the worst case, this will be all the
9240                  * extents in the FS
9241                  */
9242                 cache = search_cache_extent(extent_cache, 0);
9243                 while(cache) {
9244                         rec = container_of(cache, struct extent_record, cache);
9245                         set_extent_dirty(root->fs_info->excluded_extents,
9246                                          rec->start,
9247                                          rec->start + rec->max_size - 1);
9248                         cache = next_cache_extent(cache);
9249                 }
9250
9251                 /* pin down all the corrupted blocks too */
9252                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9253                 while(cache) {
9254                         set_extent_dirty(root->fs_info->excluded_extents,
9255                                          cache->start,
9256                                          cache->start + cache->size - 1);
9257                         cache = next_cache_extent(cache);
9258                 }
9259                 prune_corrupt_blocks(root->fs_info);
9260                 reset_cached_block_groups(root->fs_info);
9261         }
9262
9263         reset_cached_block_groups(root->fs_info);
9264
9265         /*
9266          * We need to delete any duplicate entries we find first otherwise we
9267          * could mess up the extent tree when we have backrefs that actually
9268          * belong to a different extent item and not the weird duplicate one.
9269          */
9270         while (repair && !list_empty(&duplicate_extents)) {
9271                 rec = to_extent_record(duplicate_extents.next);
9272                 list_del_init(&rec->list);
9273
9274                 /* Sometimes we can find a backref before we find an actual
9275                  * extent, so we need to process it a little bit to see if there
9276                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9277                  * if this is a backref screwup.  If we need to delete stuff
9278                  * process_duplicates() will return 0, otherwise it will return
9279                  * 1 and we
9280                  */
9281                 if (process_duplicates(extent_cache, rec))
9282                         continue;
9283                 ret = delete_duplicate_records(root, rec);
9284                 if (ret < 0)
9285                         return ret;
9286                 /*
9287                  * delete_duplicate_records will return the number of entries
9288                  * deleted, so if it's greater than 0 then we know we actually
9289                  * did something and we need to remove.
9290                  */
9291                 if (ret)
9292                         had_dups = 1;
9293         }
9294
9295         if (had_dups)
9296                 return -EAGAIN;
9297
9298         while(1) {
9299                 int cur_err = 0;
9300                 int fix = 0;
9301
9302                 cache = search_cache_extent(extent_cache, 0);
9303                 if (!cache)
9304                         break;
9305                 rec = container_of(cache, struct extent_record, cache);
9306                 if (rec->num_duplicates) {
9307                         fprintf(stderr, "extent item %llu has multiple extent "
9308                                 "items\n", (unsigned long long)rec->start);
9309                         cur_err = 1;
9310                 }
9311
9312                 if (rec->refs != rec->extent_item_refs) {
9313                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9314                                 (unsigned long long)rec->start,
9315                                 (unsigned long long)rec->nr);
9316                         fprintf(stderr, "extent item %llu, found %llu\n",
9317                                 (unsigned long long)rec->extent_item_refs,
9318                                 (unsigned long long)rec->refs);
9319                         ret = record_orphan_data_extents(root->fs_info, rec);
9320                         if (ret < 0)
9321                                 goto repair_abort;
9322                         fix = ret;
9323                         cur_err = 1;
9324                 }
9325                 if (all_backpointers_checked(rec, 1)) {
9326                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9327                                 (unsigned long long)rec->start,
9328                                 (unsigned long long)rec->nr);
9329                         fix = 1;
9330                         cur_err = 1;
9331                 }
9332                 if (!rec->owner_ref_checked) {
9333                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9334                                 (unsigned long long)rec->start,
9335                                 (unsigned long long)rec->nr);
9336                         fix = 1;
9337                         cur_err = 1;
9338                 }
9339
9340                 if (repair && fix) {
9341                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9342                         if (ret)
9343                                 goto repair_abort;
9344                 }
9345
9346
9347                 if (rec->bad_full_backref) {
9348                         fprintf(stderr, "bad full backref, on [%llu]\n",
9349                                 (unsigned long long)rec->start);
9350                         if (repair) {
9351                                 ret = fixup_extent_flags(root->fs_info, rec);
9352                                 if (ret)
9353                                         goto repair_abort;
9354                                 fix = 1;
9355                         }
9356                         cur_err = 1;
9357                 }
9358                 /*
9359                  * Although it's not a extent ref's problem, we reuse this
9360                  * routine for error reporting.
9361                  * No repair function yet.
9362                  */
9363                 if (rec->crossing_stripes) {
9364                         fprintf(stderr,
9365                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9366                                 rec->start, rec->start + rec->max_size);
9367                         cur_err = 1;
9368                 }
9369
9370                 if (rec->wrong_chunk_type) {
9371                         fprintf(stderr,
9372                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9373                                 rec->start, rec->start + rec->max_size);
9374                         cur_err = 1;
9375                 }
9376
9377                 remove_cache_extent(extent_cache, cache);
9378                 free_all_extent_backrefs(rec);
9379                 if (!init_extent_tree && repair && (!cur_err || fix))
9380                         clear_extent_dirty(root->fs_info->excluded_extents,
9381                                            rec->start,
9382                                            rec->start + rec->max_size - 1);
9383                 free(rec);
9384         }
9385 repair_abort:
9386         if (repair) {
9387                 if (ret && ret != -EAGAIN) {
9388                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9389                         exit(1);
9390                 } else if (!ret) {
9391                         struct btrfs_trans_handle *trans;
9392
9393                         root = root->fs_info->extent_root;
9394                         trans = btrfs_start_transaction(root, 1);
9395                         if (IS_ERR(trans)) {
9396                                 ret = PTR_ERR(trans);
9397                                 goto repair_abort;
9398                         }
9399
9400                         btrfs_fix_block_accounting(trans, root);
9401                         ret = btrfs_commit_transaction(trans, root);
9402                         if (ret)
9403                                 goto repair_abort;
9404                 }
9405                 return ret;
9406         }
9407         return 0;
9408 }
9409
9410 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9411 {
9412         u64 stripe_size;
9413
9414         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9415                 stripe_size = length;
9416                 stripe_size /= num_stripes;
9417         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9418                 stripe_size = length * 2;
9419                 stripe_size /= num_stripes;
9420         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9421                 stripe_size = length;
9422                 stripe_size /= (num_stripes - 1);
9423         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9424                 stripe_size = length;
9425                 stripe_size /= (num_stripes - 2);
9426         } else {
9427                 stripe_size = length;
9428         }
9429         return stripe_size;
9430 }
9431
9432 /*
9433  * Check the chunk with its block group/dev list ref:
9434  * Return 0 if all refs seems valid.
9435  * Return 1 if part of refs seems valid, need later check for rebuild ref
9436  * like missing block group and needs to search extent tree to rebuild them.
9437  * Return -1 if essential refs are missing and unable to rebuild.
9438  */
9439 static int check_chunk_refs(struct chunk_record *chunk_rec,
9440                             struct block_group_tree *block_group_cache,
9441                             struct device_extent_tree *dev_extent_cache,
9442                             int silent)
9443 {
9444         struct cache_extent *block_group_item;
9445         struct block_group_record *block_group_rec;
9446         struct cache_extent *dev_extent_item;
9447         struct device_extent_record *dev_extent_rec;
9448         u64 devid;
9449         u64 offset;
9450         u64 length;
9451         int metadump_v2 = 0;
9452         int i;
9453         int ret = 0;
9454
9455         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9456                                                chunk_rec->offset,
9457                                                chunk_rec->length);
9458         if (block_group_item) {
9459                 block_group_rec = container_of(block_group_item,
9460                                                struct block_group_record,
9461                                                cache);
9462                 if (chunk_rec->length != block_group_rec->offset ||
9463                     chunk_rec->offset != block_group_rec->objectid ||
9464                     (!metadump_v2 &&
9465                      chunk_rec->type_flags != block_group_rec->flags)) {
9466                         if (!silent)
9467                                 fprintf(stderr,
9468                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9469                                         chunk_rec->objectid,
9470                                         chunk_rec->type,
9471                                         chunk_rec->offset,
9472                                         chunk_rec->length,
9473                                         chunk_rec->offset,
9474                                         chunk_rec->type_flags,
9475                                         block_group_rec->objectid,
9476                                         block_group_rec->type,
9477                                         block_group_rec->offset,
9478                                         block_group_rec->offset,
9479                                         block_group_rec->objectid,
9480                                         block_group_rec->flags);
9481                         ret = -1;
9482                 } else {
9483                         list_del_init(&block_group_rec->list);
9484                         chunk_rec->bg_rec = block_group_rec;
9485                 }
9486         } else {
9487                 if (!silent)
9488                         fprintf(stderr,
9489                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9490                                 chunk_rec->objectid,
9491                                 chunk_rec->type,
9492                                 chunk_rec->offset,
9493                                 chunk_rec->length,
9494                                 chunk_rec->offset,
9495                                 chunk_rec->type_flags);
9496                 ret = 1;
9497         }
9498
9499         if (metadump_v2)
9500                 return ret;
9501
9502         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9503                                     chunk_rec->num_stripes);
9504         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9505                 devid = chunk_rec->stripes[i].devid;
9506                 offset = chunk_rec->stripes[i].offset;
9507                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9508                                                        devid, offset, length);
9509                 if (dev_extent_item) {
9510                         dev_extent_rec = container_of(dev_extent_item,
9511                                                 struct device_extent_record,
9512                                                 cache);
9513                         if (dev_extent_rec->objectid != devid ||
9514                             dev_extent_rec->offset != offset ||
9515                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9516                             dev_extent_rec->length != length) {
9517                                 if (!silent)
9518                                         fprintf(stderr,
9519                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9520                                                 chunk_rec->objectid,
9521                                                 chunk_rec->type,
9522                                                 chunk_rec->offset,
9523                                                 chunk_rec->stripes[i].devid,
9524                                                 chunk_rec->stripes[i].offset,
9525                                                 dev_extent_rec->objectid,
9526                                                 dev_extent_rec->offset,
9527                                                 dev_extent_rec->length);
9528                                 ret = -1;
9529                         } else {
9530                                 list_move(&dev_extent_rec->chunk_list,
9531                                           &chunk_rec->dextents);
9532                         }
9533                 } else {
9534                         if (!silent)
9535                                 fprintf(stderr,
9536                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9537                                         chunk_rec->objectid,
9538                                         chunk_rec->type,
9539                                         chunk_rec->offset,
9540                                         chunk_rec->stripes[i].devid,
9541                                         chunk_rec->stripes[i].offset);
9542                         ret = -1;
9543                 }
9544         }
9545         return ret;
9546 }
9547
9548 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9549 int check_chunks(struct cache_tree *chunk_cache,
9550                  struct block_group_tree *block_group_cache,
9551                  struct device_extent_tree *dev_extent_cache,
9552                  struct list_head *good, struct list_head *bad,
9553                  struct list_head *rebuild, int silent)
9554 {
9555         struct cache_extent *chunk_item;
9556         struct chunk_record *chunk_rec;
9557         struct block_group_record *bg_rec;
9558         struct device_extent_record *dext_rec;
9559         int err;
9560         int ret = 0;
9561
9562         chunk_item = first_cache_extent(chunk_cache);
9563         while (chunk_item) {
9564                 chunk_rec = container_of(chunk_item, struct chunk_record,
9565                                          cache);
9566                 err = check_chunk_refs(chunk_rec, block_group_cache,
9567                                        dev_extent_cache, silent);
9568                 if (err < 0)
9569                         ret = err;
9570                 if (err == 0 && good)
9571                         list_add_tail(&chunk_rec->list, good);
9572                 if (err > 0 && rebuild)
9573                         list_add_tail(&chunk_rec->list, rebuild);
9574                 if (err < 0 && bad)
9575                         list_add_tail(&chunk_rec->list, bad);
9576                 chunk_item = next_cache_extent(chunk_item);
9577         }
9578
9579         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9580                 if (!silent)
9581                         fprintf(stderr,
9582                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9583                                 bg_rec->objectid,
9584                                 bg_rec->offset,
9585                                 bg_rec->flags);
9586                 if (!ret)
9587                         ret = 1;
9588         }
9589
9590         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9591                             chunk_list) {
9592                 if (!silent)
9593                         fprintf(stderr,
9594                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9595                                 dext_rec->objectid,
9596                                 dext_rec->offset,
9597                                 dext_rec->length);
9598                 if (!ret)
9599                         ret = 1;
9600         }
9601         return ret;
9602 }
9603
9604
9605 static int check_device_used(struct device_record *dev_rec,
9606                              struct device_extent_tree *dext_cache)
9607 {
9608         struct cache_extent *cache;
9609         struct device_extent_record *dev_extent_rec;
9610         u64 total_byte = 0;
9611
9612         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9613         while (cache) {
9614                 dev_extent_rec = container_of(cache,
9615                                               struct device_extent_record,
9616                                               cache);
9617                 if (dev_extent_rec->objectid != dev_rec->devid)
9618                         break;
9619
9620                 list_del_init(&dev_extent_rec->device_list);
9621                 total_byte += dev_extent_rec->length;
9622                 cache = next_cache_extent(cache);
9623         }
9624
9625         if (total_byte != dev_rec->byte_used) {
9626                 fprintf(stderr,
9627                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9628                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9629                         dev_rec->type, dev_rec->offset);
9630                 return -1;
9631         } else {
9632                 return 0;
9633         }
9634 }
9635
9636 /* check btrfs_dev_item -> btrfs_dev_extent */
9637 static int check_devices(struct rb_root *dev_cache,
9638                          struct device_extent_tree *dev_extent_cache)
9639 {
9640         struct rb_node *dev_node;
9641         struct device_record *dev_rec;
9642         struct device_extent_record *dext_rec;
9643         int err;
9644         int ret = 0;
9645
9646         dev_node = rb_first(dev_cache);
9647         while (dev_node) {
9648                 dev_rec = container_of(dev_node, struct device_record, node);
9649                 err = check_device_used(dev_rec, dev_extent_cache);
9650                 if (err)
9651                         ret = err;
9652
9653                 dev_node = rb_next(dev_node);
9654         }
9655         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9656                             device_list) {
9657                 fprintf(stderr,
9658                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9659                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9660                 if (!ret)
9661                         ret = 1;
9662         }
9663         return ret;
9664 }
9665
9666 static int add_root_item_to_list(struct list_head *head,
9667                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9668                                   u8 level, u8 drop_level,
9669                                   int level_size, struct btrfs_key *drop_key)
9670 {
9671
9672         struct root_item_record *ri_rec;
9673         ri_rec = malloc(sizeof(*ri_rec));
9674         if (!ri_rec)
9675                 return -ENOMEM;
9676         ri_rec->bytenr = bytenr;
9677         ri_rec->objectid = objectid;
9678         ri_rec->level = level;
9679         ri_rec->level_size = level_size;
9680         ri_rec->drop_level = drop_level;
9681         ri_rec->last_snapshot = last_snapshot;
9682         if (drop_key)
9683                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9684         list_add_tail(&ri_rec->list, head);
9685
9686         return 0;
9687 }
9688
9689 static void free_root_item_list(struct list_head *list)
9690 {
9691         struct root_item_record *ri_rec;
9692
9693         while (!list_empty(list)) {
9694                 ri_rec = list_first_entry(list, struct root_item_record,
9695                                           list);
9696                 list_del_init(&ri_rec->list);
9697                 free(ri_rec);
9698         }
9699 }
9700
9701 static int deal_root_from_list(struct list_head *list,
9702                                struct btrfs_root *root,
9703                                struct block_info *bits,
9704                                int bits_nr,
9705                                struct cache_tree *pending,
9706                                struct cache_tree *seen,
9707                                struct cache_tree *reada,
9708                                struct cache_tree *nodes,
9709                                struct cache_tree *extent_cache,
9710                                struct cache_tree *chunk_cache,
9711                                struct rb_root *dev_cache,
9712                                struct block_group_tree *block_group_cache,
9713                                struct device_extent_tree *dev_extent_cache)
9714 {
9715         int ret = 0;
9716         u64 last;
9717
9718         while (!list_empty(list)) {
9719                 struct root_item_record *rec;
9720                 struct extent_buffer *buf;
9721                 rec = list_entry(list->next,
9722                                  struct root_item_record, list);
9723                 last = 0;
9724                 buf = read_tree_block(root->fs_info->tree_root,
9725                                       rec->bytenr, rec->level_size, 0);
9726                 if (!extent_buffer_uptodate(buf)) {
9727                         free_extent_buffer(buf);
9728                         ret = -EIO;
9729                         break;
9730                 }
9731                 ret = add_root_to_pending(buf, extent_cache, pending,
9732                                     seen, nodes, rec->objectid);
9733                 if (ret < 0)
9734                         break;
9735                 /*
9736                  * To rebuild extent tree, we need deal with snapshot
9737                  * one by one, otherwise we deal with node firstly which
9738                  * can maximize readahead.
9739                  */
9740                 while (1) {
9741                         ret = run_next_block(root, bits, bits_nr, &last,
9742                                              pending, seen, reada, nodes,
9743                                              extent_cache, chunk_cache,
9744                                              dev_cache, block_group_cache,
9745                                              dev_extent_cache, rec);
9746                         if (ret != 0)
9747                                 break;
9748                 }
9749                 free_extent_buffer(buf);
9750                 list_del(&rec->list);
9751                 free(rec);
9752                 if (ret < 0)
9753                         break;
9754         }
9755         while (ret >= 0) {
9756                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9757                                      reada, nodes, extent_cache, chunk_cache,
9758                                      dev_cache, block_group_cache,
9759                                      dev_extent_cache, NULL);
9760                 if (ret != 0) {
9761                         if (ret > 0)
9762                                 ret = 0;
9763                         break;
9764                 }
9765         }
9766         return ret;
9767 }
9768
9769 static int check_chunks_and_extents(struct btrfs_root *root)
9770 {
9771         struct rb_root dev_cache;
9772         struct cache_tree chunk_cache;
9773         struct block_group_tree block_group_cache;
9774         struct device_extent_tree dev_extent_cache;
9775         struct cache_tree extent_cache;
9776         struct cache_tree seen;
9777         struct cache_tree pending;
9778         struct cache_tree reada;
9779         struct cache_tree nodes;
9780         struct extent_io_tree excluded_extents;
9781         struct cache_tree corrupt_blocks;
9782         struct btrfs_path path;
9783         struct btrfs_key key;
9784         struct btrfs_key found_key;
9785         int ret, err = 0;
9786         struct block_info *bits;
9787         int bits_nr;
9788         struct extent_buffer *leaf;
9789         int slot;
9790         struct btrfs_root_item ri;
9791         struct list_head dropping_trees;
9792         struct list_head normal_trees;
9793         struct btrfs_root *root1;
9794         u64 objectid;
9795         u32 level_size;
9796         u8 level;
9797
9798         dev_cache = RB_ROOT;
9799         cache_tree_init(&chunk_cache);
9800         block_group_tree_init(&block_group_cache);
9801         device_extent_tree_init(&dev_extent_cache);
9802
9803         cache_tree_init(&extent_cache);
9804         cache_tree_init(&seen);
9805         cache_tree_init(&pending);
9806         cache_tree_init(&nodes);
9807         cache_tree_init(&reada);
9808         cache_tree_init(&corrupt_blocks);
9809         extent_io_tree_init(&excluded_extents);
9810         INIT_LIST_HEAD(&dropping_trees);
9811         INIT_LIST_HEAD(&normal_trees);
9812
9813         if (repair) {
9814                 root->fs_info->excluded_extents = &excluded_extents;
9815                 root->fs_info->fsck_extent_cache = &extent_cache;
9816                 root->fs_info->free_extent_hook = free_extent_hook;
9817                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9818         }
9819
9820         bits_nr = 1024;
9821         bits = malloc(bits_nr * sizeof(struct block_info));
9822         if (!bits) {
9823                 perror("malloc");
9824                 exit(1);
9825         }
9826
9827         if (ctx.progress_enabled) {
9828                 ctx.tp = TASK_EXTENTS;
9829                 task_start(ctx.info);
9830         }
9831
9832 again:
9833         root1 = root->fs_info->tree_root;
9834         level = btrfs_header_level(root1->node);
9835         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9836                                     root1->node->start, 0, level, 0,
9837                                     root1->nodesize, NULL);
9838         if (ret < 0)
9839                 goto out;
9840         root1 = root->fs_info->chunk_root;
9841         level = btrfs_header_level(root1->node);
9842         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9843                                     root1->node->start, 0, level, 0,
9844                                     root1->nodesize, NULL);
9845         if (ret < 0)
9846                 goto out;
9847         btrfs_init_path(&path);
9848         key.offset = 0;
9849         key.objectid = 0;
9850         key.type = BTRFS_ROOT_ITEM_KEY;
9851         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9852                                         &key, &path, 0, 0);
9853         if (ret < 0)
9854                 goto out;
9855         while(1) {
9856                 leaf = path.nodes[0];
9857                 slot = path.slots[0];
9858                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9859                         ret = btrfs_next_leaf(root, &path);
9860                         if (ret != 0)
9861                                 break;
9862                         leaf = path.nodes[0];
9863                         slot = path.slots[0];
9864                 }
9865                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9866                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9867                         unsigned long offset;
9868                         u64 last_snapshot;
9869
9870                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9871                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9872                         last_snapshot = btrfs_root_last_snapshot(&ri);
9873                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9874                                 level = btrfs_root_level(&ri);
9875                                 level_size = root->nodesize;
9876                                 ret = add_root_item_to_list(&normal_trees,
9877                                                 found_key.objectid,
9878                                                 btrfs_root_bytenr(&ri),
9879                                                 last_snapshot, level,
9880                                                 0, level_size, NULL);
9881                                 if (ret < 0)
9882                                         goto out;
9883                         } else {
9884                                 level = btrfs_root_level(&ri);
9885                                 level_size = root->nodesize;
9886                                 objectid = found_key.objectid;
9887                                 btrfs_disk_key_to_cpu(&found_key,
9888                                                       &ri.drop_progress);
9889                                 ret = add_root_item_to_list(&dropping_trees,
9890                                                 objectid,
9891                                                 btrfs_root_bytenr(&ri),
9892                                                 last_snapshot, level,
9893                                                 ri.drop_level,
9894                                                 level_size, &found_key);
9895                                 if (ret < 0)
9896                                         goto out;
9897                         }
9898                 }
9899                 path.slots[0]++;
9900         }
9901         btrfs_release_path(&path);
9902
9903         /*
9904          * check_block can return -EAGAIN if it fixes something, please keep
9905          * this in mind when dealing with return values from these functions, if
9906          * we get -EAGAIN we want to fall through and restart the loop.
9907          */
9908         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9909                                   &seen, &reada, &nodes, &extent_cache,
9910                                   &chunk_cache, &dev_cache, &block_group_cache,
9911                                   &dev_extent_cache);
9912         if (ret < 0) {
9913                 if (ret == -EAGAIN)
9914                         goto loop;
9915                 goto out;
9916         }
9917         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9918                                   &pending, &seen, &reada, &nodes,
9919                                   &extent_cache, &chunk_cache, &dev_cache,
9920                                   &block_group_cache, &dev_extent_cache);
9921         if (ret < 0) {
9922                 if (ret == -EAGAIN)
9923                         goto loop;
9924                 goto out;
9925         }
9926
9927         ret = check_chunks(&chunk_cache, &block_group_cache,
9928                            &dev_extent_cache, NULL, NULL, NULL, 0);
9929         if (ret) {
9930                 if (ret == -EAGAIN)
9931                         goto loop;
9932                 err = ret;
9933         }
9934
9935         ret = check_extent_refs(root, &extent_cache);
9936         if (ret < 0) {
9937                 if (ret == -EAGAIN)
9938                         goto loop;
9939                 goto out;
9940         }
9941
9942         ret = check_devices(&dev_cache, &dev_extent_cache);
9943         if (ret && err)
9944                 ret = err;
9945
9946 out:
9947         task_stop(ctx.info);
9948         if (repair) {
9949                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9950                 extent_io_tree_cleanup(&excluded_extents);
9951                 root->fs_info->fsck_extent_cache = NULL;
9952                 root->fs_info->free_extent_hook = NULL;
9953                 root->fs_info->corrupt_blocks = NULL;
9954                 root->fs_info->excluded_extents = NULL;
9955         }
9956         free(bits);
9957         free_chunk_cache_tree(&chunk_cache);
9958         free_device_cache_tree(&dev_cache);
9959         free_block_group_tree(&block_group_cache);
9960         free_device_extent_tree(&dev_extent_cache);
9961         free_extent_cache_tree(&seen);
9962         free_extent_cache_tree(&pending);
9963         free_extent_cache_tree(&reada);
9964         free_extent_cache_tree(&nodes);
9965         return ret;
9966 loop:
9967         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9968         free_extent_cache_tree(&seen);
9969         free_extent_cache_tree(&pending);
9970         free_extent_cache_tree(&reada);
9971         free_extent_cache_tree(&nodes);
9972         free_chunk_cache_tree(&chunk_cache);
9973         free_block_group_tree(&block_group_cache);
9974         free_device_cache_tree(&dev_cache);
9975         free_device_extent_tree(&dev_extent_cache);
9976         free_extent_record_cache(&extent_cache);
9977         free_root_item_list(&normal_trees);
9978         free_root_item_list(&dropping_trees);
9979         extent_io_tree_cleanup(&excluded_extents);
9980         goto again;
9981 }
9982
9983 /*
9984  * Check backrefs of a tree block given by @bytenr or @eb.
9985  *
9986  * @root:       the root containing the @bytenr or @eb
9987  * @eb:         tree block extent buffer, can be NULL
9988  * @bytenr:     bytenr of the tree block to search
9989  * @level:      tree level of the tree block
9990  * @owner:      owner of the tree block
9991  *
9992  * Return >0 for any error found and output error message
9993  * Return 0 for no error found
9994  */
9995 static int check_tree_block_ref(struct btrfs_root *root,
9996                                 struct extent_buffer *eb, u64 bytenr,
9997                                 int level, u64 owner)
9998 {
9999         struct btrfs_key key;
10000         struct btrfs_root *extent_root = root->fs_info->extent_root;
10001         struct btrfs_path path;
10002         struct btrfs_extent_item *ei;
10003         struct btrfs_extent_inline_ref *iref;
10004         struct extent_buffer *leaf;
10005         unsigned long end;
10006         unsigned long ptr;
10007         int slot;
10008         int skinny_level;
10009         int type;
10010         u32 nodesize = root->nodesize;
10011         u32 item_size;
10012         u64 offset;
10013         int tree_reloc_root = 0;
10014         int found_ref = 0;
10015         int err = 0;
10016         int ret;
10017
10018         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10019             btrfs_header_bytenr(root->node) == bytenr)
10020                 tree_reloc_root = 1;
10021
10022         btrfs_init_path(&path);
10023         key.objectid = bytenr;
10024         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10025                 key.type = BTRFS_METADATA_ITEM_KEY;
10026         else
10027                 key.type = BTRFS_EXTENT_ITEM_KEY;
10028         key.offset = (u64)-1;
10029
10030         /* Search for the backref in extent tree */
10031         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10032         if (ret < 0) {
10033                 err |= BACKREF_MISSING;
10034                 goto out;
10035         }
10036         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10037         if (ret) {
10038                 err |= BACKREF_MISSING;
10039                 goto out;
10040         }
10041
10042         leaf = path.nodes[0];
10043         slot = path.slots[0];
10044         btrfs_item_key_to_cpu(leaf, &key, slot);
10045
10046         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10047
10048         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10049                 skinny_level = (int)key.offset;
10050                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10051         } else {
10052                 struct btrfs_tree_block_info *info;
10053
10054                 info = (struct btrfs_tree_block_info *)(ei + 1);
10055                 skinny_level = btrfs_tree_block_level(leaf, info);
10056                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10057         }
10058
10059         if (eb) {
10060                 u64 header_gen;
10061                 u64 extent_gen;
10062
10063                 if (!(btrfs_extent_flags(leaf, ei) &
10064                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10065                         error(
10066                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10067                                 key.objectid, nodesize,
10068                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10069                         err = BACKREF_MISMATCH;
10070                 }
10071                 header_gen = btrfs_header_generation(eb);
10072                 extent_gen = btrfs_extent_generation(leaf, ei);
10073                 if (header_gen != extent_gen) {
10074                         error(
10075         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10076                                 key.objectid, nodesize, header_gen,
10077                                 extent_gen);
10078                         err = BACKREF_MISMATCH;
10079                 }
10080                 if (level != skinny_level) {
10081                         error(
10082                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10083                                 key.objectid, nodesize, level, skinny_level);
10084                         err = BACKREF_MISMATCH;
10085                 }
10086                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10087                         error(
10088                         "extent[%llu %u] is referred by other roots than %llu",
10089                                 key.objectid, nodesize, root->objectid);
10090                         err = BACKREF_MISMATCH;
10091                 }
10092         }
10093
10094         /*
10095          * Iterate the extent/metadata item to find the exact backref
10096          */
10097         item_size = btrfs_item_size_nr(leaf, slot);
10098         ptr = (unsigned long)iref;
10099         end = (unsigned long)ei + item_size;
10100         while (ptr < end) {
10101                 iref = (struct btrfs_extent_inline_ref *)ptr;
10102                 type = btrfs_extent_inline_ref_type(leaf, iref);
10103                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10104
10105                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10106                         (offset == root->objectid || offset == owner)) {
10107                         found_ref = 1;
10108                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10109                         /*
10110                          * Backref of tree reloc root points to itself, no need
10111                          * to check backref any more.
10112                          */
10113                         if (tree_reloc_root)
10114                                 found_ref = 1;
10115                         else
10116                         /* Check if the backref points to valid referencer */
10117                                 found_ref = !check_tree_block_ref(root, NULL,
10118                                                 offset, level + 1, owner);
10119                 }
10120
10121                 if (found_ref)
10122                         break;
10123                 ptr += btrfs_extent_inline_ref_size(type);
10124         }
10125
10126         /*
10127          * Inlined extent item doesn't have what we need, check
10128          * TREE_BLOCK_REF_KEY
10129          */
10130         if (!found_ref) {
10131                 btrfs_release_path(&path);
10132                 key.objectid = bytenr;
10133                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10134                 key.offset = root->objectid;
10135
10136                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10137                 if (!ret)
10138                         found_ref = 1;
10139         }
10140         if (!found_ref)
10141                 err |= BACKREF_MISSING;
10142 out:
10143         btrfs_release_path(&path);
10144         if (eb && (err & BACKREF_MISSING))
10145                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10146                         bytenr, nodesize, owner, level);
10147         return err;
10148 }
10149
10150 /*
10151  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10152  *
10153  * Return >0 any error found and output error message
10154  * Return 0 for no error found
10155  */
10156 static int check_extent_data_item(struct btrfs_root *root,
10157                                   struct extent_buffer *eb, int slot)
10158 {
10159         struct btrfs_file_extent_item *fi;
10160         struct btrfs_path path;
10161         struct btrfs_root *extent_root = root->fs_info->extent_root;
10162         struct btrfs_key fi_key;
10163         struct btrfs_key dbref_key;
10164         struct extent_buffer *leaf;
10165         struct btrfs_extent_item *ei;
10166         struct btrfs_extent_inline_ref *iref;
10167         struct btrfs_extent_data_ref *dref;
10168         u64 owner;
10169         u64 disk_bytenr;
10170         u64 disk_num_bytes;
10171         u64 extent_num_bytes;
10172         u64 extent_flags;
10173         u32 item_size;
10174         unsigned long end;
10175         unsigned long ptr;
10176         int type;
10177         u64 ref_root;
10178         int found_dbackref = 0;
10179         int err = 0;
10180         int ret;
10181
10182         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10183         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10184
10185         /* Nothing to check for hole and inline data extents */
10186         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10187             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10188                 return 0;
10189
10190         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10191         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10192         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10193
10194         /* Check unaligned disk_num_bytes and num_bytes */
10195         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10196                 error(
10197 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10198                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10199                         root->sectorsize);
10200                 err |= BYTES_UNALIGNED;
10201         } else {
10202                 data_bytes_allocated += disk_num_bytes;
10203         }
10204         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10205                 error(
10206 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10207                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10208                         root->sectorsize);
10209                 err |= BYTES_UNALIGNED;
10210         } else {
10211                 data_bytes_referenced += extent_num_bytes;
10212         }
10213         owner = btrfs_header_owner(eb);
10214
10215         /* Check the extent item of the file extent in extent tree */
10216         btrfs_init_path(&path);
10217         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10218         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10219         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10220
10221         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10222         if (ret) {
10223                 err |= BACKREF_MISSING;
10224                 goto error;
10225         }
10226
10227         leaf = path.nodes[0];
10228         slot = path.slots[0];
10229         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10230
10231         extent_flags = btrfs_extent_flags(leaf, ei);
10232
10233         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10234                 error(
10235                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10236                     disk_bytenr, disk_num_bytes,
10237                     BTRFS_EXTENT_FLAG_DATA);
10238                 err |= BACKREF_MISMATCH;
10239         }
10240
10241         /* Check data backref inside that extent item */
10242         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10243         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10244         ptr = (unsigned long)iref;
10245         end = (unsigned long)ei + item_size;
10246         while (ptr < end) {
10247                 iref = (struct btrfs_extent_inline_ref *)ptr;
10248                 type = btrfs_extent_inline_ref_type(leaf, iref);
10249                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10250
10251                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10252                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10253                         if (ref_root == owner || ref_root == root->objectid)
10254                                 found_dbackref = 1;
10255                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10256                         found_dbackref = !check_tree_block_ref(root, NULL,
10257                                 btrfs_extent_inline_ref_offset(leaf, iref),
10258                                 0, owner);
10259                 }
10260
10261                 if (found_dbackref)
10262                         break;
10263                 ptr += btrfs_extent_inline_ref_size(type);
10264         }
10265
10266         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10267         if (!found_dbackref) {
10268                 btrfs_release_path(&path);
10269
10270                 btrfs_init_path(&path);
10271                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10272                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10273                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10274                                 fi_key.objectid, fi_key.offset);
10275
10276                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10277                                         &dbref_key, &path, 0, 0);
10278                 if (!ret)
10279                         found_dbackref = 1;
10280         }
10281
10282         if (!found_dbackref)
10283                 err |= BACKREF_MISSING;
10284 error:
10285         btrfs_release_path(&path);
10286         if (err & BACKREF_MISSING) {
10287                 error("data extent[%llu %llu] backref lost",
10288                       disk_bytenr, disk_num_bytes);
10289         }
10290         return err;
10291 }
10292
10293 /*
10294  * Get real tree block level for the case like shared block
10295  * Return >= 0 as tree level
10296  * Return <0 for error
10297  */
10298 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10299 {
10300         struct extent_buffer *eb;
10301         struct btrfs_path path;
10302         struct btrfs_key key;
10303         struct btrfs_extent_item *ei;
10304         u64 flags;
10305         u64 transid;
10306         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10307         u8 backref_level;
10308         u8 header_level;
10309         int ret;
10310
10311         /* Search extent tree for extent generation and level */
10312         key.objectid = bytenr;
10313         key.type = BTRFS_METADATA_ITEM_KEY;
10314         key.offset = (u64)-1;
10315
10316         btrfs_init_path(&path);
10317         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10318         if (ret < 0)
10319                 goto release_out;
10320         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10321         if (ret < 0)
10322                 goto release_out;
10323         if (ret > 0) {
10324                 ret = -ENOENT;
10325                 goto release_out;
10326         }
10327
10328         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10329         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10330                             struct btrfs_extent_item);
10331         flags = btrfs_extent_flags(path.nodes[0], ei);
10332         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10333                 ret = -ENOENT;
10334                 goto release_out;
10335         }
10336
10337         /* Get transid for later read_tree_block() check */
10338         transid = btrfs_extent_generation(path.nodes[0], ei);
10339
10340         /* Get backref level as one source */
10341         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10342                 backref_level = key.offset;
10343         } else {
10344                 struct btrfs_tree_block_info *info;
10345
10346                 info = (struct btrfs_tree_block_info *)(ei + 1);
10347                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10348         }
10349         btrfs_release_path(&path);
10350
10351         /* Get level from tree block as an alternative source */
10352         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10353         if (!extent_buffer_uptodate(eb)) {
10354                 free_extent_buffer(eb);
10355                 return -EIO;
10356         }
10357         header_level = btrfs_header_level(eb);
10358         free_extent_buffer(eb);
10359
10360         if (header_level != backref_level)
10361                 return -EIO;
10362         return header_level;
10363
10364 release_out:
10365         btrfs_release_path(&path);
10366         return ret;
10367 }
10368
10369 /*
10370  * Check if a tree block backref is valid (points to a valid tree block)
10371  * if level == -1, level will be resolved
10372  * Return >0 for any error found and print error message
10373  */
10374 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10375                                     u64 bytenr, int level)
10376 {
10377         struct btrfs_root *root;
10378         struct btrfs_key key;
10379         struct btrfs_path path;
10380         struct extent_buffer *eb;
10381         struct extent_buffer *node;
10382         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10383         int err = 0;
10384         int ret;
10385
10386         /* Query level for level == -1 special case */
10387         if (level == -1)
10388                 level = query_tree_block_level(fs_info, bytenr);
10389         if (level < 0) {
10390                 err |= REFERENCER_MISSING;
10391                 goto out;
10392         }
10393
10394         key.objectid = root_id;
10395         key.type = BTRFS_ROOT_ITEM_KEY;
10396         key.offset = (u64)-1;
10397
10398         root = btrfs_read_fs_root(fs_info, &key);
10399         if (IS_ERR(root)) {
10400                 err |= REFERENCER_MISSING;
10401                 goto out;
10402         }
10403
10404         /* Read out the tree block to get item/node key */
10405         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10406         if (!extent_buffer_uptodate(eb)) {
10407                 err |= REFERENCER_MISSING;
10408                 free_extent_buffer(eb);
10409                 goto out;
10410         }
10411
10412         /* Empty tree, no need to check key */
10413         if (!btrfs_header_nritems(eb) && !level) {
10414                 free_extent_buffer(eb);
10415                 goto out;
10416         }
10417
10418         if (level)
10419                 btrfs_node_key_to_cpu(eb, &key, 0);
10420         else
10421                 btrfs_item_key_to_cpu(eb, &key, 0);
10422
10423         free_extent_buffer(eb);
10424
10425         btrfs_init_path(&path);
10426         path.lowest_level = level;
10427         /* Search with the first key, to ensure we can reach it */
10428         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10429         if (ret < 0) {
10430                 err |= REFERENCER_MISSING;
10431                 goto release_out;
10432         }
10433
10434         node = path.nodes[level];
10435         if (btrfs_header_bytenr(node) != bytenr) {
10436                 error(
10437         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10438                         bytenr, nodesize, bytenr,
10439                         btrfs_header_bytenr(node));
10440                 err |= REFERENCER_MISMATCH;
10441         }
10442         if (btrfs_header_level(node) != level) {
10443                 error(
10444         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10445                         bytenr, nodesize, level,
10446                         btrfs_header_level(node));
10447                 err |= REFERENCER_MISMATCH;
10448         }
10449
10450 release_out:
10451         btrfs_release_path(&path);
10452 out:
10453         if (err & REFERENCER_MISSING) {
10454                 if (level < 0)
10455                         error("extent [%llu %d] lost referencer (owner: %llu)",
10456                                 bytenr, nodesize, root_id);
10457                 else
10458                         error(
10459                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10460                                 bytenr, nodesize, root_id, level);
10461         }
10462
10463         return err;
10464 }
10465
10466 /*
10467  * Check if tree block @eb is tree reloc root.
10468  * Return 0 if it's not or any problem happens
10469  * Return 1 if it's a tree reloc root
10470  */
10471 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10472                                  struct extent_buffer *eb)
10473 {
10474         struct btrfs_root *tree_reloc_root;
10475         struct btrfs_key key;
10476         u64 bytenr = btrfs_header_bytenr(eb);
10477         u64 owner = btrfs_header_owner(eb);
10478         int ret = 0;
10479
10480         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10481         key.offset = owner;
10482         key.type = BTRFS_ROOT_ITEM_KEY;
10483
10484         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10485         if (IS_ERR(tree_reloc_root))
10486                 return 0;
10487
10488         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10489                 ret = 1;
10490         btrfs_free_fs_root(tree_reloc_root);
10491         return ret;
10492 }
10493
10494 /*
10495  * Check referencer for shared block backref
10496  * If level == -1, this function will resolve the level.
10497  */
10498 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10499                                      u64 parent, u64 bytenr, int level)
10500 {
10501         struct extent_buffer *eb;
10502         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10503         u32 nr;
10504         int found_parent = 0;
10505         int i;
10506
10507         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10508         if (!extent_buffer_uptodate(eb))
10509                 goto out;
10510
10511         if (level == -1)
10512                 level = query_tree_block_level(fs_info, bytenr);
10513         if (level < 0)
10514                 goto out;
10515
10516         /* It's possible it's a tree reloc root */
10517         if (parent == bytenr) {
10518                 if (is_tree_reloc_root(fs_info, eb))
10519                         found_parent = 1;
10520                 goto out;
10521         }
10522
10523         if (level + 1 != btrfs_header_level(eb))
10524                 goto out;
10525
10526         nr = btrfs_header_nritems(eb);
10527         for (i = 0; i < nr; i++) {
10528                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10529                         found_parent = 1;
10530                         break;
10531                 }
10532         }
10533 out:
10534         free_extent_buffer(eb);
10535         if (!found_parent) {
10536                 error(
10537         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10538                         bytenr, nodesize, parent, level);
10539                 return REFERENCER_MISSING;
10540         }
10541         return 0;
10542 }
10543
10544 /*
10545  * Check referencer for normal (inlined) data ref
10546  * If len == 0, it will be resolved by searching in extent tree
10547  */
10548 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10549                                      u64 root_id, u64 objectid, u64 offset,
10550                                      u64 bytenr, u64 len, u32 count)
10551 {
10552         struct btrfs_root *root;
10553         struct btrfs_root *extent_root = fs_info->extent_root;
10554         struct btrfs_key key;
10555         struct btrfs_path path;
10556         struct extent_buffer *leaf;
10557         struct btrfs_file_extent_item *fi;
10558         u32 found_count = 0;
10559         int slot;
10560         int ret = 0;
10561
10562         if (!len) {
10563                 key.objectid = bytenr;
10564                 key.type = BTRFS_EXTENT_ITEM_KEY;
10565                 key.offset = (u64)-1;
10566
10567                 btrfs_init_path(&path);
10568                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10569                 if (ret < 0)
10570                         goto out;
10571                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10572                 if (ret)
10573                         goto out;
10574                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10575                 if (key.objectid != bytenr ||
10576                     key.type != BTRFS_EXTENT_ITEM_KEY)
10577                         goto out;
10578                 len = key.offset;
10579                 btrfs_release_path(&path);
10580         }
10581         key.objectid = root_id;
10582         key.type = BTRFS_ROOT_ITEM_KEY;
10583         key.offset = (u64)-1;
10584         btrfs_init_path(&path);
10585
10586         root = btrfs_read_fs_root(fs_info, &key);
10587         if (IS_ERR(root))
10588                 goto out;
10589
10590         key.objectid = objectid;
10591         key.type = BTRFS_EXTENT_DATA_KEY;
10592         /*
10593          * It can be nasty as data backref offset is
10594          * file offset - file extent offset, which is smaller or
10595          * equal to original backref offset.  The only special case is
10596          * overflow.  So we need to special check and do further search.
10597          */
10598         key.offset = offset & (1ULL << 63) ? 0 : offset;
10599
10600         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10601         if (ret < 0)
10602                 goto out;
10603
10604         /*
10605          * Search afterwards to get correct one
10606          * NOTE: As we must do a comprehensive check on the data backref to
10607          * make sure the dref count also matches, we must iterate all file
10608          * extents for that inode.
10609          */
10610         while (1) {
10611                 leaf = path.nodes[0];
10612                 slot = path.slots[0];
10613
10614                 if (slot >= btrfs_header_nritems(leaf))
10615                         goto next;
10616                 btrfs_item_key_to_cpu(leaf, &key, slot);
10617                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10618                         break;
10619                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10620                 /*
10621                  * Except normal disk bytenr and disk num bytes, we still
10622                  * need to do extra check on dbackref offset as
10623                  * dbackref offset = file_offset - file_extent_offset
10624                  */
10625                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10626                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10627                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10628                     offset)
10629                         found_count++;
10630
10631 next:
10632                 ret = btrfs_next_item(root, &path);
10633                 if (ret)
10634                         break;
10635         }
10636 out:
10637         btrfs_release_path(&path);
10638         if (found_count != count) {
10639                 error(
10640 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10641                         bytenr, len, root_id, objectid, offset, count, found_count);
10642                 return REFERENCER_MISSING;
10643         }
10644         return 0;
10645 }
10646
10647 /*
10648  * Check if the referencer of a shared data backref exists
10649  */
10650 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10651                                      u64 parent, u64 bytenr)
10652 {
10653         struct extent_buffer *eb;
10654         struct btrfs_key key;
10655         struct btrfs_file_extent_item *fi;
10656         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10657         u32 nr;
10658         int found_parent = 0;
10659         int i;
10660
10661         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10662         if (!extent_buffer_uptodate(eb))
10663                 goto out;
10664
10665         nr = btrfs_header_nritems(eb);
10666         for (i = 0; i < nr; i++) {
10667                 btrfs_item_key_to_cpu(eb, &key, i);
10668                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10669                         continue;
10670
10671                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10672                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10673                         continue;
10674
10675                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10676                         found_parent = 1;
10677                         break;
10678                 }
10679         }
10680
10681 out:
10682         free_extent_buffer(eb);
10683         if (!found_parent) {
10684                 error("shared extent %llu referencer lost (parent: %llu)",
10685                         bytenr, parent);
10686                 return REFERENCER_MISSING;
10687         }
10688         return 0;
10689 }
10690
10691 /*
10692  * This function will check a given extent item, including its backref and
10693  * itself (like crossing stripe boundary and type)
10694  *
10695  * Since we don't use extent_record anymore, introduce new error bit
10696  */
10697 static int check_extent_item(struct btrfs_fs_info *fs_info,
10698                              struct extent_buffer *eb, int slot)
10699 {
10700         struct btrfs_extent_item *ei;
10701         struct btrfs_extent_inline_ref *iref;
10702         struct btrfs_extent_data_ref *dref;
10703         unsigned long end;
10704         unsigned long ptr;
10705         int type;
10706         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10707         u32 item_size = btrfs_item_size_nr(eb, slot);
10708         u64 flags;
10709         u64 offset;
10710         int metadata = 0;
10711         int level;
10712         struct btrfs_key key;
10713         int ret;
10714         int err = 0;
10715
10716         btrfs_item_key_to_cpu(eb, &key, slot);
10717         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10718                 bytes_used += key.offset;
10719         else
10720                 bytes_used += nodesize;
10721
10722         if (item_size < sizeof(*ei)) {
10723                 /*
10724                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10725                  * old thing when on disk format is still un-determined.
10726                  * No need to care about it anymore
10727                  */
10728                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10729                 return -ENOTTY;
10730         }
10731
10732         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10733         flags = btrfs_extent_flags(eb, ei);
10734
10735         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10736                 metadata = 1;
10737         if (metadata && check_crossing_stripes(global_info, key.objectid,
10738                                                eb->len)) {
10739                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10740                       key.objectid, key.objectid + nodesize);
10741                 err |= CROSSING_STRIPE_BOUNDARY;
10742         }
10743
10744         ptr = (unsigned long)(ei + 1);
10745
10746         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10747                 /* Old EXTENT_ITEM metadata */
10748                 struct btrfs_tree_block_info *info;
10749
10750                 info = (struct btrfs_tree_block_info *)ptr;
10751                 level = btrfs_tree_block_level(eb, info);
10752                 ptr += sizeof(struct btrfs_tree_block_info);
10753         } else {
10754                 /* New METADATA_ITEM */
10755                 level = key.offset;
10756         }
10757         end = (unsigned long)ei + item_size;
10758
10759 next:
10760         /* Reached extent item end normally */
10761         if (ptr == end)
10762                 goto out;
10763
10764         /* Beyond extent item end, wrong item size */
10765         if (ptr > end) {
10766                 err |= ITEM_SIZE_MISMATCH;
10767                 error("extent item at bytenr %llu slot %d has wrong size",
10768                         eb->start, slot);
10769                 goto out;
10770         }
10771
10772         /* Now check every backref in this extent item */
10773         iref = (struct btrfs_extent_inline_ref *)ptr;
10774         type = btrfs_extent_inline_ref_type(eb, iref);
10775         offset = btrfs_extent_inline_ref_offset(eb, iref);
10776         switch (type) {
10777         case BTRFS_TREE_BLOCK_REF_KEY:
10778                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10779                                                level);
10780                 err |= ret;
10781                 break;
10782         case BTRFS_SHARED_BLOCK_REF_KEY:
10783                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10784                                                  level);
10785                 err |= ret;
10786                 break;
10787         case BTRFS_EXTENT_DATA_REF_KEY:
10788                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10789                 ret = check_extent_data_backref(fs_info,
10790                                 btrfs_extent_data_ref_root(eb, dref),
10791                                 btrfs_extent_data_ref_objectid(eb, dref),
10792                                 btrfs_extent_data_ref_offset(eb, dref),
10793                                 key.objectid, key.offset,
10794                                 btrfs_extent_data_ref_count(eb, dref));
10795                 err |= ret;
10796                 break;
10797         case BTRFS_SHARED_DATA_REF_KEY:
10798                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10799                 err |= ret;
10800                 break;
10801         default:
10802                 error("extent[%llu %d %llu] has unknown ref type: %d",
10803                         key.objectid, key.type, key.offset, type);
10804                 err |= UNKNOWN_TYPE;
10805                 goto out;
10806         }
10807
10808         ptr += btrfs_extent_inline_ref_size(type);
10809         goto next;
10810
10811 out:
10812         return err;
10813 }
10814
10815 /*
10816  * Check if a dev extent item is referred correctly by its chunk
10817  */
10818 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10819                                  struct extent_buffer *eb, int slot)
10820 {
10821         struct btrfs_root *chunk_root = fs_info->chunk_root;
10822         struct btrfs_dev_extent *ptr;
10823         struct btrfs_path path;
10824         struct btrfs_key chunk_key;
10825         struct btrfs_key devext_key;
10826         struct btrfs_chunk *chunk;
10827         struct extent_buffer *l;
10828         int num_stripes;
10829         u64 length;
10830         int i;
10831         int found_chunk = 0;
10832         int ret;
10833
10834         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10835         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10836         length = btrfs_dev_extent_length(eb, ptr);
10837
10838         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10839         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10840         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10841
10842         btrfs_init_path(&path);
10843         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10844         if (ret)
10845                 goto out;
10846
10847         l = path.nodes[0];
10848         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10849         if (btrfs_chunk_length(l, chunk) != length)
10850                 goto out;
10851
10852         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10853         for (i = 0; i < num_stripes; i++) {
10854                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10855                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10856
10857                 if (devid == devext_key.objectid &&
10858                     offset == devext_key.offset) {
10859                         found_chunk = 1;
10860                         break;
10861                 }
10862         }
10863 out:
10864         btrfs_release_path(&path);
10865         if (!found_chunk) {
10866                 error(
10867                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10868                         devext_key.objectid, devext_key.offset, length);
10869                 return REFERENCER_MISSING;
10870         }
10871         return 0;
10872 }
10873
10874 /*
10875  * Check if the used space is correct with the dev item
10876  */
10877 static int check_dev_item(struct btrfs_fs_info *fs_info,
10878                           struct extent_buffer *eb, int slot)
10879 {
10880         struct btrfs_root *dev_root = fs_info->dev_root;
10881         struct btrfs_dev_item *dev_item;
10882         struct btrfs_path path;
10883         struct btrfs_key key;
10884         struct btrfs_dev_extent *ptr;
10885         u64 dev_id;
10886         u64 used;
10887         u64 total = 0;
10888         int ret;
10889
10890         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10891         dev_id = btrfs_device_id(eb, dev_item);
10892         used = btrfs_device_bytes_used(eb, dev_item);
10893
10894         key.objectid = dev_id;
10895         key.type = BTRFS_DEV_EXTENT_KEY;
10896         key.offset = 0;
10897
10898         btrfs_init_path(&path);
10899         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10900         if (ret < 0) {
10901                 btrfs_item_key_to_cpu(eb, &key, slot);
10902                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10903                         key.objectid, key.type, key.offset);
10904                 btrfs_release_path(&path);
10905                 return REFERENCER_MISSING;
10906         }
10907
10908         /* Iterate dev_extents to calculate the used space of a device */
10909         while (1) {
10910                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10911                         goto next;
10912
10913                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10914                 if (key.objectid > dev_id)
10915                         break;
10916                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10917                         goto next;
10918
10919                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10920                                      struct btrfs_dev_extent);
10921                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10922 next:
10923                 ret = btrfs_next_item(dev_root, &path);
10924                 if (ret)
10925                         break;
10926         }
10927         btrfs_release_path(&path);
10928
10929         if (used != total) {
10930                 btrfs_item_key_to_cpu(eb, &key, slot);
10931                 error(
10932 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10933                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10934                         BTRFS_DEV_EXTENT_KEY, dev_id);
10935                 return ACCOUNTING_MISMATCH;
10936         }
10937         return 0;
10938 }
10939
10940 /*
10941  * Check a block group item with its referener (chunk) and its used space
10942  * with extent/metadata item
10943  */
10944 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10945                                   struct extent_buffer *eb, int slot)
10946 {
10947         struct btrfs_root *extent_root = fs_info->extent_root;
10948         struct btrfs_root *chunk_root = fs_info->chunk_root;
10949         struct btrfs_block_group_item *bi;
10950         struct btrfs_block_group_item bg_item;
10951         struct btrfs_path path;
10952         struct btrfs_key bg_key;
10953         struct btrfs_key chunk_key;
10954         struct btrfs_key extent_key;
10955         struct btrfs_chunk *chunk;
10956         struct extent_buffer *leaf;
10957         struct btrfs_extent_item *ei;
10958         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10959         u64 flags;
10960         u64 bg_flags;
10961         u64 used;
10962         u64 total = 0;
10963         int ret;
10964         int err = 0;
10965
10966         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10967         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10968         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10969         used = btrfs_block_group_used(&bg_item);
10970         bg_flags = btrfs_block_group_flags(&bg_item);
10971
10972         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10973         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10974         chunk_key.offset = bg_key.objectid;
10975
10976         btrfs_init_path(&path);
10977         /* Search for the referencer chunk */
10978         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10979         if (ret) {
10980                 error(
10981                 "block group[%llu %llu] did not find the related chunk item",
10982                         bg_key.objectid, bg_key.offset);
10983                 err |= REFERENCER_MISSING;
10984         } else {
10985                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10986                                         struct btrfs_chunk);
10987                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10988                                                 bg_key.offset) {
10989                         error(
10990         "block group[%llu %llu] related chunk item length does not match",
10991                                 bg_key.objectid, bg_key.offset);
10992                         err |= REFERENCER_MISMATCH;
10993                 }
10994         }
10995         btrfs_release_path(&path);
10996
10997         /* Search from the block group bytenr */
10998         extent_key.objectid = bg_key.objectid;
10999         extent_key.type = 0;
11000         extent_key.offset = 0;
11001
11002         btrfs_init_path(&path);
11003         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11004         if (ret < 0)
11005                 goto out;
11006
11007         /* Iterate extent tree to account used space */
11008         while (1) {
11009                 leaf = path.nodes[0];
11010
11011                 /* Search slot can point to the last item beyond leaf nritems */
11012                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11013                         goto next;
11014
11015                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11016                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11017                         break;
11018
11019                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11020                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11021                         goto next;
11022                 if (extent_key.objectid < bg_key.objectid)
11023                         goto next;
11024
11025                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11026                         total += nodesize;
11027                 else
11028                         total += extent_key.offset;
11029
11030                 ei = btrfs_item_ptr(leaf, path.slots[0],
11031                                     struct btrfs_extent_item);
11032                 flags = btrfs_extent_flags(leaf, ei);
11033                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11034                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11035                                 error(
11036                         "bad extent[%llu, %llu) type mismatch with chunk",
11037                                         extent_key.objectid,
11038                                         extent_key.objectid + extent_key.offset);
11039                                 err |= CHUNK_TYPE_MISMATCH;
11040                         }
11041                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11042                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11043                                     BTRFS_BLOCK_GROUP_METADATA))) {
11044                                 error(
11045                         "bad extent[%llu, %llu) type mismatch with chunk",
11046                                         extent_key.objectid,
11047                                         extent_key.objectid + nodesize);
11048                                 err |= CHUNK_TYPE_MISMATCH;
11049                         }
11050                 }
11051 next:
11052                 ret = btrfs_next_item(extent_root, &path);
11053                 if (ret)
11054                         break;
11055         }
11056
11057 out:
11058         btrfs_release_path(&path);
11059
11060         if (total != used) {
11061                 error(
11062                 "block group[%llu %llu] used %llu but extent items used %llu",
11063                         bg_key.objectid, bg_key.offset, used, total);
11064                 err |= ACCOUNTING_MISMATCH;
11065         }
11066         return err;
11067 }
11068
11069 /*
11070  * Check a chunk item.
11071  * Including checking all referred dev_extents and block group
11072  */
11073 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11074                             struct extent_buffer *eb, int slot)
11075 {
11076         struct btrfs_root *extent_root = fs_info->extent_root;
11077         struct btrfs_root *dev_root = fs_info->dev_root;
11078         struct btrfs_path path;
11079         struct btrfs_key chunk_key;
11080         struct btrfs_key bg_key;
11081         struct btrfs_key devext_key;
11082         struct btrfs_chunk *chunk;
11083         struct extent_buffer *leaf;
11084         struct btrfs_block_group_item *bi;
11085         struct btrfs_block_group_item bg_item;
11086         struct btrfs_dev_extent *ptr;
11087         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11088         u64 length;
11089         u64 chunk_end;
11090         u64 type;
11091         u64 profile;
11092         int num_stripes;
11093         u64 offset;
11094         u64 objectid;
11095         int i;
11096         int ret;
11097         int err = 0;
11098
11099         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11100         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11101         length = btrfs_chunk_length(eb, chunk);
11102         chunk_end = chunk_key.offset + length;
11103         if (!IS_ALIGNED(length, sectorsize)) {
11104                 error("chunk[%llu %llu) not aligned to %u",
11105                         chunk_key.offset, chunk_end, sectorsize);
11106                 err |= BYTES_UNALIGNED;
11107                 goto out;
11108         }
11109
11110         type = btrfs_chunk_type(eb, chunk);
11111         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11112         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11113                 error("chunk[%llu %llu) has no chunk type",
11114                         chunk_key.offset, chunk_end);
11115                 err |= UNKNOWN_TYPE;
11116         }
11117         if (profile && (profile & (profile - 1))) {
11118                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11119                         chunk_key.offset, chunk_end, profile);
11120                 err |= UNKNOWN_TYPE;
11121         }
11122
11123         bg_key.objectid = chunk_key.offset;
11124         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11125         bg_key.offset = length;
11126
11127         btrfs_init_path(&path);
11128         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11129         if (ret) {
11130                 error(
11131                 "chunk[%llu %llu) did not find the related block group item",
11132                         chunk_key.offset, chunk_end);
11133                 err |= REFERENCER_MISSING;
11134         } else{
11135                 leaf = path.nodes[0];
11136                 bi = btrfs_item_ptr(leaf, path.slots[0],
11137                                     struct btrfs_block_group_item);
11138                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11139                                    sizeof(bg_item));
11140                 if (btrfs_block_group_flags(&bg_item) != type) {
11141                         error(
11142 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11143                                 chunk_key.offset, chunk_end, type,
11144                                 btrfs_block_group_flags(&bg_item));
11145                         err |= REFERENCER_MISSING;
11146                 }
11147         }
11148
11149         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11150         for (i = 0; i < num_stripes; i++) {
11151                 btrfs_release_path(&path);
11152                 btrfs_init_path(&path);
11153                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11154                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11155                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11156
11157                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11158                                         0, 0);
11159                 if (ret)
11160                         goto not_match_dev;
11161
11162                 leaf = path.nodes[0];
11163                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11164                                      struct btrfs_dev_extent);
11165                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11166                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11167                 if (objectid != chunk_key.objectid ||
11168                     offset != chunk_key.offset ||
11169                     btrfs_dev_extent_length(leaf, ptr) != length)
11170                         goto not_match_dev;
11171                 continue;
11172 not_match_dev:
11173                 err |= BACKREF_MISSING;
11174                 error(
11175                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11176                         chunk_key.objectid, chunk_end, i);
11177                 continue;
11178         }
11179         btrfs_release_path(&path);
11180 out:
11181         return err;
11182 }
11183
11184 /*
11185  * Main entry function to check known items and update related accounting info
11186  */
11187 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11188 {
11189         struct btrfs_fs_info *fs_info = root->fs_info;
11190         struct btrfs_key key;
11191         int slot = 0;
11192         int type;
11193         struct btrfs_extent_data_ref *dref;
11194         int ret;
11195         int err = 0;
11196
11197 next:
11198         btrfs_item_key_to_cpu(eb, &key, slot);
11199         type = key.type;
11200
11201         switch (type) {
11202         case BTRFS_EXTENT_DATA_KEY:
11203                 ret = check_extent_data_item(root, eb, slot);
11204                 err |= ret;
11205                 break;
11206         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11207                 ret = check_block_group_item(fs_info, eb, slot);
11208                 err |= ret;
11209                 break;
11210         case BTRFS_DEV_ITEM_KEY:
11211                 ret = check_dev_item(fs_info, eb, slot);
11212                 err |= ret;
11213                 break;
11214         case BTRFS_CHUNK_ITEM_KEY:
11215                 ret = check_chunk_item(fs_info, eb, slot);
11216                 err |= ret;
11217                 break;
11218         case BTRFS_DEV_EXTENT_KEY:
11219                 ret = check_dev_extent_item(fs_info, eb, slot);
11220                 err |= ret;
11221                 break;
11222         case BTRFS_EXTENT_ITEM_KEY:
11223         case BTRFS_METADATA_ITEM_KEY:
11224                 ret = check_extent_item(fs_info, eb, slot);
11225                 err |= ret;
11226                 break;
11227         case BTRFS_EXTENT_CSUM_KEY:
11228                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11229                 break;
11230         case BTRFS_TREE_BLOCK_REF_KEY:
11231                 ret = check_tree_block_backref(fs_info, key.offset,
11232                                                key.objectid, -1);
11233                 err |= ret;
11234                 break;
11235         case BTRFS_EXTENT_DATA_REF_KEY:
11236                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11237                 ret = check_extent_data_backref(fs_info,
11238                                 btrfs_extent_data_ref_root(eb, dref),
11239                                 btrfs_extent_data_ref_objectid(eb, dref),
11240                                 btrfs_extent_data_ref_offset(eb, dref),
11241                                 key.objectid, 0,
11242                                 btrfs_extent_data_ref_count(eb, dref));
11243                 err |= ret;
11244                 break;
11245         case BTRFS_SHARED_BLOCK_REF_KEY:
11246                 ret = check_shared_block_backref(fs_info, key.offset,
11247                                                  key.objectid, -1);
11248                 err |= ret;
11249                 break;
11250         case BTRFS_SHARED_DATA_REF_KEY:
11251                 ret = check_shared_data_backref(fs_info, key.offset,
11252                                                 key.objectid);
11253                 err |= ret;
11254                 break;
11255         default:
11256                 break;
11257         }
11258
11259         if (++slot < btrfs_header_nritems(eb))
11260                 goto next;
11261
11262         return err;
11263 }
11264
11265 /*
11266  * Helper function for later fs/subvol tree check.  To determine if a tree
11267  * block should be checked.
11268  * This function will ensure only the direct referencer with lowest rootid to
11269  * check a fs/subvolume tree block.
11270  *
11271  * Backref check at extent tree would detect errors like missing subvolume
11272  * tree, so we can do aggressive check to reduce duplicated checks.
11273  */
11274 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11275 {
11276         struct btrfs_root *extent_root = root->fs_info->extent_root;
11277         struct btrfs_key key;
11278         struct btrfs_path path;
11279         struct extent_buffer *leaf;
11280         int slot;
11281         struct btrfs_extent_item *ei;
11282         unsigned long ptr;
11283         unsigned long end;
11284         int type;
11285         u32 item_size;
11286         u64 offset;
11287         struct btrfs_extent_inline_ref *iref;
11288         int ret;
11289
11290         btrfs_init_path(&path);
11291         key.objectid = btrfs_header_bytenr(eb);
11292         key.type = BTRFS_METADATA_ITEM_KEY;
11293         key.offset = (u64)-1;
11294
11295         /*
11296          * Any failure in backref resolving means we can't determine
11297          * whom the tree block belongs to.
11298          * So in that case, we need to check that tree block
11299          */
11300         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11301         if (ret < 0)
11302                 goto need_check;
11303
11304         ret = btrfs_previous_extent_item(extent_root, &path,
11305                                          btrfs_header_bytenr(eb));
11306         if (ret)
11307                 goto need_check;
11308
11309         leaf = path.nodes[0];
11310         slot = path.slots[0];
11311         btrfs_item_key_to_cpu(leaf, &key, slot);
11312         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11313
11314         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11315                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11316         } else {
11317                 struct btrfs_tree_block_info *info;
11318
11319                 info = (struct btrfs_tree_block_info *)(ei + 1);
11320                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11321         }
11322
11323         item_size = btrfs_item_size_nr(leaf, slot);
11324         ptr = (unsigned long)iref;
11325         end = (unsigned long)ei + item_size;
11326         while (ptr < end) {
11327                 iref = (struct btrfs_extent_inline_ref *)ptr;
11328                 type = btrfs_extent_inline_ref_type(leaf, iref);
11329                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11330
11331                 /*
11332                  * We only check the tree block if current root is
11333                  * the lowest referencer of it.
11334                  */
11335                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11336                     offset < root->objectid) {
11337                         btrfs_release_path(&path);
11338                         return 0;
11339                 }
11340
11341                 ptr += btrfs_extent_inline_ref_size(type);
11342         }
11343         /*
11344          * Normally we should also check keyed tree block ref, but that may be
11345          * very time consuming.  Inlined ref should already make us skip a lot
11346          * of refs now.  So skip search keyed tree block ref.
11347          */
11348
11349 need_check:
11350         btrfs_release_path(&path);
11351         return 1;
11352 }
11353
11354 /*
11355  * Traversal function for tree block. We will do:
11356  * 1) Skip shared fs/subvolume tree blocks
11357  * 2) Update related bytes accounting
11358  * 3) Pre-order traversal
11359  */
11360 static int traverse_tree_block(struct btrfs_root *root,
11361                                 struct extent_buffer *node)
11362 {
11363         struct extent_buffer *eb;
11364         struct btrfs_key key;
11365         struct btrfs_key drop_key;
11366         int level;
11367         u64 nr;
11368         int i;
11369         int err = 0;
11370         int ret;
11371
11372         /*
11373          * Skip shared fs/subvolume tree block, in that case they will
11374          * be checked by referencer with lowest rootid
11375          */
11376         if (is_fstree(root->objectid) && !should_check(root, node))
11377                 return 0;
11378
11379         /* Update bytes accounting */
11380         total_btree_bytes += node->len;
11381         if (fs_root_objectid(btrfs_header_owner(node)))
11382                 total_fs_tree_bytes += node->len;
11383         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11384                 total_extent_tree_bytes += node->len;
11385         if (!found_old_backref &&
11386             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11387             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11388             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11389                 found_old_backref = 1;
11390
11391         /* pre-order tranversal, check itself first */
11392         level = btrfs_header_level(node);
11393         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11394                                    btrfs_header_level(node),
11395                                    btrfs_header_owner(node));
11396         err |= ret;
11397         if (err)
11398                 error(
11399         "check %s failed root %llu bytenr %llu level %d, force continue check",
11400                         level ? "node":"leaf", root->objectid,
11401                         btrfs_header_bytenr(node), btrfs_header_level(node));
11402
11403         if (!level) {
11404                 btree_space_waste += btrfs_leaf_free_space(root, node);
11405                 ret = check_leaf_items(root, node);
11406                 err |= ret;
11407                 return err;
11408         }
11409
11410         nr = btrfs_header_nritems(node);
11411         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11412         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11413                 sizeof(struct btrfs_key_ptr);
11414
11415         /* Then check all its children */
11416         for (i = 0; i < nr; i++) {
11417                 u64 blocknr = btrfs_node_blockptr(node, i);
11418
11419                 btrfs_node_key_to_cpu(node, &key, i);
11420                 if (level == root->root_item.drop_level &&
11421                     is_dropped_key(&key, &drop_key))
11422                         continue;
11423
11424                 /*
11425                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11426                  * to call the function itself.
11427                  */
11428                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11429                 if (extent_buffer_uptodate(eb)) {
11430                         ret = traverse_tree_block(root, eb);
11431                         err |= ret;
11432                 }
11433                 free_extent_buffer(eb);
11434         }
11435
11436         return err;
11437 }
11438
11439 /*
11440  * Low memory usage version check_chunks_and_extents.
11441  */
11442 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11443 {
11444         struct btrfs_path path;
11445         struct btrfs_key key;
11446         struct btrfs_root *root1;
11447         struct btrfs_root *cur_root;
11448         int err = 0;
11449         int ret;
11450
11451         root1 = root->fs_info->chunk_root;
11452         ret = traverse_tree_block(root1, root1->node);
11453         err |= ret;
11454
11455         root1 = root->fs_info->tree_root;
11456         ret = traverse_tree_block(root1, root1->node);
11457         err |= ret;
11458
11459         btrfs_init_path(&path);
11460         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11461         key.offset = 0;
11462         key.type = BTRFS_ROOT_ITEM_KEY;
11463
11464         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11465         if (ret) {
11466                 error("cannot find extent treet in tree_root");
11467                 goto out;
11468         }
11469
11470         while (1) {
11471                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11472                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11473                         goto next;
11474                 key.offset = (u64)-1;
11475
11476                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11477                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11478                                         &key);
11479                 else
11480                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11481                 if (IS_ERR(cur_root) || !cur_root) {
11482                         error("failed to read tree: %lld", key.objectid);
11483                         goto next;
11484                 }
11485
11486                 ret = traverse_tree_block(cur_root, cur_root->node);
11487                 err |= ret;
11488
11489                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11490                         btrfs_free_fs_root(cur_root);
11491 next:
11492                 ret = btrfs_next_item(root1, &path);
11493                 if (ret)
11494                         goto out;
11495         }
11496
11497 out:
11498         btrfs_release_path(&path);
11499         return err;
11500 }
11501
11502 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11503                            struct btrfs_root *root, int overwrite)
11504 {
11505         struct extent_buffer *c;
11506         struct extent_buffer *old = root->node;
11507         int level;
11508         int ret;
11509         struct btrfs_disk_key disk_key = {0,0,0};
11510
11511         level = 0;
11512
11513         if (overwrite) {
11514                 c = old;
11515                 extent_buffer_get(c);
11516                 goto init;
11517         }
11518         c = btrfs_alloc_free_block(trans, root,
11519                                    root->nodesize,
11520                                    root->root_key.objectid,
11521                                    &disk_key, level, 0, 0);
11522         if (IS_ERR(c)) {
11523                 c = old;
11524                 extent_buffer_get(c);
11525                 overwrite = 1;
11526         }
11527 init:
11528         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11529         btrfs_set_header_level(c, level);
11530         btrfs_set_header_bytenr(c, c->start);
11531         btrfs_set_header_generation(c, trans->transid);
11532         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11533         btrfs_set_header_owner(c, root->root_key.objectid);
11534
11535         write_extent_buffer(c, root->fs_info->fsid,
11536                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11537
11538         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11539                             btrfs_header_chunk_tree_uuid(c),
11540                             BTRFS_UUID_SIZE);
11541
11542         btrfs_mark_buffer_dirty(c);
11543         /*
11544          * this case can happen in the following case:
11545          *
11546          * 1.overwrite previous root.
11547          *
11548          * 2.reinit reloc data root, this is because we skip pin
11549          * down reloc data tree before which means we can allocate
11550          * same block bytenr here.
11551          */
11552         if (old->start == c->start) {
11553                 btrfs_set_root_generation(&root->root_item,
11554                                           trans->transid);
11555                 root->root_item.level = btrfs_header_level(root->node);
11556                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11557                                         &root->root_key, &root->root_item);
11558                 if (ret) {
11559                         free_extent_buffer(c);
11560                         return ret;
11561                 }
11562         }
11563         free_extent_buffer(old);
11564         root->node = c;
11565         add_root_to_dirty_list(root);
11566         return 0;
11567 }
11568
11569 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11570                                 struct extent_buffer *eb, int tree_root)
11571 {
11572         struct extent_buffer *tmp;
11573         struct btrfs_root_item *ri;
11574         struct btrfs_key key;
11575         u64 bytenr;
11576         u32 nodesize;
11577         int level = btrfs_header_level(eb);
11578         int nritems;
11579         int ret;
11580         int i;
11581
11582         /*
11583          * If we have pinned this block before, don't pin it again.
11584          * This can not only avoid forever loop with broken filesystem
11585          * but also give us some speedups.
11586          */
11587         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11588                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11589                 return 0;
11590
11591         btrfs_pin_extent(fs_info, eb->start, eb->len);
11592
11593         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11594         nritems = btrfs_header_nritems(eb);
11595         for (i = 0; i < nritems; i++) {
11596                 if (level == 0) {
11597                         btrfs_item_key_to_cpu(eb, &key, i);
11598                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11599                                 continue;
11600                         /* Skip the extent root and reloc roots */
11601                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11602                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11603                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11604                                 continue;
11605                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11606                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11607
11608                         /*
11609                          * If at any point we start needing the real root we
11610                          * will have to build a stump root for the root we are
11611                          * in, but for now this doesn't actually use the root so
11612                          * just pass in extent_root.
11613                          */
11614                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11615                                               nodesize, 0);
11616                         if (!extent_buffer_uptodate(tmp)) {
11617                                 fprintf(stderr, "Error reading root block\n");
11618                                 return -EIO;
11619                         }
11620                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11621                         free_extent_buffer(tmp);
11622                         if (ret)
11623                                 return ret;
11624                 } else {
11625                         bytenr = btrfs_node_blockptr(eb, i);
11626
11627                         /* If we aren't the tree root don't read the block */
11628                         if (level == 1 && !tree_root) {
11629                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11630                                 continue;
11631                         }
11632
11633                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11634                                               nodesize, 0);
11635                         if (!extent_buffer_uptodate(tmp)) {
11636                                 fprintf(stderr, "Error reading tree block\n");
11637                                 return -EIO;
11638                         }
11639                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11640                         free_extent_buffer(tmp);
11641                         if (ret)
11642                                 return ret;
11643                 }
11644         }
11645
11646         return 0;
11647 }
11648
11649 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11650 {
11651         int ret;
11652
11653         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11654         if (ret)
11655                 return ret;
11656
11657         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11658 }
11659
11660 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11661 {
11662         struct btrfs_block_group_cache *cache;
11663         struct btrfs_path path;
11664         struct extent_buffer *leaf;
11665         struct btrfs_chunk *chunk;
11666         struct btrfs_key key;
11667         int ret;
11668         u64 start;
11669
11670         btrfs_init_path(&path);
11671         key.objectid = 0;
11672         key.type = BTRFS_CHUNK_ITEM_KEY;
11673         key.offset = 0;
11674         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11675         if (ret < 0) {
11676                 btrfs_release_path(&path);
11677                 return ret;
11678         }
11679
11680         /*
11681          * We do this in case the block groups were screwed up and had alloc
11682          * bits that aren't actually set on the chunks.  This happens with
11683          * restored images every time and could happen in real life I guess.
11684          */
11685         fs_info->avail_data_alloc_bits = 0;
11686         fs_info->avail_metadata_alloc_bits = 0;
11687         fs_info->avail_system_alloc_bits = 0;
11688
11689         /* First we need to create the in-memory block groups */
11690         while (1) {
11691                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11692                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11693                         if (ret < 0) {
11694                                 btrfs_release_path(&path);
11695                                 return ret;
11696                         }
11697                         if (ret) {
11698                                 ret = 0;
11699                                 break;
11700                         }
11701                 }
11702                 leaf = path.nodes[0];
11703                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11704                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11705                         path.slots[0]++;
11706                         continue;
11707                 }
11708
11709                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11710                 btrfs_add_block_group(fs_info, 0,
11711                                       btrfs_chunk_type(leaf, chunk),
11712                                       key.objectid, key.offset,
11713                                       btrfs_chunk_length(leaf, chunk));
11714                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11715                                  key.offset + btrfs_chunk_length(leaf, chunk));
11716                 path.slots[0]++;
11717         }
11718         start = 0;
11719         while (1) {
11720                 cache = btrfs_lookup_first_block_group(fs_info, start);
11721                 if (!cache)
11722                         break;
11723                 cache->cached = 1;
11724                 start = cache->key.objectid + cache->key.offset;
11725         }
11726
11727         btrfs_release_path(&path);
11728         return 0;
11729 }
11730
/*
 * Drop a pending balance and re-initialize the data reloc tree.
 *
 * If the tree root holds a balance item, it is deleted together with every
 * item whose objectid is BTRFS_TREE_RELOC_OBJECTID.  Whether or not a balance
 * item was found, the data reloc tree is then re-initialized and given a new
 * root directory.
 *
 * Return 0 on success, or the first error hit on the way.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
                         struct btrfs_fs_info *fs_info)
{
        struct btrfs_root *root = fs_info->tree_root;
        struct btrfs_path path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        /* del_slot is only read while del_nr is non-zero */
        int del_slot, del_nr = 0;
        int ret;
        int found = 0;

        btrfs_init_path(&path);
        key.objectid = BTRFS_BALANCE_OBJECTID;
        key.type = BTRFS_BALANCE_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
        if (ret) {
                /* No balance item is fine, only the reloc tree reset is left */
                if (ret > 0)
                        ret = 0;
                if (!ret)
                        goto reinit_data_reloc;
                else
                        goto out;
        }

        ret = btrfs_del_item(trans, root, &path);
        if (ret)
                goto out;
        btrfs_release_path(&path);

        /* Now remove the reloc tree items, one batch per leaf */
        key.objectid = BTRFS_TREE_RELOC_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
        if (ret < 0)
                goto out;
        while (1) {
                if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
                        /*
                         * End of the leaf.  If the last search didn't give us
                         * a single item to look at, there is nothing left.
                         */
                        if (!found)
                                break;

                        /* Flush the batch collected in this leaf */
                        if (del_nr) {
                                ret = btrfs_del_items(trans, root, &path,
                                                      del_slot, del_nr);
                                del_nr = 0;
                                if (ret)
                                        goto out;
                        }
                        /*
                         * key holds the last item examined at this point, so
                         * the next search resumes right after it.
                         */
                        key.offset++;
                        btrfs_release_path(&path);

                        found = 0;
                        ret = btrfs_search_slot(trans, root, &key, &path,
                                                -1, 1);
                        if (ret < 0)
                                goto out;
                        continue;
                }
                found = 1;
                leaf = path.nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                /* Past the reloc tree objectid, we are done */
                if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
                        break;
                if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
                        path.slots[0]++;
                        continue;
                }
                /* Start a new batch or extend the current one */
                if (!del_nr) {
                        del_slot = path.slots[0];
                        del_nr = 1;
                } else {
                        del_nr++;
                }
                path.slots[0]++;
        }

        /* Delete what was collected before the loop was left */
        if (del_nr) {
                ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
                if (ret)
                        goto out;
        }
        btrfs_release_path(&path);

reinit_data_reloc:
        key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = (u64)-1;
        root = btrfs_read_fs_root(fs_info, &key);
        if (IS_ERR(root)) {
                fprintf(stderr, "Error reading data reloc tree\n");
                ret = PTR_ERR(root);
                goto out;
        }
        record_root_in_trans(trans, root);
        /* Give the data reloc tree a fresh root block and root directory */
        ret = btrfs_fsck_reinit_root(trans, root, 0);
        if (ret)
                goto out;
        ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
out:
        btrfs_release_path(&path);
        return ret;
}
11833
11834 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11835                               struct btrfs_fs_info *fs_info)
11836 {
11837         u64 start = 0;
11838         int ret;
11839
11840         /*
11841          * The only reason we don't do this is because right now we're just
11842          * walking the trees we find and pinning down their bytes, we don't look
11843          * at any of the leaves.  In order to do mixed groups we'd have to check
11844          * the leaves of any fs roots and pin down the bytes for any file
11845          * extents we find.  Not hard but why do it if we don't have to?
11846          */
11847         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11848                 fprintf(stderr, "We don't support re-initing the extent tree "
11849                         "for mixed block groups yet, please notify a btrfs "
11850                         "developer you want to do this so they can add this "
11851                         "functionality.\n");
11852                 return -EINVAL;
11853         }
11854
11855         /*
11856          * first we need to walk all of the trees except the extent tree and pin
11857          * down the bytes that are in use so we don't overwrite any existing
11858          * metadata.
11859          */
11860         ret = pin_metadata_blocks(fs_info);
11861         if (ret) {
11862                 fprintf(stderr, "error pinning down used bytes\n");
11863                 return ret;
11864         }
11865
11866         /*
11867          * Need to drop all the block groups since we're going to recreate all
11868          * of them again.
11869          */
11870         btrfs_free_block_groups(fs_info);
11871         ret = reset_block_groups(fs_info);
11872         if (ret) {
11873                 fprintf(stderr, "error resetting the block groups\n");
11874                 return ret;
11875         }
11876
11877         /* Ok we can allocate now, reinit the extent root */
11878         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11879         if (ret) {
11880                 fprintf(stderr, "extent root initialization failed\n");
11881                 /*
11882                  * When the transaction code is updated we should end the
11883                  * transaction, but for now progs only knows about commit so
11884                  * just return an error.
11885                  */
11886                 return ret;
11887         }
11888
11889         /*
11890          * Now we have all the in-memory block groups setup so we can make
11891          * allocations properly, and the metadata we care about is safe since we
11892          * pinned all of it above.
11893          */
11894         while (1) {
11895                 struct btrfs_block_group_cache *cache;
11896
11897                 cache = btrfs_lookup_first_block_group(fs_info, start);
11898                 if (!cache)
11899                         break;
11900                 start = cache->key.objectid + cache->key.offset;
11901                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11902                                         &cache->key, &cache->item,
11903                                         sizeof(cache->item));
11904                 if (ret) {
11905                         fprintf(stderr, "Error adding block group\n");
11906                         return ret;
11907                 }
11908                 btrfs_extent_post_op(trans, fs_info->extent_root);
11909         }
11910
11911         ret = reset_balance(trans, fs_info);
11912         if (ret)
11913                 fprintf(stderr, "error resetting the pending balance\n");
11914
11915         return ret;
11916 }
11917
11918 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11919 {
11920         struct btrfs_path path;
11921         struct btrfs_trans_handle *trans;
11922         struct btrfs_key key;
11923         int ret;
11924
11925         printf("Recowing metadata block %llu\n", eb->start);
11926         key.objectid = btrfs_header_owner(eb);
11927         key.type = BTRFS_ROOT_ITEM_KEY;
11928         key.offset = (u64)-1;
11929
11930         root = btrfs_read_fs_root(root->fs_info, &key);
11931         if (IS_ERR(root)) {
11932                 fprintf(stderr, "Couldn't find owner root %llu\n",
11933                         key.objectid);
11934                 return PTR_ERR(root);
11935         }
11936
11937         trans = btrfs_start_transaction(root, 1);
11938         if (IS_ERR(trans))
11939                 return PTR_ERR(trans);
11940
11941         btrfs_init_path(&path);
11942         path.lowest_level = btrfs_header_level(eb);
11943         if (path.lowest_level)
11944                 btrfs_node_key_to_cpu(eb, &key, 0);
11945         else
11946                 btrfs_item_key_to_cpu(eb, &key, 0);
11947
11948         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11949         btrfs_commit_transaction(trans, root);
11950         btrfs_release_path(&path);
11951         return ret;
11952 }
11953
11954 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11955 {
11956         struct btrfs_path path;
11957         struct btrfs_trans_handle *trans;
11958         struct btrfs_key key;
11959         int ret;
11960
11961         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11962                bad->key.type, bad->key.offset);
11963         key.objectid = bad->root_id;
11964         key.type = BTRFS_ROOT_ITEM_KEY;
11965         key.offset = (u64)-1;
11966
11967         root = btrfs_read_fs_root(root->fs_info, &key);
11968         if (IS_ERR(root)) {
11969                 fprintf(stderr, "Couldn't find owner root %llu\n",
11970                         key.objectid);
11971                 return PTR_ERR(root);
11972         }
11973
11974         trans = btrfs_start_transaction(root, 1);
11975         if (IS_ERR(trans))
11976                 return PTR_ERR(trans);
11977
11978         btrfs_init_path(&path);
11979         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11980         if (ret) {
11981                 if (ret > 0)
11982                         ret = 0;
11983                 goto out;
11984         }
11985         ret = btrfs_del_item(trans, root, &path);
11986 out:
11987         btrfs_commit_transaction(trans, root);
11988         btrfs_release_path(&path);
11989         return ret;
11990 }
11991
11992 static int zero_log_tree(struct btrfs_root *root)
11993 {
11994         struct btrfs_trans_handle *trans;
11995         int ret;
11996
11997         trans = btrfs_start_transaction(root, 1);
11998         if (IS_ERR(trans)) {
11999                 ret = PTR_ERR(trans);
12000                 return ret;
12001         }
12002         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12003         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12004         ret = btrfs_commit_transaction(trans, root);
12005         return ret;
12006 }
12007
12008 static int populate_csum(struct btrfs_trans_handle *trans,
12009                          struct btrfs_root *csum_root, char *buf, u64 start,
12010                          u64 len)
12011 {
12012         u64 offset = 0;
12013         u64 sectorsize;
12014         int ret = 0;
12015
12016         while (offset < len) {
12017                 sectorsize = csum_root->sectorsize;
12018                 ret = read_extent_data(csum_root, buf, start + offset,
12019                                        &sectorsize, 0);
12020                 if (ret)
12021                         break;
12022                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12023                                             start + offset, buf, sectorsize);
12024                 if (ret)
12025                         break;
12026                 offset += sectorsize;
12027         }
12028         return ret;
12029 }
12030
12031 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12032                                       struct btrfs_root *csum_root,
12033                                       struct btrfs_root *cur_root)
12034 {
12035         struct btrfs_path path;
12036         struct btrfs_key key;
12037         struct extent_buffer *node;
12038         struct btrfs_file_extent_item *fi;
12039         char *buf = NULL;
12040         u64 start = 0;
12041         u64 len = 0;
12042         int slot = 0;
12043         int ret = 0;
12044
12045         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12046         if (!buf)
12047                 return -ENOMEM;
12048
12049         btrfs_init_path(&path);
12050         key.objectid = 0;
12051         key.offset = 0;
12052         key.type = 0;
12053         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12054         if (ret < 0)
12055                 goto out;
12056         /* Iterate all regular file extents and fill its csum */
12057         while (1) {
12058                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12059
12060                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12061                         goto next;
12062                 node = path.nodes[0];
12063                 slot = path.slots[0];
12064                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12065                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12066                         goto next;
12067                 start = btrfs_file_extent_disk_bytenr(node, fi);
12068                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12069
12070                 ret = populate_csum(trans, csum_root, buf, start, len);
12071                 if (ret == -EEXIST)
12072                         ret = 0;
12073                 if (ret < 0)
12074                         goto out;
12075 next:
12076                 /*
12077                  * TODO: if next leaf is corrupted, jump to nearest next valid
12078                  * leaf.
12079                  */
12080                 ret = btrfs_next_item(cur_root, &path);
12081                 if (ret < 0)
12082                         goto out;
12083                 if (ret > 0) {
12084                         ret = 0;
12085                         goto out;
12086                 }
12087         }
12088
12089 out:
12090         btrfs_release_path(&path);
12091         free(buf);
12092         return ret;
12093 }
12094
12095 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12096                                   struct btrfs_root *csum_root)
12097 {
12098         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12099         struct btrfs_path path;
12100         struct btrfs_root *tree_root = fs_info->tree_root;
12101         struct btrfs_root *cur_root;
12102         struct extent_buffer *node;
12103         struct btrfs_key key;
12104         int slot = 0;
12105         int ret = 0;
12106
12107         btrfs_init_path(&path);
12108         key.objectid = BTRFS_FS_TREE_OBJECTID;
12109         key.offset = 0;
12110         key.type = BTRFS_ROOT_ITEM_KEY;
12111         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12112         if (ret < 0)
12113                 goto out;
12114         if (ret > 0) {
12115                 ret = -ENOENT;
12116                 goto out;
12117         }
12118
12119         while (1) {
12120                 node = path.nodes[0];
12121                 slot = path.slots[0];
12122                 btrfs_item_key_to_cpu(node, &key, slot);
12123                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12124                         goto out;
12125                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12126                         goto next;
12127                 if (!is_fstree(key.objectid))
12128                         goto next;
12129                 key.offset = (u64)-1;
12130
12131                 cur_root = btrfs_read_fs_root(fs_info, &key);
12132                 if (IS_ERR(cur_root) || !cur_root) {
12133                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12134                                 key.objectid);
12135                         goto out;
12136                 }
12137                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12138                                 cur_root);
12139                 if (ret < 0)
12140                         goto out;
12141 next:
12142                 ret = btrfs_next_item(tree_root, &path);
12143                 if (ret > 0) {
12144                         ret = 0;
12145                         goto out;
12146                 }
12147                 if (ret < 0)
12148                         goto out;
12149         }
12150
12151 out:
12152         btrfs_release_path(&path);
12153         return ret;
12154 }
12155
12156 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12157                                       struct btrfs_root *csum_root)
12158 {
12159         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12160         struct btrfs_path path;
12161         struct btrfs_extent_item *ei;
12162         struct extent_buffer *leaf;
12163         char *buf;
12164         struct btrfs_key key;
12165         int ret;
12166
12167         btrfs_init_path(&path);
12168         key.objectid = 0;
12169         key.type = BTRFS_EXTENT_ITEM_KEY;
12170         key.offset = 0;
12171         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12172         if (ret < 0) {
12173                 btrfs_release_path(&path);
12174                 return ret;
12175         }
12176
12177         buf = malloc(csum_root->sectorsize);
12178         if (!buf) {
12179                 btrfs_release_path(&path);
12180                 return -ENOMEM;
12181         }
12182
12183         while (1) {
12184                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12185                         ret = btrfs_next_leaf(extent_root, &path);
12186                         if (ret < 0)
12187                                 break;
12188                         if (ret) {
12189                                 ret = 0;
12190                                 break;
12191                         }
12192                 }
12193                 leaf = path.nodes[0];
12194
12195                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12196                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12197                         path.slots[0]++;
12198                         continue;
12199                 }
12200
12201                 ei = btrfs_item_ptr(leaf, path.slots[0],
12202                                     struct btrfs_extent_item);
12203                 if (!(btrfs_extent_flags(leaf, ei) &
12204                       BTRFS_EXTENT_FLAG_DATA)) {
12205                         path.slots[0]++;
12206                         continue;
12207                 }
12208
12209                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12210                                     key.offset);
12211                 if (ret)
12212                         break;
12213                 path.slots[0]++;
12214         }
12215
12216         btrfs_release_path(&path);
12217         free(buf);
12218         return ret;
12219 }
12220
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * When the extent tree has just been re-initialized it no longer holds any
 * extent info, so it cannot be used as the source; the fs/subvolume trees
 * are walked instead.  A non-zero @search_fs_tree selects that path,
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12237
12238 static void free_roots_info_cache(void)
12239 {
12240         if (!roots_info_cache)
12241                 return;
12242
12243         while (!cache_tree_empty(roots_info_cache)) {
12244                 struct cache_extent *entry;
12245                 struct root_item_info *rii;
12246
12247                 entry = first_cache_extent(roots_info_cache);
12248                 if (!entry)
12249                         break;
12250                 remove_cache_extent(roots_info_cache, entry);
12251                 rii = container_of(entry, struct root_item_info, cache_extent);
12252                 free(rii);
12253         }
12254
12255         free(roots_info_cache);
12256         roots_info_cache = NULL;
12257 }
12258
12259 static int build_roots_info_cache(struct btrfs_fs_info *info)
12260 {
12261         int ret = 0;
12262         struct btrfs_key key;
12263         struct extent_buffer *leaf;
12264         struct btrfs_path path;
12265
12266         if (!roots_info_cache) {
12267                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12268                 if (!roots_info_cache)
12269                         return -ENOMEM;
12270                 cache_tree_init(roots_info_cache);
12271         }
12272
12273         btrfs_init_path(&path);
12274         key.objectid = 0;
12275         key.type = BTRFS_EXTENT_ITEM_KEY;
12276         key.offset = 0;
12277         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12278         if (ret < 0)
12279                 goto out;
12280         leaf = path.nodes[0];
12281
12282         while (1) {
12283                 struct btrfs_key found_key;
12284                 struct btrfs_extent_item *ei;
12285                 struct btrfs_extent_inline_ref *iref;
12286                 int slot = path.slots[0];
12287                 int type;
12288                 u64 flags;
12289                 u64 root_id;
12290                 u8 level;
12291                 struct cache_extent *entry;
12292                 struct root_item_info *rii;
12293
12294                 if (slot >= btrfs_header_nritems(leaf)) {
12295                         ret = btrfs_next_leaf(info->extent_root, &path);
12296                         if (ret < 0) {
12297                                 break;
12298                         } else if (ret) {
12299                                 ret = 0;
12300                                 break;
12301                         }
12302                         leaf = path.nodes[0];
12303                         slot = path.slots[0];
12304                 }
12305
12306                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12307
12308                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12309                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12310                         goto next;
12311
12312                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12313                 flags = btrfs_extent_flags(leaf, ei);
12314
12315                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12316                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12317                         goto next;
12318
12319                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12320                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12321                         level = found_key.offset;
12322                 } else {
12323                         struct btrfs_tree_block_info *binfo;
12324
12325                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12326                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12327                         level = btrfs_tree_block_level(leaf, binfo);
12328                 }
12329
12330                 /*
12331                  * For a root extent, it must be of the following type and the
12332                  * first (and only one) iref in the item.
12333                  */
12334                 type = btrfs_extent_inline_ref_type(leaf, iref);
12335                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12336                         goto next;
12337
12338                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12339                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12340                 if (!entry) {
12341                         rii = malloc(sizeof(struct root_item_info));
12342                         if (!rii) {
12343                                 ret = -ENOMEM;
12344                                 goto out;
12345                         }
12346                         rii->cache_extent.start = root_id;
12347                         rii->cache_extent.size = 1;
12348                         rii->level = (u8)-1;
12349                         entry = &rii->cache_extent;
12350                         ret = insert_cache_extent(roots_info_cache, entry);
12351                         ASSERT(ret == 0);
12352                 } else {
12353                         rii = container_of(entry, struct root_item_info,
12354                                            cache_extent);
12355                 }
12356
12357                 ASSERT(rii->cache_extent.start == root_id);
12358                 ASSERT(rii->cache_extent.size == 1);
12359
12360                 if (level > rii->level || rii->level == (u8)-1) {
12361                         rii->level = level;
12362                         rii->bytenr = found_key.objectid;
12363                         rii->gen = btrfs_extent_generation(leaf, ei);
12364                         rii->node_count = 1;
12365                 } else if (level == rii->level) {
12366                         rii->node_count++;
12367                 }
12368 next:
12369                 path.slots[0]++;
12370         }
12371
12372 out:
12373         btrfs_release_path(&path);
12374
12375         return ret;
12376 }
12377
12378 static int maybe_repair_root_item(struct btrfs_path *path,
12379                                   const struct btrfs_key *root_key,
12380                                   const int read_only_mode)
12381 {
12382         const u64 root_id = root_key->objectid;
12383         struct cache_extent *entry;
12384         struct root_item_info *rii;
12385         struct btrfs_root_item ri;
12386         unsigned long offset;
12387
12388         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12389         if (!entry) {
12390                 fprintf(stderr,
12391                         "Error: could not find extent items for root %llu\n",
12392                         root_key->objectid);
12393                 return -ENOENT;
12394         }
12395
12396         rii = container_of(entry, struct root_item_info, cache_extent);
12397         ASSERT(rii->cache_extent.start == root_id);
12398         ASSERT(rii->cache_extent.size == 1);
12399
12400         if (rii->node_count != 1) {
12401                 fprintf(stderr,
12402                         "Error: could not find btree root extent for root %llu\n",
12403                         root_id);
12404                 return -ENOENT;
12405         }
12406
12407         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12408         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12409
12410         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12411             btrfs_root_level(&ri) != rii->level ||
12412             btrfs_root_generation(&ri) != rii->gen) {
12413
12414                 /*
12415                  * If we're in repair mode but our caller told us to not update
12416                  * the root item, i.e. just check if it needs to be updated, don't
12417                  * print this message, since the caller will call us again shortly
12418                  * for the same root item without read only mode (the caller will
12419                  * open a transaction first).
12420                  */
12421                 if (!(read_only_mode && repair))
12422                         fprintf(stderr,
12423                                 "%sroot item for root %llu,"
12424                                 " current bytenr %llu, current gen %llu, current level %u,"
12425                                 " new bytenr %llu, new gen %llu, new level %u\n",
12426                                 (read_only_mode ? "" : "fixing "),
12427                                 root_id,
12428                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12429                                 btrfs_root_level(&ri),
12430                                 rii->bytenr, rii->gen, rii->level);
12431
12432                 if (btrfs_root_generation(&ri) > rii->gen) {
12433                         fprintf(stderr,
12434                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12435                                 root_id, btrfs_root_generation(&ri), rii->gen);
12436                         return -EINVAL;
12437                 }
12438
12439                 if (!read_only_mode) {
12440                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12441                         btrfs_set_root_level(&ri, rii->level);
12442                         btrfs_set_root_generation(&ri, rii->gen);
12443                         write_extent_buffer(path->nodes[0], &ri,
12444                                             offset, sizeof(ri));
12445                 }
12446
12447                 return 1;
12448         }
12449
12450         return 0;
12451 }
12452
12453 /*
12454  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12455  * caused read-only snapshots to be corrupted if they were created at a moment
12456  * when the source subvolume/snapshot had orphan items. The issue was that the
12457  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12458  * node instead of the post orphan cleanup root node.
12459  * So this function, and its callees, just detects and fixes those cases. Even
12460  * though the regression was for read-only snapshots, this function applies to
12461  * any snapshot/subvolume root.
12462  * This must be run before any other repair code - not doing it so, makes other
12463  * repair code delete or modify backrefs in the extent tree for example, which
12464  * will result in an inconsistent fs after repairing the root items.
12465  */
12466 static int repair_root_items(struct btrfs_fs_info *info)
12467 {
12468         struct btrfs_path path;
12469         struct btrfs_key key;
12470         struct extent_buffer *leaf;
12471         struct btrfs_trans_handle *trans = NULL;
12472         int ret = 0;
12473         int bad_roots = 0;
12474         int need_trans = 0;
12475
12476         btrfs_init_path(&path);
12477
12478         ret = build_roots_info_cache(info);
12479         if (ret)
12480                 goto out;
12481
12482         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12483         key.type = BTRFS_ROOT_ITEM_KEY;
12484         key.offset = 0;
12485
12486 again:
12487         /*
12488          * Avoid opening and committing transactions if a leaf doesn't have
12489          * any root items that need to be fixed, so that we avoid rotating
12490          * backup roots unnecessarily.
12491          */
12492         if (need_trans) {
12493                 trans = btrfs_start_transaction(info->tree_root, 1);
12494                 if (IS_ERR(trans)) {
12495                         ret = PTR_ERR(trans);
12496                         goto out;
12497                 }
12498         }
12499
12500         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12501                                 0, trans ? 1 : 0);
12502         if (ret < 0)
12503                 goto out;
12504         leaf = path.nodes[0];
12505
12506         while (1) {
12507                 struct btrfs_key found_key;
12508
12509                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12510                         int no_more_keys = find_next_key(&path, &key);
12511
12512                         btrfs_release_path(&path);
12513                         if (trans) {
12514                                 ret = btrfs_commit_transaction(trans,
12515                                                                info->tree_root);
12516                                 trans = NULL;
12517                                 if (ret < 0)
12518                                         goto out;
12519                         }
12520                         need_trans = 0;
12521                         if (no_more_keys)
12522                                 break;
12523                         goto again;
12524                 }
12525
12526                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12527
12528                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12529                         goto next;
12530                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12531                         goto next;
12532
12533                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12534                 if (ret < 0)
12535                         goto out;
12536                 if (ret) {
12537                         if (!trans && repair) {
12538                                 need_trans = 1;
12539                                 key = found_key;
12540                                 btrfs_release_path(&path);
12541                                 goto again;
12542                         }
12543                         bad_roots++;
12544                 }
12545 next:
12546                 path.slots[0]++;
12547         }
12548         ret = 0;
12549 out:
12550         free_roots_info_cache();
12551         btrfs_release_path(&path);
12552         if (trans)
12553                 btrfs_commit_transaction(trans, info->tree_root);
12554         if (ret < 0)
12555                 return ret;
12556
12557         return bad_roots;
12558 }
12559
12560 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12561 {
12562         struct btrfs_trans_handle *trans;
12563         struct btrfs_block_group_cache *bg_cache;
12564         u64 current = 0;
12565         int ret = 0;
12566
12567         /* Clear all free space cache inodes and its extent data */
12568         while (1) {
12569                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12570                 if (!bg_cache)
12571                         break;
12572                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12573                 if (ret < 0)
12574                         return ret;
12575                 current = bg_cache->key.objectid + bg_cache->key.offset;
12576         }
12577
12578         /* Don't forget to set cache_generation to -1 */
12579         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12580         if (IS_ERR(trans)) {
12581                 error("failed to update super block cache generation");
12582                 return PTR_ERR(trans);
12583         }
12584         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12585         btrfs_commit_transaction(trans, fs_info->tree_root);
12586
12587         return ret;
12588 }
12589
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 * One array element per output line, terminated by NULL.
 */
const char * const cmd_check_usage[] = {
	/* Synopsis and one-line summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* Long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* Super block / backup root selection */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",

	/* Operation mode */
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",

	/* Reports */
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",

	/* Root overrides, progress and space cache */
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
12620
12621 int cmd_check(int argc, char **argv)
12622 {
12623         struct cache_tree root_cache;
12624         struct btrfs_root *root;
12625         struct btrfs_fs_info *info;
12626         u64 bytenr = 0;
12627         u64 subvolid = 0;
12628         u64 tree_root_bytenr = 0;
12629         u64 chunk_root_bytenr = 0;
12630         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12631         int ret;
12632         int err = 0;
12633         u64 num;
12634         int init_csum_tree = 0;
12635         int readonly = 0;
12636         int clear_space_cache = 0;
12637         int qgroup_report = 0;
12638         int qgroups_repaired = 0;
12639         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12640
12641         while(1) {
12642                 int c;
12643                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12644                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12645                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12646                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12647                 static const struct option long_options[] = {
12648                         { "super", required_argument, NULL, 's' },
12649                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12650                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12651                         { "init-csum-tree", no_argument, NULL,
12652                                 GETOPT_VAL_INIT_CSUM },
12653                         { "init-extent-tree", no_argument, NULL,
12654                                 GETOPT_VAL_INIT_EXTENT },
12655                         { "check-data-csum", no_argument, NULL,
12656                                 GETOPT_VAL_CHECK_CSUM },
12657                         { "backup", no_argument, NULL, 'b' },
12658                         { "subvol-extents", required_argument, NULL, 'E' },
12659                         { "qgroup-report", no_argument, NULL, 'Q' },
12660                         { "tree-root", required_argument, NULL, 'r' },
12661                         { "chunk-root", required_argument, NULL,
12662                                 GETOPT_VAL_CHUNK_TREE },
12663                         { "progress", no_argument, NULL, 'p' },
12664                         { "mode", required_argument, NULL,
12665                                 GETOPT_VAL_MODE },
12666                         { "clear-space-cache", required_argument, NULL,
12667                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12668                         { NULL, 0, NULL, 0}
12669                 };
12670
12671                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12672                 if (c < 0)
12673                         break;
12674                 switch(c) {
12675                         case 'a': /* ignored */ break;
12676                         case 'b':
12677                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12678                                 break;
12679                         case 's':
12680                                 num = arg_strtou64(optarg);
12681                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12682                                         error(
12683                                         "super mirror should be less than %d",
12684                                                 BTRFS_SUPER_MIRROR_MAX);
12685                                         exit(1);
12686                                 }
12687                                 bytenr = btrfs_sb_offset(((int)num));
12688                                 printf("using SB copy %llu, bytenr %llu\n", num,
12689                                        (unsigned long long)bytenr);
12690                                 break;
12691                         case 'Q':
12692                                 qgroup_report = 1;
12693                                 break;
12694                         case 'E':
12695                                 subvolid = arg_strtou64(optarg);
12696                                 break;
12697                         case 'r':
12698                                 tree_root_bytenr = arg_strtou64(optarg);
12699                                 break;
12700                         case GETOPT_VAL_CHUNK_TREE:
12701                                 chunk_root_bytenr = arg_strtou64(optarg);
12702                                 break;
12703                         case 'p':
12704                                 ctx.progress_enabled = true;
12705                                 break;
12706                         case '?':
12707                         case 'h':
12708                                 usage(cmd_check_usage);
12709                         case GETOPT_VAL_REPAIR:
12710                                 printf("enabling repair mode\n");
12711                                 repair = 1;
12712                                 ctree_flags |= OPEN_CTREE_WRITES;
12713                                 break;
12714                         case GETOPT_VAL_READONLY:
12715                                 readonly = 1;
12716                                 break;
12717                         case GETOPT_VAL_INIT_CSUM:
12718                                 printf("Creating a new CRC tree\n");
12719                                 init_csum_tree = 1;
12720                                 repair = 1;
12721                                 ctree_flags |= OPEN_CTREE_WRITES;
12722                                 break;
12723                         case GETOPT_VAL_INIT_EXTENT:
12724                                 init_extent_tree = 1;
12725                                 ctree_flags |= (OPEN_CTREE_WRITES |
12726                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12727                                 repair = 1;
12728                                 break;
12729                         case GETOPT_VAL_CHECK_CSUM:
12730                                 check_data_csum = 1;
12731                                 break;
12732                         case GETOPT_VAL_MODE:
12733                                 check_mode = parse_check_mode(optarg);
12734                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12735                                         error("unknown mode: %s", optarg);
12736                                         exit(1);
12737                                 }
12738                                 break;
12739                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12740                                 if (strcmp(optarg, "v1") == 0) {
12741                                         clear_space_cache = 1;
12742                                 } else if (strcmp(optarg, "v2") == 0) {
12743                                         clear_space_cache = 2;
12744                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12745                                 } else {
12746                                         error(
12747                 "invalid argument to --clear-space-cache, must be v1 or v2");
12748                                         exit(1);
12749                                 }
12750                                 ctree_flags |= OPEN_CTREE_WRITES;
12751                                 break;
12752                 }
12753         }
12754
12755         if (check_argc_exact(argc - optind, 1))
12756                 usage(cmd_check_usage);
12757
12758         if (ctx.progress_enabled) {
12759                 ctx.tp = TASK_NOTHING;
12760                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12761         }
12762
12763         /* This check is the only reason for --readonly to exist */
12764         if (readonly && repair) {
12765                 error("repair options are not compatible with --readonly");
12766                 exit(1);
12767         }
12768
12769         /*
12770          * Not supported yet
12771          */
12772         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12773                 error("low memory mode doesn't support repair yet");
12774                 exit(1);
12775         }
12776
12777         radix_tree_init();
12778         cache_tree_init(&root_cache);
12779
12780         if((ret = check_mounted(argv[optind])) < 0) {
12781                 error("could not check mount status: %s", strerror(-ret));
12782                 err |= !!ret;
12783                 goto err_out;
12784         } else if(ret) {
12785                 error("%s is currently mounted, aborting", argv[optind]);
12786                 ret = -EBUSY;
12787                 err |= !!ret;
12788                 goto err_out;
12789         }
12790
12791         /* only allow partial opening under repair mode */
12792         if (repair)
12793                 ctree_flags |= OPEN_CTREE_PARTIAL;
12794
12795         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12796                                   chunk_root_bytenr, ctree_flags);
12797         if (!info) {
12798                 error("cannot open file system");
12799                 ret = -EIO;
12800                 err |= !!ret;
12801                 goto err_out;
12802         }
12803
12804         global_info = info;
12805         root = info->fs_root;
12806         if (clear_space_cache == 1) {
12807                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12808                         error(
12809                 "free space cache v2 detected, use --clear-space-cache v2");
12810                         ret = 1;
12811                         goto close_out;
12812                 }
12813                 printf("Clearing free space cache\n");
12814                 ret = clear_free_space_cache(info);
12815                 if (ret) {
12816                         error("failed to clear free space cache");
12817                         ret = 1;
12818                 } else {
12819                         printf("Free space cache cleared\n");
12820                 }
12821                 goto close_out;
12822         } else if (clear_space_cache == 2) {
12823                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12824                         printf("no free space cache v2 to clear\n");
12825                         ret = 0;
12826                         goto close_out;
12827                 }
12828                 printf("Clear free space cache v2\n");
12829                 ret = btrfs_clear_free_space_tree(info);
12830                 if (ret) {
12831                         error("failed to clear free space cache v2: %d", ret);
12832                         ret = 1;
12833                 } else {
12834                         printf("free space cache v2 cleared\n");
12835                 }
12836                 goto close_out;
12837         }
12838
12839         /*
12840          * repair mode will force us to commit transaction which
12841          * will make us fail to load log tree when mounting.
12842          */
12843         if (repair && btrfs_super_log_root(info->super_copy)) {
12844                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12845                 if (!ret) {
12846                         ret = 1;
12847                         err |= !!ret;
12848                         goto close_out;
12849                 }
12850                 ret = zero_log_tree(root);
12851                 err |= !!ret;
12852                 if (ret) {
12853                         error("failed to zero log tree: %d", ret);
12854                         goto close_out;
12855                 }
12856         }
12857
12858         uuid_unparse(info->super_copy->fsid, uuidbuf);
12859         if (qgroup_report) {
12860                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12861                        uuidbuf);
12862                 ret = qgroup_verify_all(info);
12863                 err |= !!ret;
12864                 if (ret == 0)
12865                         report_qgroups(1);
12866                 goto close_out;
12867         }
12868         if (subvolid) {
12869                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12870                        subvolid, argv[optind], uuidbuf);
12871                 ret = print_extent_state(info, subvolid);
12872                 err |= !!ret;
12873                 goto close_out;
12874         }
12875         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12876
12877         if (!extent_buffer_uptodate(info->tree_root->node) ||
12878             !extent_buffer_uptodate(info->dev_root->node) ||
12879             !extent_buffer_uptodate(info->chunk_root->node)) {
12880                 error("critical roots corrupted, unable to check the filesystem");
12881                 err |= !!ret;
12882                 ret = -EIO;
12883                 goto close_out;
12884         }
12885
12886         if (init_extent_tree || init_csum_tree) {
12887                 struct btrfs_trans_handle *trans;
12888
12889                 trans = btrfs_start_transaction(info->extent_root, 0);
12890                 if (IS_ERR(trans)) {
12891                         error("error starting transaction");
12892                         ret = PTR_ERR(trans);
12893                         err |= !!ret;
12894                         goto close_out;
12895                 }
12896
12897                 if (init_extent_tree) {
12898                         printf("Creating a new extent tree\n");
12899                         ret = reinit_extent_tree(trans, info);
12900                         err |= !!ret;
12901                         if (ret)
12902                                 goto close_out;
12903                 }
12904
12905                 if (init_csum_tree) {
12906                         printf("Reinitialize checksum tree\n");
12907                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12908                         if (ret) {
12909                                 error("checksum tree initialization failed: %d",
12910                                                 ret);
12911                                 ret = -EIO;
12912                                 err |= !!ret;
12913                                 goto close_out;
12914                         }
12915
12916                         ret = fill_csum_tree(trans, info->csum_root,
12917                                              init_extent_tree);
12918                         err |= !!ret;
12919                         if (ret) {
12920                                 error("checksum tree refilling failed: %d", ret);
12921                                 return -EIO;
12922                         }
12923                 }
12924                 /*
12925                  * Ok now we commit and run the normal fsck, which will add
12926                  * extent entries for all of the items it finds.
12927                  */
12928                 ret = btrfs_commit_transaction(trans, info->extent_root);
12929                 err |= !!ret;
12930                 if (ret)
12931                         goto close_out;
12932         }
12933         if (!extent_buffer_uptodate(info->extent_root->node)) {
12934                 error("critical: extent_root, unable to check the filesystem");
12935                 ret = -EIO;
12936                 err |= !!ret;
12937                 goto close_out;
12938         }
12939         if (!extent_buffer_uptodate(info->csum_root->node)) {
12940                 error("critical: csum_root, unable to check the filesystem");
12941                 ret = -EIO;
12942                 err |= !!ret;
12943                 goto close_out;
12944         }
12945
12946         if (!ctx.progress_enabled)
12947                 fprintf(stderr, "checking extents\n");
12948         if (check_mode == CHECK_MODE_LOWMEM)
12949                 ret = check_chunks_and_extents_v2(root);
12950         else
12951                 ret = check_chunks_and_extents(root);
12952         err |= !!ret;
12953         if (ret)
12954                 error(
12955                 "errors found in extent allocation tree or chunk allocation");
12956
12957         ret = repair_root_items(info);
12958         err |= !!ret;
12959         if (ret < 0) {
12960                 error("failed to repair root items: %s", strerror(-ret));
12961                 goto close_out;
12962         }
12963         if (repair) {
12964                 fprintf(stderr, "Fixed %d roots.\n", ret);
12965                 ret = 0;
12966         } else if (ret > 0) {
12967                 fprintf(stderr,
12968                        "Found %d roots with an outdated root item.\n",
12969                        ret);
12970                 fprintf(stderr,
12971                         "Please run a filesystem check with the option --repair to fix them.\n");
12972                 ret = 1;
12973                 err |= !!ret;
12974                 goto close_out;
12975         }
12976
12977         if (!ctx.progress_enabled) {
12978                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12979                         fprintf(stderr, "checking free space tree\n");
12980                 else
12981                         fprintf(stderr, "checking free space cache\n");
12982         }
12983         ret = check_space_cache(root);
12984         err |= !!ret;
12985         if (ret) {
12986                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12987                         error("errors found in free space tree");
12988                 else
12989                         error("errors found in free space cache");
12990                 goto out;
12991         }
12992
12993         /*
12994          * We used to have to have these hole extents in between our real
12995          * extents so if we don't have this flag set we need to make sure there
12996          * are no gaps in the file extents for inodes, otherwise we can just
12997          * ignore it when this happens.
12998          */
12999         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13000         if (!ctx.progress_enabled)
13001                 fprintf(stderr, "checking fs roots\n");
13002         if (check_mode == CHECK_MODE_LOWMEM)
13003                 ret = check_fs_roots_v2(root->fs_info);
13004         else
13005                 ret = check_fs_roots(root, &root_cache);
13006         err |= !!ret;
13007         if (ret) {
13008                 error("errors found in fs roots");
13009                 goto out;
13010         }
13011
13012         fprintf(stderr, "checking csums\n");
13013         ret = check_csums(root);
13014         err |= !!ret;
13015         if (ret) {
13016                 error("errors found in csum tree");
13017                 goto out;
13018         }
13019
13020         fprintf(stderr, "checking root refs\n");
13021         /* For low memory mode, check_fs_roots_v2 handles root refs */
13022         if (check_mode != CHECK_MODE_LOWMEM) {
13023                 ret = check_root_refs(root, &root_cache);
13024                 err |= !!ret;
13025                 if (ret) {
13026                         error("errors found in root refs");
13027                         goto out;
13028                 }
13029         }
13030
13031         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13032                 struct extent_buffer *eb;
13033
13034                 eb = list_first_entry(&root->fs_info->recow_ebs,
13035                                       struct extent_buffer, recow);
13036                 list_del_init(&eb->recow);
13037                 ret = recow_extent_buffer(root, eb);
13038                 err |= !!ret;
13039                 if (ret) {
13040                         error("fails to fix transid errors");
13041                         break;
13042                 }
13043         }
13044
13045         while (!list_empty(&delete_items)) {
13046                 struct bad_item *bad;
13047
13048                 bad = list_first_entry(&delete_items, struct bad_item, list);
13049                 list_del_init(&bad->list);
13050                 if (repair) {
13051                         ret = delete_bad_item(root, bad);
13052                         err |= !!ret;
13053                 }
13054                 free(bad);
13055         }
13056
13057         if (info->quota_enabled) {
13058                 fprintf(stderr, "checking quota groups\n");
13059                 ret = qgroup_verify_all(info);
13060                 err |= !!ret;
13061                 if (ret) {
13062                         error("failed to check quota groups");
13063                         goto out;
13064                 }
13065                 report_qgroups(0);
13066                 ret = repair_qgroups(info, &qgroups_repaired);
13067                 err |= !!ret;
13068                 if (err) {
13069                         error("failed to repair quota groups");
13070                         goto out;
13071                 }
13072                 ret = 0;
13073         }
13074
13075         if (!list_empty(&root->fs_info->recow_ebs)) {
13076                 error("transid errors in file system");
13077                 ret = 1;
13078                 err |= !!ret;
13079         }
13080 out:
13081         if (found_old_backref) { /*
13082                  * there was a disk format change when mixed
13083                  * backref was in testing tree. The old format
13084                  * existed about one week.
13085                  */
13086                 printf("\n * Found old mixed backref format. "
13087                        "The old format is not supported! *"
13088                        "\n * Please mount the FS in readonly mode, "
13089                        "backup data and re-format the FS. *\n\n");
13090                 err |= 1;
13091         }
13092         printf("found %llu bytes used, ",
13093                (unsigned long long)bytes_used);
13094         if (err)
13095                 printf("error(s) found\n");
13096         else
13097                 printf("no error found\n");
13098         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13099         printf("total tree bytes: %llu\n",
13100                (unsigned long long)total_btree_bytes);
13101         printf("total fs tree bytes: %llu\n",
13102                (unsigned long long)total_fs_tree_bytes);
13103         printf("total extent tree bytes: %llu\n",
13104                (unsigned long long)total_extent_tree_bytes);
13105         printf("btree space waste bytes: %llu\n",
13106                (unsigned long long)btree_space_waste);
13107         printf("file data blocks allocated: %llu\n referenced %llu\n",
13108                 (unsigned long long)data_bytes_allocated,
13109                 (unsigned long long)data_bytes_referenced);
13110
13111         free_qgroup_counts();
13112         free_root_recs_tree(&root_cache);
13113 close_out:
13114         close_ctree(root);
13115 err_out:
13116         if (ctx.progress_enabled)
13117                 task_deinit(ctx.info);
13118
13119         return err;
13120 }