btrfs-progs: check: Fix false alert about EXTENT_DATA that shouldn't be a hole
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/* Phases that print_status_check() can report progress for. */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel: nothing to report.  Has to stay the last enumerator. */
	TASK_NOTHING,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/* Check implementations selectable through parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for an unrecognized string */
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL,
};

static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit "not set" value used to initialize
 * extent_record::flag_block_full_backref.
 */
enum {
	FLAG_UNSET = 2,
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Error bits for inode_backref::errors / root_backref::errors */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/* Error bits for inode_record::errors, decoded by print_inode_error() */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual values yet; they are
 * only used internally for error classification.  Every bit must be
 * unique so that two different error classes can be told apart.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * For kernel scrub workaround.  This used to be (1 << 4), which collided
 * with REFERENCER_MISMATCH; it now lives on the first free bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9)
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Terminate the status line written by print_status_check().
 *
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
/*
 * Move or copy every inode record of @src_node into @dst_node.
 *
 * One reference on @src_node is dropped first.  If that was the last one the
 * cache nodes are moved over ("splice"); otherwise new cache nodes are
 * allocated that share the records, taking an extra reference on each.
 * Records whose key already exists in the destination are merged into the
 * existing record.  The root_cache is processed first, then the inode_cache.
 *
 * Always returns 0; allocation and unexpected insert failures hit BUG_ON().
 */
static int splice_shared_node(struct shared_node *src_node,
                              struct shared_node *dst_node)
{
        struct cache_extent *cache;
        struct ptr_node *node, *ins;
        struct cache_tree *src, *dst;
        struct inode_record *rec, *conflict;
        u64 current_ino = 0;
        int splice = 0;
        int ret;

        /* Last reference gone: the source nodes can be moved, not copied */
        if (--src_node->refs == 0)
                splice = 1;
        if (src_node->current)
                current_ino = src_node->current->ino;

        src = &src_node->root_cache;
        dst = &dst_node->root_cache;
again:
        cache = search_cache_extent(src, 0);
        while (cache) {
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                /* Advance before the current node may be removed from src */
                cache = next_cache_extent(cache);

                if (splice) {
                        remove_cache_extent(src, &node->cache);
                        ins = node;
                } else {
                        ins = malloc(sizeof(*ins));
                        BUG_ON(!ins);
                        ins->cache.start = node->cache.start;
                        ins->cache.size = node->cache.size;
                        ins->data = rec;
                        rec->refs++;
                }
                ret = insert_cache_extent(dst, &ins->cache);
                if (ret == -EEXIST) {
                        /*
                         * The destination already tracks this inode: merge
                         * into its record, then drop the reference held on
                         * rec and the cache node that could not be inserted.
                         */
                        conflict = get_inode_rec(dst, rec->ino, 1);
                        BUG_ON(IS_ERR(conflict));
                        merge_inode_recs(rec, conflict, dst);
                        if (rec->checked) {
                                conflict->checked = 1;
                                if (dst_node->current == conflict)
                                        dst_node->current = NULL;
                        }
                        maybe_free_inode_rec(dst, conflict);
                        free_inode_rec(rec);
                        free(ins);
                } else {
                        BUG_ON(ret);
                }
        }

        /* Second pass: repeat the same work for the inode caches */
        if (src == &src_node->root_cache) {
                src = &src_node->inode_cache;
                dst = &dst_node->inode_cache;
                goto again;
        }

        /*
         * dst points at dst_node->inode_cache from here on.  If the source
         * was working on a higher inode number than the destination, the
         * destination's current record is marked checked and the source's
         * inode becomes the destination's current one.
         */
        if (current_ino > 0 && (!dst_node->current ||
            current_ino > dst_node->current->ino)) {
                if (dst_node->current) {
                        dst_node->current->checked = 1;
                        maybe_free_inode_rec(dst, dst_node->current);
                }
                dst_node->current = get_inode_rec(dst, current_ino, 1);
                BUG_ON(IS_ERR(dst_node->current));
        }
        return 0;
}
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (cur + sizeof(*di) + name_len > total ||
1516                     name_len > BTRFS_NAME_LEN) {
1517                         error = REF_ERR_NAME_TOO_LONG;
1518
1519                         if (cur + sizeof(*di) > total)
1520                                 break;
1521                         len = min_t(u32, total - cur - sizeof(*di),
1522                                     BTRFS_NAME_LEN);
1523                 } else {
1524                         len = name_len;
1525                         error = 0;
1526                 }
1527
1528                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1529
1530                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1531                         add_inode_backref(inode_cache, location.objectid,
1532                                           key->objectid, key->offset, namebuf,
1533                                           len, filetype, key->type, error);
1534                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1535                         add_inode_backref(root_cache, location.objectid,
1536                                           key->objectid, key->offset,
1537                                           namebuf, len, filetype,
1538                                           key->type, error);
1539                 } else {
1540                         fprintf(stderr, "invalid location in dir item %u\n",
1541                                 location.type);
1542                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1543                                           key->objectid, key->offset, namebuf,
1544                                           len, filetype, key->type, error);
1545                 }
1546
1547                 len = sizeof(*di) + name_len + data_len;
1548                 di = (struct btrfs_dir_item *)((char *)di + len);
1549                 cur += len;
1550         }
1551         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1552                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1553
1554         return 0;
1555 }
1556
1557 static int process_inode_ref(struct extent_buffer *eb,
1558                              int slot, struct btrfs_key *key,
1559                              struct shared_node *active_node)
1560 {
1561         u32 total;
1562         u32 cur = 0;
1563         u32 len;
1564         u32 name_len;
1565         u64 index;
1566         int error;
1567         struct cache_tree *inode_cache;
1568         struct btrfs_inode_ref *ref;
1569         char namebuf[BTRFS_NAME_LEN];
1570
1571         inode_cache = &active_node->inode_cache;
1572
1573         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1574         total = btrfs_item_size_nr(eb, slot);
1575         while (cur < total) {
1576                 name_len = btrfs_inode_ref_name_len(eb, ref);
1577                 index = btrfs_inode_ref_index(eb, ref);
1578
1579                 /* inode_ref + namelen should not cross item boundary */
1580                 if (cur + sizeof(*ref) + name_len > total ||
1581                     name_len > BTRFS_NAME_LEN) {
1582                         if (total < cur + sizeof(*ref))
1583                                 break;
1584
1585                         /* Still try to read out the remaining part */
1586                         len = min_t(u32, total - cur - sizeof(*ref),
1587                                     BTRFS_NAME_LEN);
1588                         error = REF_ERR_NAME_TOO_LONG;
1589                 } else {
1590                         len = name_len;
1591                         error = 0;
1592                 }
1593
1594                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1595                 add_inode_backref(inode_cache, key->objectid, key->offset,
1596                                   index, namebuf, len, 0, key->type, error);
1597
1598                 len = sizeof(*ref) + name_len;
1599                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1600                 cur += len;
1601         }
1602         return 0;
1603 }
1604
1605 static int process_inode_extref(struct extent_buffer *eb,
1606                                 int slot, struct btrfs_key *key,
1607                                 struct shared_node *active_node)
1608 {
1609         u32 total;
1610         u32 cur = 0;
1611         u32 len;
1612         u32 name_len;
1613         u64 index;
1614         u64 parent;
1615         int error;
1616         struct cache_tree *inode_cache;
1617         struct btrfs_inode_extref *extref;
1618         char namebuf[BTRFS_NAME_LEN];
1619
1620         inode_cache = &active_node->inode_cache;
1621
1622         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1623         total = btrfs_item_size_nr(eb, slot);
1624         while (cur < total) {
1625                 name_len = btrfs_inode_extref_name_len(eb, extref);
1626                 index = btrfs_inode_extref_index(eb, extref);
1627                 parent = btrfs_inode_extref_parent(eb, extref);
1628                 if (name_len <= BTRFS_NAME_LEN) {
1629                         len = name_len;
1630                         error = 0;
1631                 } else {
1632                         len = BTRFS_NAME_LEN;
1633                         error = REF_ERR_NAME_TOO_LONG;
1634                 }
1635                 read_extent_buffer(eb, namebuf,
1636                                    (unsigned long)(extref + 1), len);
1637                 add_inode_backref(inode_cache, key->objectid, parent,
1638                                   index, namebuf, len, 0, key->type, error);
1639
1640                 len = sizeof(*extref) + name_len;
1641                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1642                 cur += len;
1643         }
1644         return 0;
1645
1646 }
1647
1648 static int count_csum_range(struct btrfs_root *root, u64 start,
1649                             u64 len, u64 *found)
1650 {
1651         struct btrfs_key key;
1652         struct btrfs_path path;
1653         struct extent_buffer *leaf;
1654         int ret;
1655         size_t size;
1656         *found = 0;
1657         u64 csum_end;
1658         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1659
1660         btrfs_init_path(&path);
1661
1662         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1663         key.offset = start;
1664         key.type = BTRFS_EXTENT_CSUM_KEY;
1665
1666         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1667                                 &key, &path, 0, 0);
1668         if (ret < 0)
1669                 goto out;
1670         if (ret > 0 && path.slots[0] > 0) {
1671                 leaf = path.nodes[0];
1672                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1673                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1674                     key.type == BTRFS_EXTENT_CSUM_KEY)
1675                         path.slots[0]--;
1676         }
1677
1678         while (len > 0) {
1679                 leaf = path.nodes[0];
1680                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1681                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1682                         if (ret > 0)
1683                                 break;
1684                         else if (ret < 0)
1685                                 goto out;
1686                         leaf = path.nodes[0];
1687                 }
1688
1689                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1690                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1691                     key.type != BTRFS_EXTENT_CSUM_KEY)
1692                         break;
1693
1694                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1695                 if (key.offset >= start + len)
1696                         break;
1697
1698                 if (key.offset > start)
1699                         start = key.offset;
1700
1701                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1702                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1703                 if (csum_end > start) {
1704                         size = min(csum_end - start, len);
1705                         len -= size;
1706                         start += size;
1707                         *found += size;
1708                 }
1709
1710                 path.slots[0]++;
1711         }
1712 out:
1713         btrfs_release_path(&path);
1714         if (ret < 0)
1715                 return ret;
1716         return 0;
1717 }
1718
1719 static int process_file_extent(struct btrfs_root *root,
1720                                 struct extent_buffer *eb,
1721                                 int slot, struct btrfs_key *key,
1722                                 struct shared_node *active_node)
1723 {
1724         struct inode_record *rec;
1725         struct btrfs_file_extent_item *fi;
1726         u64 num_bytes = 0;
1727         u64 disk_bytenr = 0;
1728         u64 extent_offset = 0;
1729         u64 mask = root->sectorsize - 1;
1730         int extent_type;
1731         int ret;
1732
1733         rec = active_node->current;
1734         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1735         rec->found_file_extent = 1;
1736
1737         if (rec->extent_start == (u64)-1) {
1738                 rec->extent_start = key->offset;
1739                 rec->extent_end = key->offset;
1740         }
1741
1742         if (rec->extent_end > key->offset)
1743                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1744         else if (rec->extent_end < key->offset) {
1745                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1746                                            key->offset - rec->extent_end);
1747                 if (ret < 0)
1748                         return ret;
1749         }
1750
1751         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1752         extent_type = btrfs_file_extent_type(eb, fi);
1753
1754         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1755                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1756                 if (num_bytes == 0)
1757                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1758                 rec->found_size += num_bytes;
1759                 num_bytes = (num_bytes + mask) & ~mask;
1760         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1761                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1762                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1763                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1764                 extent_offset = btrfs_file_extent_offset(eb, fi);
1765                 if (num_bytes == 0 || (num_bytes & mask))
1766                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1767                 if (num_bytes + extent_offset >
1768                     btrfs_file_extent_ram_bytes(eb, fi))
1769                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1770                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1771                     (btrfs_file_extent_compression(eb, fi) ||
1772                      btrfs_file_extent_encryption(eb, fi) ||
1773                      btrfs_file_extent_other_encoding(eb, fi)))
1774                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775                 if (disk_bytenr > 0)
1776                         rec->found_size += num_bytes;
1777         } else {
1778                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1779         }
1780         rec->extent_end = key->offset + num_bytes;
1781
1782         /*
1783          * The data reloc tree will copy full extents into its inode and then
1784          * copy the corresponding csums.  Because the extent it copied could be
1785          * a preallocated extent that hasn't been written to yet there may be no
1786          * csums to copy, ergo we won't have csums for our file extent.  This is
1787          * ok so just don't bother checking csums if the inode belongs to the
1788          * data reloc tree.
1789          */
1790         if (disk_bytenr > 0 &&
1791             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1792                 u64 found;
1793                 if (btrfs_file_extent_compression(eb, fi))
1794                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1795                 else
1796                         disk_bytenr += extent_offset;
1797
1798                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1799                 if (ret < 0)
1800                         return ret;
1801                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1802                         if (found > 0)
1803                                 rec->found_csum_item = 1;
1804                         if (found < num_bytes)
1805                                 rec->some_csum_missing = 1;
1806                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1807                         if (found > 0)
1808                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1809                 }
1810         }
1811         return 0;
1812 }
1813
1814 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1815                             struct walk_control *wc)
1816 {
1817         struct btrfs_key key;
1818         u32 nritems;
1819         int i;
1820         int ret = 0;
1821         struct cache_tree *inode_cache;
1822         struct shared_node *active_node;
1823
1824         if (wc->root_level == wc->active_node &&
1825             btrfs_root_refs(&root->root_item) == 0)
1826                 return 0;
1827
1828         active_node = wc->nodes[wc->active_node];
1829         inode_cache = &active_node->inode_cache;
1830         nritems = btrfs_header_nritems(eb);
1831         for (i = 0; i < nritems; i++) {
1832                 btrfs_item_key_to_cpu(eb, &key, i);
1833
1834                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1835                         continue;
1836                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1837                         continue;
1838
1839                 if (active_node->current == NULL ||
1840                     active_node->current->ino < key.objectid) {
1841                         if (active_node->current) {
1842                                 active_node->current->checked = 1;
1843                                 maybe_free_inode_rec(inode_cache,
1844                                                      active_node->current);
1845                         }
1846                         active_node->current = get_inode_rec(inode_cache,
1847                                                              key.objectid, 1);
1848                         BUG_ON(IS_ERR(active_node->current));
1849                 }
1850                 switch (key.type) {
1851                 case BTRFS_DIR_ITEM_KEY:
1852                 case BTRFS_DIR_INDEX_KEY:
1853                         ret = process_dir_item(eb, i, &key, active_node);
1854                         break;
1855                 case BTRFS_INODE_REF_KEY:
1856                         ret = process_inode_ref(eb, i, &key, active_node);
1857                         break;
1858                 case BTRFS_INODE_EXTREF_KEY:
1859                         ret = process_inode_extref(eb, i, &key, active_node);
1860                         break;
1861                 case BTRFS_INODE_ITEM_KEY:
1862                         ret = process_inode_item(eb, i, &key, active_node);
1863                         break;
1864                 case BTRFS_EXTENT_DATA_KEY:
1865                         ret = process_file_extent(root, eb, i, &key,
1866                                                   active_node);
1867                         break;
1868                 default:
1869                         break;
1870                 };
1871         }
1872         return ret;
1873 }
1874
/*
 * Per-level cache of extent reference information for the tree blocks on
 * the path currently being walked, so the same block is not looked up
 * again.  All three arrays are indexed by tree level.
 */
struct node_refs {
        /* start bytenr of the tree block the cached values belong to */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count returned by btrfs_lookup_extent_info() */
        u64 refs[BTRFS_MAX_LEVEL];
        /* non-zero if the block has to be checked from the current root */
        int need_check[BTRFS_MAX_LEVEL];
};
1880
1881 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1882                              struct node_refs *nrefs, u64 level);
1883 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1884                             unsigned int ext_ref);
1885
1886 /*
1887  * Returns >0  Found error, not fatal, should continue
1888  * Returns <0  Fatal error, must exit the whole check
1889  * Returns 0   No errors found
1890  */
1891 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1892                                struct node_refs *nrefs, int *level, int ext_ref)
1893 {
1894         struct extent_buffer *cur = path->nodes[0];
1895         struct btrfs_key key;
1896         u64 cur_bytenr;
1897         u32 nritems;
1898         u64 first_ino = 0;
1899         int root_level = btrfs_header_level(root->node);
1900         int i;
1901         int ret = 0; /* Final return value */
1902         int err = 0; /* Positive error bitmap */
1903
1904         cur_bytenr = cur->start;
1905
1906         /* skip to first inode item or the first inode number change */
1907         nritems = btrfs_header_nritems(cur);
1908         for (i = 0; i < nritems; i++) {
1909                 btrfs_item_key_to_cpu(cur, &key, i);
1910                 if (i == 0)
1911                         first_ino = key.objectid;
1912                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1913                     (first_ino && first_ino != key.objectid))
1914                         break;
1915         }
1916         if (i == nritems) {
1917                 path->slots[0] = nritems;
1918                 return 0;
1919         }
1920         path->slots[0] = i;
1921
1922 again:
1923         err |= check_inode_item(root, path, ext_ref);
1924
1925         if (err & LAST_ITEM)
1926                 goto out;
1927
1928         /* still have inode items in thie leaf */
1929         if (cur->start == cur_bytenr)
1930                 goto again;
1931
1932         /*
1933          * we have switched to another leaf, above nodes may
1934          * have changed, here walk down the path, if a node
1935          * or leaf is shared, check whether we can skip this
1936          * node or leaf.
1937          */
1938         for (i = root_level; i >= 0; i--) {
1939                 if (path->nodes[i]->start == nrefs->bytenr[i])
1940                         continue;
1941
1942                 ret = update_nodes_refs(root,
1943                                 path->nodes[i]->start,
1944                                 nrefs, i);
1945                 if (ret)
1946                         goto out;
1947
1948                 if (!nrefs->need_check[i]) {
1949                         *level += 1;
1950                         break;
1951                 }
1952         }
1953
1954         for (i = 0; i < *level; i++) {
1955                 free_extent_buffer(path->nodes[i]);
1956                 path->nodes[i] = NULL;
1957         }
1958 out:
1959         err &= ~LAST_ITEM;
1960         if (err && !ret)
1961                 ret = err;
1962         return ret;
1963 }
1964
1965 static void reada_walk_down(struct btrfs_root *root,
1966                             struct extent_buffer *node, int slot)
1967 {
1968         u64 bytenr;
1969         u64 ptr_gen;
1970         u32 nritems;
1971         u32 blocksize;
1972         int i;
1973         int level;
1974
1975         level = btrfs_header_level(node);
1976         if (level != 1)
1977                 return;
1978
1979         nritems = btrfs_header_nritems(node);
1980         blocksize = root->nodesize;
1981         for (i = slot; i < nritems; i++) {
1982                 bytenr = btrfs_node_blockptr(node, i);
1983                 ptr_gen = btrfs_node_ptr_generation(node, i);
1984                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1985         }
1986 }
1987
1988 /*
1989  * Check the child node/leaf by the following condition:
1990  * 1. the first item key of the node/leaf should be the same with the one
1991  *    in parent.
1992  * 2. block in parent node should match the child node/leaf.
1993  * 3. generation of parent node and child's header should be consistent.
1994  *
1995  * Or the child node/leaf pointed by the key in parent is not valid.
1996  *
1997  * We hope to check leaf owner too, but since subvol may share leaves,
1998  * which makes leaf owner check not so strong, key check should be
1999  * sufficient enough for that case.
2000  */
2001 static int check_child_node(struct extent_buffer *parent, int slot,
2002                             struct extent_buffer *child)
2003 {
2004         struct btrfs_key parent_key;
2005         struct btrfs_key child_key;
2006         int ret = 0;
2007
2008         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2009         if (btrfs_header_level(child) == 0)
2010                 btrfs_item_key_to_cpu(child, &child_key, 0);
2011         else
2012                 btrfs_node_key_to_cpu(child, &child_key, 0);
2013
2014         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2015                 ret = -EINVAL;
2016                 fprintf(stderr,
2017                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2018                         parent_key.objectid, parent_key.type, parent_key.offset,
2019                         child_key.objectid, child_key.type, child_key.offset);
2020         }
2021         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2022                 ret = -EINVAL;
2023                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2024                         btrfs_node_blockptr(parent, slot),
2025                         btrfs_header_bytenr(child));
2026         }
2027         if (btrfs_node_ptr_generation(parent, slot) !=
2028             btrfs_header_generation(child)) {
2029                 ret = -EINVAL;
2030                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2031                         btrfs_header_generation(child),
2032                         btrfs_node_ptr_generation(parent, slot));
2033         }
2034         return ret;
2035 }
2036
2037 /*
2038  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2039  * in every fs or file tree check. Here we find its all root ids, and only check
2040  * it in the fs or file tree which has the smallest root id.
2041  */
2042 static int need_check(struct btrfs_root *root, struct ulist *roots)
2043 {
2044         struct rb_node *node;
2045         struct ulist_node *u;
2046
2047         if (roots->nnodes == 1)
2048                 return 1;
2049
2050         node = rb_first(&roots->root);
2051         u = rb_entry(node, struct ulist_node, rb_node);
2052         /*
2053          * current root id is not smallest, we skip it and let it be checked
2054          * in the fs or file tree who hash the smallest root id.
2055          */
2056         if (root->objectid != u->val)
2057                 return 0;
2058
2059         return 1;
2060 }
2061
2062 /*
2063  * for a tree node or leaf, we record its reference count, so later if we still
2064  * process this node or leaf, don't need to compute its reference count again.
2065  */
2066 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2067                              struct node_refs *nrefs, u64 level)
2068 {
2069         int check, ret;
2070         u64 refs;
2071         struct ulist *roots;
2072
2073         if (nrefs->bytenr[level] != bytenr) {
2074                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2075                                        level, 1, &refs, NULL);
2076                 if (ret < 0)
2077                         return ret;
2078
2079                 nrefs->bytenr[level] = bytenr;
2080                 nrefs->refs[level] = refs;
2081                 if (refs > 1) {
2082                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2083                                                    0, &roots);
2084                         if (ret)
2085                                 return -EIO;
2086
2087                         check = need_check(root, roots);
2088                         ulist_free(roots);
2089                         nrefs->need_check[level] = check;
2090                 } else {
2091                         nrefs->need_check[level] = 1;
2092                 }
2093         }
2094
2095         return 0;
2096 }
2097
/*
 * Walk down from path->nodes[*level] along the slots recorded in @path until
 * a leaf is reached and processed, or until the walk has to stop.
 *
 * Reference counts of visited blocks are cached in @nrefs.  Blocks with more
 * than one reference are passed to enter_shared_node(); when that returns a
 * positive value for a child, the child is skipped and the next slot is
 * tried.  Each child block is validated against its parent
 * (check_child_node) and structurally (btrfs_check_leaf/btrfs_check_node)
 * before the path descends into it.
 *
 * On return the slot at the final *level is set to the number of items of
 * that block (presumably so that walk_up_tree() moves on from it).
 *
 * Returns 0 on success, a negative value on failure, or the positive value
 * from enter_shared_node() for the starting block.
 */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
                          struct walk_control *wc, int *level,
                          struct node_refs *nrefs)
{
        enum btrfs_tree_block_status status;
        u64 bytenr;
        u64 ptr_gen;
        struct extent_buffer *next;
        struct extent_buffer *cur;
        u32 blocksize;
        int ret, err = 0;
        u64 refs;

        WARN_ON(*level < 0);
        WARN_ON(*level >= BTRFS_MAX_LEVEL);

        /* Get the ref count of the starting block, from the cache if possible */
        if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
                refs = nrefs->refs[*level];
                ret = 0;
        } else {
                ret = btrfs_lookup_extent_info(NULL, root,
                                       path->nodes[*level]->start,
                                       *level, 1, &refs, NULL);
                if (ret < 0) {
                        err = ret;
                        goto out;
                }
                nrefs->bytenr[*level] = path->nodes[*level]->start;
                nrefs->refs[*level] = refs;
        }

        if (refs > 1) {
                ret = enter_shared_node(root, path->nodes[*level]->start,
                                        refs, wc, *level);
                /* A positive return ends the walk of this subtree */
                if (ret > 0) {
                        err = ret;
                        goto out;
                }
        }

        while (*level >= 0) {
                WARN_ON(*level < 0);
                WARN_ON(*level >= BTRFS_MAX_LEVEL);
                cur = path->nodes[*level];

                if (btrfs_header_level(cur) != *level)
                        WARN_ON(1);

                /* All slots of this block have been consumed */
                if (path->slots[*level] >= btrfs_header_nritems(cur))
                        break;
                if (*level == 0) {
                        ret = process_one_leaf(root, cur, wc);
                        if (ret < 0)
                                err = ret;
                        break;
                }
                bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
                ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
                blocksize = root->nodesize;

                /* Ref count of the child, from the cache if possible */
                if (bytenr == nrefs->bytenr[*level - 1]) {
                        refs = nrefs->refs[*level - 1];
                } else {
                        ret = btrfs_lookup_extent_info(NULL, root, bytenr,
                                        *level - 1, 1, &refs, NULL);
                        if (ret < 0) {
                                /* Lookup failed: treat the child as unshared */
                                refs = 0;
                        } else {
                                nrefs->bytenr[*level - 1] = bytenr;
                                nrefs->refs[*level - 1] = refs;
                        }
                }

                if (refs > 1) {
                        ret = enter_shared_node(root, bytenr, refs,
                                                wc, *level - 1);
                        /* Positive return: skip this child, try the next slot */
                        if (ret > 0) {
                                path->slots[*level]++;
                                continue;
                        }
                }

                /* Use the cached buffer if it is up to date, else read it */
                next = btrfs_find_tree_block(root, bytenr, blocksize);
                if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
                        free_extent_buffer(next);
                        reada_walk_down(root, cur, path->slots[*level]);
                        next = read_tree_block(root, bytenr, blocksize,
                                               ptr_gen);
                        if (!extent_buffer_uptodate(next)) {
                                struct btrfs_key node_key;

                                /* Record the unreadable child as corrupt */
                                btrfs_node_key_to_cpu(path->nodes[*level],
                                                      &node_key,
                                                      path->slots[*level]);
                                btrfs_add_corrupt_extent_record(root->fs_info,
                                                &node_key,
                                                path->nodes[*level]->start,
                                                root->nodesize, *level);
                                err = -EIO;
                                goto out;
                        }
                }

                /* Child must agree with the pointer stored in its parent */
                ret = check_child_node(cur, path->slots[*level], next);
                if (ret) {
                        free_extent_buffer(next);
                        err = ret;
                        goto out;
                }

                if (btrfs_is_leaf(next))
                        status = btrfs_check_leaf(root, NULL, next);
                else
                        status = btrfs_check_node(root, NULL, next);
                if (status != BTRFS_TREE_BLOCK_CLEAN) {
                        free_extent_buffer(next);
                        err = -EIO;
                        goto out;
                }

                /* Descend: install the child in the path one level down */
                *level = *level - 1;
                free_extent_buffer(path->nodes[*level]);
                path->nodes[*level] = next;
                path->slots[*level] = 0;
        }
out:
        /* Mark the block at the final level as fully consumed */
        path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
        return err;
}
2227
2228 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2229                             unsigned int ext_ref);
2230
2231 /*
2232  * Returns >0  Found error, should continue
2233  * Returns <0  Fatal error, must exit the whole check
2234  * Returns 0   No errors found
2235  */
2236 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2237                              int *level, struct node_refs *nrefs, int ext_ref)
2238 {
2239         enum btrfs_tree_block_status status;
2240         u64 bytenr;
2241         u64 ptr_gen;
2242         struct extent_buffer *next;
2243         struct extent_buffer *cur;
2244         u32 blocksize;
2245         int ret;
2246
2247         WARN_ON(*level < 0);
2248         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2249
2250         ret = update_nodes_refs(root, path->nodes[*level]->start,
2251                                 nrefs, *level);
2252         if (ret < 0)
2253                 return ret;
2254
2255         while (*level >= 0) {
2256                 WARN_ON(*level < 0);
2257                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2258                 cur = path->nodes[*level];
2259
2260                 if (btrfs_header_level(cur) != *level)
2261                         WARN_ON(1);
2262
2263                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2264                         break;
2265                 /* Don't forgot to check leaf/node validation */
2266                 if (*level == 0) {
2267                         ret = btrfs_check_leaf(root, NULL, cur);
2268                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2269                                 ret = -EIO;
2270                                 break;
2271                         }
2272                         ret = process_one_leaf_v2(root, path, nrefs,
2273                                                   level, ext_ref);
2274                         break;
2275                 } else {
2276                         ret = btrfs_check_node(root, NULL, cur);
2277                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2278                                 ret = -EIO;
2279                                 break;
2280                         }
2281                 }
2282                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2283                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2284                 blocksize = root->nodesize;
2285
2286                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2287                 if (ret)
2288                         break;
2289                 if (!nrefs->need_check[*level - 1]) {
2290                         path->slots[*level]++;
2291                         continue;
2292                 }
2293
2294                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2295                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2296                         free_extent_buffer(next);
2297                         reada_walk_down(root, cur, path->slots[*level]);
2298                         next = read_tree_block(root, bytenr, blocksize,
2299                                                ptr_gen);
2300                         if (!extent_buffer_uptodate(next)) {
2301                                 struct btrfs_key node_key;
2302
2303                                 btrfs_node_key_to_cpu(path->nodes[*level],
2304                                                       &node_key,
2305                                                       path->slots[*level]);
2306                                 btrfs_add_corrupt_extent_record(root->fs_info,
2307                                                 &node_key,
2308                                                 path->nodes[*level]->start,
2309                                                 root->nodesize, *level);
2310                                 ret = -EIO;
2311                                 break;
2312                         }
2313                 }
2314
2315                 ret = check_child_node(cur, path->slots[*level], next);
2316                 if (ret < 0) 
2317                         break;
2318
2319                 if (btrfs_is_leaf(next))
2320                         status = btrfs_check_leaf(root, NULL, next);
2321                 else
2322                         status = btrfs_check_node(root, NULL, next);
2323                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2324                         free_extent_buffer(next);
2325                         ret = -EIO;
2326                         break;
2327                 }
2328
2329                 *level = *level - 1;
2330                 free_extent_buffer(path->nodes[*level]);
2331                 path->nodes[*level] = next;
2332                 path->slots[*level] = 0;
2333         }
2334         return ret;
2335 }
2336
2337 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2338                         struct walk_control *wc, int *level)
2339 {
2340         int i;
2341         struct extent_buffer *leaf;
2342
2343         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2344                 leaf = path->nodes[i];
2345                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2346                         path->slots[i]++;
2347                         *level = i;
2348                         return 0;
2349                 } else {
2350                         free_extent_buffer(path->nodes[*level]);
2351                         path->nodes[*level] = NULL;
2352                         BUG_ON(*level > wc->active_node);
2353                         if (*level == wc->active_node)
2354                                 leave_shared_node(root, wc, *level);
2355                         *level = i + 1;
2356                 }
2357         }
2358         return 1;
2359 }
2360
2361 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2362                            int *level)
2363 {
2364         int i;
2365         struct extent_buffer *leaf;
2366
2367         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2368                 leaf = path->nodes[i];
2369                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2370                         path->slots[i]++;
2371                         *level = i;
2372                         return 0;
2373                 } else {
2374                         free_extent_buffer(path->nodes[*level]);
2375                         path->nodes[*level] = NULL;
2376                         *level = i + 1;
2377                 }
2378         }
2379         return 1;
2380 }
2381
2382 static int check_root_dir(struct inode_record *rec)
2383 {
2384         struct inode_backref *backref;
2385         int ret = -1;
2386
2387         if (!rec->found_inode_item || rec->errors)
2388                 goto out;
2389         if (rec->nlink != 1 || rec->found_link != 0)
2390                 goto out;
2391         if (list_empty(&rec->backrefs))
2392                 goto out;
2393         backref = to_inode_backref(rec->backrefs.next);
2394         if (!backref->found_inode_ref)
2395                 goto out;
2396         if (backref->index != 0 || backref->namelen != 2 ||
2397             memcmp(backref->name, "..", 2))
2398                 goto out;
2399         if (backref->found_dir_index || backref->found_dir_item)
2400                 goto out;
2401         ret = 0;
2402 out:
2403         return ret;
2404 }
2405
2406 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2407                               struct btrfs_root *root, struct btrfs_path *path,
2408                               struct inode_record *rec)
2409 {
2410         struct btrfs_inode_item *ei;
2411         struct btrfs_key key;
2412         int ret;
2413
2414         key.objectid = rec->ino;
2415         key.type = BTRFS_INODE_ITEM_KEY;
2416         key.offset = (u64)-1;
2417
2418         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2419         if (ret < 0)
2420                 goto out;
2421         if (ret) {
2422                 if (!path->slots[0]) {
2423                         ret = -ENOENT;
2424                         goto out;
2425                 }
2426                 path->slots[0]--;
2427                 ret = 0;
2428         }
2429         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2430         if (key.objectid != rec->ino) {
2431                 ret = -ENOENT;
2432                 goto out;
2433         }
2434
2435         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2436                             struct btrfs_inode_item);
2437         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2438         btrfs_mark_buffer_dirty(path->nodes[0]);
2439         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2440         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2441                root->root_key.objectid);
2442 out:
2443         btrfs_release_path(path);
2444         return ret;
2445 }
2446
2447 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2448                                     struct btrfs_root *root,
2449                                     struct btrfs_path *path,
2450                                     struct inode_record *rec)
2451 {
2452         int ret;
2453
2454         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2455         btrfs_release_path(path);
2456         if (!ret)
2457                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2458         return ret;
2459 }
2460
2461 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2462                                struct btrfs_root *root,
2463                                struct btrfs_path *path,
2464                                struct inode_record *rec)
2465 {
2466         struct btrfs_inode_item *ei;
2467         struct btrfs_key key;
2468         int ret = 0;
2469
2470         key.objectid = rec->ino;
2471         key.type = BTRFS_INODE_ITEM_KEY;
2472         key.offset = 0;
2473
2474         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2475         if (ret) {
2476                 if (ret > 0)
2477                         ret = -ENOENT;
2478                 goto out;
2479         }
2480
2481         /* Since ret == 0, no need to check anything */
2482         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2483                             struct btrfs_inode_item);
2484         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2485         btrfs_mark_buffer_dirty(path->nodes[0]);
2486         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2487         printf("reset nbytes for ino %llu root %llu\n",
2488                rec->ino, root->root_key.objectid);
2489 out:
2490         btrfs_release_path(path);
2491         return ret;
2492 }
2493
2494 static int add_missing_dir_index(struct btrfs_root *root,
2495                                  struct cache_tree *inode_cache,
2496                                  struct inode_record *rec,
2497                                  struct inode_backref *backref)
2498 {
2499         struct btrfs_path path;
2500         struct btrfs_trans_handle *trans;
2501         struct btrfs_dir_item *dir_item;
2502         struct extent_buffer *leaf;
2503         struct btrfs_key key;
2504         struct btrfs_disk_key disk_key;
2505         struct inode_record *dir_rec;
2506         unsigned long name_ptr;
2507         u32 data_size = sizeof(*dir_item) + backref->namelen;
2508         int ret;
2509
2510         trans = btrfs_start_transaction(root, 1);
2511         if (IS_ERR(trans))
2512                 return PTR_ERR(trans);
2513
2514         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2515                 (unsigned long long)rec->ino);
2516
2517         btrfs_init_path(&path);
2518         key.objectid = backref->dir;
2519         key.type = BTRFS_DIR_INDEX_KEY;
2520         key.offset = backref->index;
2521         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2522         BUG_ON(ret);
2523
2524         leaf = path.nodes[0];
2525         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2526
2527         disk_key.objectid = cpu_to_le64(rec->ino);
2528         disk_key.type = BTRFS_INODE_ITEM_KEY;
2529         disk_key.offset = 0;
2530
2531         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2532         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2533         btrfs_set_dir_data_len(leaf, dir_item, 0);
2534         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2535         name_ptr = (unsigned long)(dir_item + 1);
2536         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2537         btrfs_mark_buffer_dirty(leaf);
2538         btrfs_release_path(&path);
2539         btrfs_commit_transaction(trans, root);
2540
2541         backref->found_dir_index = 1;
2542         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2543         BUG_ON(IS_ERR(dir_rec));
2544         if (!dir_rec)
2545                 return 0;
2546         dir_rec->found_size += backref->namelen;
2547         if (dir_rec->found_size == dir_rec->isize &&
2548             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2549                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2550         if (dir_rec->found_size != dir_rec->isize)
2551                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2552
2553         return 0;
2554 }
2555
2556 static int delete_dir_index(struct btrfs_root *root,
2557                             struct inode_backref *backref)
2558 {
2559         struct btrfs_trans_handle *trans;
2560         struct btrfs_dir_item *di;
2561         struct btrfs_path path;
2562         int ret = 0;
2563
2564         trans = btrfs_start_transaction(root, 1);
2565         if (IS_ERR(trans))
2566                 return PTR_ERR(trans);
2567
2568         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2569                 (unsigned long long)backref->dir,
2570                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2571                 (unsigned long long)root->objectid);
2572
2573         btrfs_init_path(&path);
2574         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2575                                     backref->name, backref->namelen,
2576                                     backref->index, -1);
2577         if (IS_ERR(di)) {
2578                 ret = PTR_ERR(di);
2579                 btrfs_release_path(&path);
2580                 btrfs_commit_transaction(trans, root);
2581                 if (ret == -ENOENT)
2582                         return 0;
2583                 return ret;
2584         }
2585
2586         if (!di)
2587                 ret = btrfs_del_item(trans, root, &path);
2588         else
2589                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2590         BUG_ON(ret);
2591         btrfs_release_path(&path);
2592         btrfs_commit_transaction(trans, root);
2593         return ret;
2594 }
2595
2596 static int create_inode_item(struct btrfs_root *root,
2597                              struct inode_record *rec,
2598                              int root_dir)
2599 {
2600         struct btrfs_trans_handle *trans;
2601         struct btrfs_inode_item inode_item;
2602         time_t now = time(NULL);
2603         int ret;
2604
2605         trans = btrfs_start_transaction(root, 1);
2606         if (IS_ERR(trans)) {
2607                 ret = PTR_ERR(trans);
2608                 return ret;
2609         }
2610
2611         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2612                 "be incomplete, please check permissions and content after "
2613                 "the fsck completes.\n", (unsigned long long)root->objectid,
2614                 (unsigned long long)rec->ino);
2615
2616         memset(&inode_item, 0, sizeof(inode_item));
2617         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2618         if (root_dir)
2619                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2620         else
2621                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2622         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2623         if (rec->found_dir_item) {
2624                 if (rec->found_file_extent)
2625                         fprintf(stderr, "root %llu inode %llu has both a dir "
2626                                 "item and extents, unsure if it is a dir or a "
2627                                 "regular file so setting it as a directory\n",
2628                                 (unsigned long long)root->objectid,
2629                                 (unsigned long long)rec->ino);
2630                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2631                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2632         } else if (!rec->found_dir_item) {
2633                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2634                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2635         }
2636         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2637         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2638         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2639         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2640         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2641         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2642         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2643         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2644
2645         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2646         BUG_ON(ret);
2647         btrfs_commit_transaction(trans, root);
2648         return 0;
2649 }
2650
2651 static int repair_inode_backrefs(struct btrfs_root *root,
2652                                  struct inode_record *rec,
2653                                  struct cache_tree *inode_cache,
2654                                  int delete)
2655 {
2656         struct inode_backref *tmp, *backref;
2657         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2658         int ret = 0;
2659         int repaired = 0;
2660
2661         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2662                 if (!delete && rec->ino == root_dirid) {
2663                         if (!rec->found_inode_item) {
2664                                 ret = create_inode_item(root, rec, 1);
2665                                 if (ret)
2666                                         break;
2667                                 repaired++;
2668                         }
2669                 }
2670
2671                 /* Index 0 for root dir's are special, don't mess with it */
2672                 if (rec->ino == root_dirid && backref->index == 0)
2673                         continue;
2674
2675                 if (delete &&
2676                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2677                      (backref->found_dir_index && backref->found_inode_ref &&
2678                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2679                         ret = delete_dir_index(root, backref);
2680                         if (ret)
2681                                 break;
2682                         repaired++;
2683                         list_del(&backref->list);
2684                         free(backref);
2685                         continue;
2686                 }
2687
2688                 if (!delete && !backref->found_dir_index &&
2689                     backref->found_dir_item && backref->found_inode_ref) {
2690                         ret = add_missing_dir_index(root, inode_cache, rec,
2691                                                     backref);
2692                         if (ret)
2693                                 break;
2694                         repaired++;
2695                         if (backref->found_dir_item &&
2696                             backref->found_dir_index) {
2697                                 if (!backref->errors &&
2698                                     backref->found_inode_ref) {
2699                                         list_del(&backref->list);
2700                                         free(backref);
2701                                         continue;
2702                                 }
2703                         }
2704                 }
2705
2706                 if (!delete && (!backref->found_dir_index &&
2707                                 !backref->found_dir_item &&
2708                                 backref->found_inode_ref)) {
2709                         struct btrfs_trans_handle *trans;
2710                         struct btrfs_key location;
2711
2712                         ret = check_dir_conflict(root, backref->name,
2713                                                  backref->namelen,
2714                                                  backref->dir,
2715                                                  backref->index);
2716                         if (ret) {
2717                                 /*
2718                                  * let nlink fixing routine to handle it,
2719                                  * which can do it better.
2720                                  */
2721                                 ret = 0;
2722                                 break;
2723                         }
2724                         location.objectid = rec->ino;
2725                         location.type = BTRFS_INODE_ITEM_KEY;
2726                         location.offset = 0;
2727
2728                         trans = btrfs_start_transaction(root, 1);
2729                         if (IS_ERR(trans)) {
2730                                 ret = PTR_ERR(trans);
2731                                 break;
2732                         }
2733                         fprintf(stderr, "adding missing dir index/item pair "
2734                                 "for inode %llu\n",
2735                                 (unsigned long long)rec->ino);
2736                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2737                                                     backref->namelen,
2738                                                     backref->dir, &location,
2739                                                     imode_to_type(rec->imode),
2740                                                     backref->index);
2741                         BUG_ON(ret);
2742                         btrfs_commit_transaction(trans, root);
2743                         repaired++;
2744                 }
2745
2746                 if (!delete && (backref->found_inode_ref &&
2747                                 backref->found_dir_index &&
2748                                 backref->found_dir_item &&
2749                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2750                                 !rec->found_inode_item)) {
2751                         ret = create_inode_item(root, rec, 0);
2752                         if (ret)
2753                                 break;
2754                         repaired++;
2755                 }
2756
2757         }
2758         return ret ? ret : repaired;
2759 }
2760
2761 /*
2762  * To determine the file type for nlink/inode_item repair
2763  *
2764  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2765  * Return -ENOENT if file type is not found.
2766  */
2767 static int find_file_type(struct inode_record *rec, u8 *type)
2768 {
2769         struct inode_backref *backref;
2770
2771         /* For inode item recovered case */
2772         if (rec->found_inode_item) {
2773                 *type = imode_to_type(rec->imode);
2774                 return 0;
2775         }
2776
2777         list_for_each_entry(backref, &rec->backrefs, list) {
2778                 if (backref->found_dir_index || backref->found_dir_item) {
2779                         *type = backref->filetype;
2780                         return 0;
2781                 }
2782         }
2783         return -ENOENT;
2784 }
2785
2786 /*
2787  * To determine the file name for nlink repair
2788  *
2789  * Return 0 if file name is found, set name and namelen.
2790  * Return -ENOENT if file name is not found.
2791  */
2792 static int find_file_name(struct inode_record *rec,
2793                           char *name, int *namelen)
2794 {
2795         struct inode_backref *backref;
2796
2797         list_for_each_entry(backref, &rec->backrefs, list) {
2798                 if (backref->found_dir_index || backref->found_dir_item ||
2799                     backref->found_inode_ref) {
2800                         memcpy(name, backref->name, backref->namelen);
2801                         *namelen = backref->namelen;
2802                         return 0;
2803                 }
2804         }
2805         return -ENOENT;
2806 }
2807
2808 /* Reset the nlink of the inode to the correct one */
2809 static int reset_nlink(struct btrfs_trans_handle *trans,
2810                        struct btrfs_root *root,
2811                        struct btrfs_path *path,
2812                        struct inode_record *rec)
2813 {
2814         struct inode_backref *backref;
2815         struct inode_backref *tmp;
2816         struct btrfs_key key;
2817         struct btrfs_inode_item *inode_item;
2818         int ret = 0;
2819
2820         /* We don't believe this either, reset it and iterate backref */
2821         rec->found_link = 0;
2822
2823         /* Remove all backref including the valid ones */
2824         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2825                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2826                                    backref->index, backref->name,
2827                                    backref->namelen, 0);
2828                 if (ret < 0)
2829                         goto out;
2830
2831                 /* remove invalid backref, so it won't be added back */
2832                 if (!(backref->found_dir_index &&
2833                       backref->found_dir_item &&
2834                       backref->found_inode_ref)) {
2835                         list_del(&backref->list);
2836                         free(backref);
2837                 } else {
2838                         rec->found_link++;
2839                 }
2840         }
2841
2842         /* Set nlink to 0 */
2843         key.objectid = rec->ino;
2844         key.type = BTRFS_INODE_ITEM_KEY;
2845         key.offset = 0;
2846         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2847         if (ret < 0)
2848                 goto out;
2849         if (ret > 0) {
2850                 ret = -ENOENT;
2851                 goto out;
2852         }
2853         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2854                                     struct btrfs_inode_item);
2855         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2856         btrfs_mark_buffer_dirty(path->nodes[0]);
2857         btrfs_release_path(path);
2858
2859         /*
2860          * Add back valid inode_ref/dir_item/dir_index,
2861          * add_link() will handle the nlink inc, so new nlink must be correct
2862          */
2863         list_for_each_entry(backref, &rec->backrefs, list) {
2864                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2865                                      backref->name, backref->namelen,
2866                                      backref->filetype, &backref->index, 1);
2867                 if (ret < 0)
2868                         goto out;
2869         }
2870 out:
2871         btrfs_release_path(path);
2872         return ret;
2873 }
2874
2875 static int get_highest_inode(struct btrfs_trans_handle *trans,
2876                                 struct btrfs_root *root,
2877                                 struct btrfs_path *path,
2878                                 u64 *highest_ino)
2879 {
2880         struct btrfs_key key, found_key;
2881         int ret;
2882
2883         btrfs_init_path(path);
2884         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2885         key.offset = -1;
2886         key.type = BTRFS_INODE_ITEM_KEY;
2887         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2888         if (ret == 1) {
2889                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2890                                 path->slots[0] - 1);
2891                 *highest_ino = found_key.objectid;
2892                 ret = 0;
2893         }
2894         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2895                 ret = -EOVERFLOW;
2896         btrfs_release_path(path);
2897         return ret;
2898 }
2899
2900 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2901                                struct btrfs_root *root,
2902                                struct btrfs_path *path,
2903                                struct inode_record *rec)
2904 {
2905         char *dir_name = "lost+found";
2906         char namebuf[BTRFS_NAME_LEN] = {0};
2907         u64 lost_found_ino;
2908         u32 mode = 0700;
2909         u8 type = 0;
2910         int namelen = 0;
2911         int name_recovered = 0;
2912         int type_recovered = 0;
2913         int ret = 0;
2914
2915         /*
2916          * Get file name and type first before these invalid inode ref
2917          * are deleted by remove_all_invalid_backref()
2918          */
2919         name_recovered = !find_file_name(rec, namebuf, &namelen);
2920         type_recovered = !find_file_type(rec, &type);
2921
2922         if (!name_recovered) {
2923                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2924                        rec->ino, rec->ino);
2925                 namelen = count_digits(rec->ino);
2926                 sprintf(namebuf, "%llu", rec->ino);
2927                 name_recovered = 1;
2928         }
2929         if (!type_recovered) {
2930                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2931                        rec->ino);
2932                 type = BTRFS_FT_REG_FILE;
2933                 type_recovered = 1;
2934         }
2935
2936         ret = reset_nlink(trans, root, path, rec);
2937         if (ret < 0) {
2938                 fprintf(stderr,
2939                         "Failed to reset nlink for inode %llu: %s\n",
2940                         rec->ino, strerror(-ret));
2941                 goto out;
2942         }
2943
2944         if (rec->found_link == 0) {
2945                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2946                 if (ret < 0)
2947                         goto out;
2948                 lost_found_ino++;
2949                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2950                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2951                                   mode);
2952                 if (ret < 0) {
2953                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2954                                 dir_name, strerror(-ret));
2955                         goto out;
2956                 }
2957                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2958                                      namebuf, namelen, type, NULL, 1);
2959                 /*
2960                  * Add ".INO" suffix several times to handle case where
2961                  * "FILENAME.INO" is already taken by another file.
2962                  */
2963                 while (ret == -EEXIST) {
2964                         /*
2965                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2966                          */
2967                         if (namelen + count_digits(rec->ino) + 1 >
2968                             BTRFS_NAME_LEN) {
2969                                 ret = -EFBIG;
2970                                 goto out;
2971                         }
2972                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2973                                  ".%llu", rec->ino);
2974                         namelen += count_digits(rec->ino) + 1;
2975                         ret = btrfs_add_link(trans, root, rec->ino,
2976                                              lost_found_ino, namebuf,
2977                                              namelen, type, NULL, 1);
2978                 }
2979                 if (ret < 0) {
2980                         fprintf(stderr,
2981                                 "Failed to link the inode %llu to %s dir: %s\n",
2982                                 rec->ino, dir_name, strerror(-ret));
2983                         goto out;
2984                 }
2985                 /*
2986                  * Just increase the found_link, don't actually add the
2987                  * backref. This will make things easier and this inode
2988                  * record will be freed after the repair is done.
2989                  * So fsck will not report problem about this inode.
2990                  */
2991                 rec->found_link++;
2992                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2993                        namelen, namebuf, dir_name);
2994         }
2995         printf("Fixed the nlink of inode %llu\n", rec->ino);
2996 out:
2997         /*
2998          * Clear the flag anyway, or we will loop forever for the same inode
2999          * as it will not be removed from the bad inode list and the dead loop
3000          * happens.
3001          */
3002         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3003         btrfs_release_path(path);
3004         return ret;
3005 }
3006
3007 /*
3008  * Check if there is any normal(reg or prealloc) file extent for given
3009  * ino.
3010  * This is used to determine the file type when neither its dir_index/item or
3011  * inode_item exists.
3012  *
3013  * This will *NOT* report error, if any error happens, just consider it does
3014  * not have any normal file extent.
3015  */
3016 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3017 {
3018         struct btrfs_path path;
3019         struct btrfs_key key;
3020         struct btrfs_key found_key;
3021         struct btrfs_file_extent_item *fi;
3022         u8 type;
3023         int ret = 0;
3024
3025         btrfs_init_path(&path);
3026         key.objectid = ino;
3027         key.type = BTRFS_EXTENT_DATA_KEY;
3028         key.offset = 0;
3029
3030         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3031         if (ret < 0) {
3032                 ret = 0;
3033                 goto out;
3034         }
3035         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3036                 ret = btrfs_next_leaf(root, &path);
3037                 if (ret) {
3038                         ret = 0;
3039                         goto out;
3040                 }
3041         }
3042         while (1) {
3043                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3044                                       path.slots[0]);
3045                 if (found_key.objectid != ino ||
3046                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3047                         break;
3048                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3049                                     struct btrfs_file_extent_item);
3050                 type = btrfs_file_extent_type(path.nodes[0], fi);
3051                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3052                         ret = 1;
3053                         goto out;
3054                 }
3055         }
3056 out:
3057         btrfs_release_path(&path);
3058         return ret;
3059 }
3060
3061 static u32 btrfs_type_to_imode(u8 type)
3062 {
3063         static u32 imode_by_btrfs_type[] = {
3064                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3065                 [BTRFS_FT_DIR]          = S_IFDIR,
3066                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3067                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3068                 [BTRFS_FT_FIFO]         = S_IFIFO,
3069                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3070                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3071         };
3072
3073         return imode_by_btrfs_type[(type)];
3074 }
3075
3076 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3077                                 struct btrfs_root *root,
3078                                 struct btrfs_path *path,
3079                                 struct inode_record *rec)
3080 {
3081         u8 filetype;
3082         u32 mode = 0700;
3083         int type_recovered = 0;
3084         int ret = 0;
3085
3086         printf("Trying to rebuild inode:%llu\n", rec->ino);
3087
3088         type_recovered = !find_file_type(rec, &filetype);
3089
3090         /*
3091          * Try to determine inode type if type not found.
3092          *
3093          * For found regular file extent, it must be FILE.
3094          * For found dir_item/index, it must be DIR.
3095          *
3096          * For undetermined one, use FILE as fallback.
3097          *
3098          * TODO:
3099          * 1. If found backref(inode_index/item is already handled) to it,
3100          *    it must be DIR.
3101          *    Need new inode-inode ref structure to allow search for that.
3102          */
3103         if (!type_recovered) {
3104                 if (rec->found_file_extent &&
3105                     find_normal_file_extent(root, rec->ino)) {
3106                         type_recovered = 1;
3107                         filetype = BTRFS_FT_REG_FILE;
3108                 } else if (rec->found_dir_item) {
3109                         type_recovered = 1;
3110                         filetype = BTRFS_FT_DIR;
3111                 } else if (!list_empty(&rec->orphan_extents)) {
3112                         type_recovered = 1;
3113                         filetype = BTRFS_FT_REG_FILE;
3114                 } else{
3115                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3116                                rec->ino);
3117                         type_recovered = 1;
3118                         filetype = BTRFS_FT_REG_FILE;
3119                 }
3120         }
3121
3122         ret = btrfs_new_inode(trans, root, rec->ino,
3123                               mode | btrfs_type_to_imode(filetype));
3124         if (ret < 0)
3125                 goto out;
3126
3127         /*
3128          * Here inode rebuild is done, we only rebuild the inode item,
3129          * don't repair the nlink(like move to lost+found).
3130          * That is the job of nlink repair.
3131          *
3132          * We just fill the record and return
3133          */
3134         rec->found_dir_item = 1;
3135         rec->imode = mode | btrfs_type_to_imode(filetype);
3136         rec->nlink = 0;
3137         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3138         /* Ensure the inode_nlinks repair function will be called */
3139         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3140 out:
3141         return ret;
3142 }
3143
3144 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3145                                       struct btrfs_root *root,
3146                                       struct btrfs_path *path,
3147                                       struct inode_record *rec)
3148 {
3149         struct orphan_data_extent *orphan;
3150         struct orphan_data_extent *tmp;
3151         int ret = 0;
3152
3153         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3154                 /*
3155                  * Check for conflicting file extents
3156                  *
3157                  * Here we don't know whether the extents is compressed or not,
3158                  * so we can only assume it not compressed nor data offset,
3159                  * and use its disk_len as extent length.
3160                  */
3161                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3162                                        orphan->offset, orphan->disk_len, 0);
3163                 btrfs_release_path(path);
3164                 if (ret < 0)
3165                         goto out;
3166                 if (!ret) {
3167                         fprintf(stderr,
3168                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3169                                 orphan->disk_bytenr, orphan->disk_len);
3170                         ret = btrfs_free_extent(trans,
3171                                         root->fs_info->extent_root,
3172                                         orphan->disk_bytenr, orphan->disk_len,
3173                                         0, root->objectid, orphan->objectid,
3174                                         orphan->offset);
3175                         if (ret < 0)
3176                                 goto out;
3177                 }
3178                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3179                                 orphan->offset, orphan->disk_bytenr,
3180                                 orphan->disk_len, orphan->disk_len);
3181                 if (ret < 0)
3182                         goto out;
3183
3184                 /* Update file size info */
3185                 rec->found_size += orphan->disk_len;
3186                 if (rec->found_size == rec->nbytes)
3187                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3188
3189                 /* Update the file extent hole info too */
3190                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3191                                            orphan->disk_len);
3192                 if (ret < 0)
3193                         goto out;
3194                 if (RB_EMPTY_ROOT(&rec->holes))
3195                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3196
3197                 list_del(&orphan->list);
3198                 free(orphan);
3199         }
3200         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3201 out:
3202         return ret;
3203 }
3204
3205 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3206                                         struct btrfs_root *root,
3207                                         struct btrfs_path *path,
3208                                         struct inode_record *rec)
3209 {
3210         struct rb_node *node;
3211         struct file_extent_hole *hole;
3212         int found = 0;
3213         int ret = 0;
3214
3215         node = rb_first(&rec->holes);
3216
3217         while (node) {
3218                 found = 1;
3219                 hole = rb_entry(node, struct file_extent_hole, node);
3220                 ret = btrfs_punch_hole(trans, root, rec->ino,
3221                                        hole->start, hole->len);
3222                 if (ret < 0)
3223                         goto out;
3224                 ret = del_file_extent_hole(&rec->holes, hole->start,
3225                                            hole->len);
3226                 if (ret < 0)
3227                         goto out;
3228                 if (RB_EMPTY_ROOT(&rec->holes))
3229                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3230                 node = rb_first(&rec->holes);
3231         }
3232         /* special case for a file losing all its file extent */
3233         if (!found) {
3234                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3235                                        round_up(rec->isize, root->sectorsize));
3236                 if (ret < 0)
3237                         goto out;
3238         }
3239         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3240                rec->ino, root->objectid);
3241 out:
3242         return ret;
3243 }
3244
3245 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3246 {
3247         struct btrfs_trans_handle *trans;
3248         struct btrfs_path path;
3249         int ret = 0;
3250
3251         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3252                              I_ERR_NO_ORPHAN_ITEM |
3253                              I_ERR_LINK_COUNT_WRONG |
3254                              I_ERR_NO_INODE_ITEM |
3255                              I_ERR_FILE_EXTENT_ORPHAN |
3256                              I_ERR_FILE_EXTENT_DISCOUNT|
3257                              I_ERR_FILE_NBYTES_WRONG)))
3258                 return rec->errors;
3259
3260         /*
3261          * For nlink repair, it may create a dir and add link, so
3262          * 2 for parent(256)'s dir_index and dir_item
3263          * 2 for lost+found dir's inode_item and inode_ref
3264          * 1 for the new inode_ref of the file
3265          * 2 for lost+found dir's dir_index and dir_item for the file
3266          */
3267         trans = btrfs_start_transaction(root, 7);
3268         if (IS_ERR(trans))
3269                 return PTR_ERR(trans);
3270
3271         btrfs_init_path(&path);
3272         if (rec->errors & I_ERR_NO_INODE_ITEM)
3273                 ret = repair_inode_no_item(trans, root, &path, rec);
3274         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3275                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3276         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3277                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3278         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3279                 ret = repair_inode_isize(trans, root, &path, rec);
3280         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3281                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3282         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3283                 ret = repair_inode_nlinks(trans, root, &path, rec);
3284         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3285                 ret = repair_inode_nbytes(trans, root, &path, rec);
3286         btrfs_commit_transaction(trans, root);
3287         btrfs_release_path(&path);
3288         return ret;
3289 }
3290
3291 static int check_inode_recs(struct btrfs_root *root,
3292                             struct cache_tree *inode_cache)
3293 {
3294         struct cache_extent *cache;
3295         struct ptr_node *node;
3296         struct inode_record *rec;
3297         struct inode_backref *backref;
3298         int stage = 0;
3299         int ret = 0;
3300         int err = 0;
3301         u64 error = 0;
3302         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3303
3304         if (btrfs_root_refs(&root->root_item) == 0) {
3305                 if (!cache_tree_empty(inode_cache))
3306                         fprintf(stderr, "warning line %d\n", __LINE__);
3307                 return 0;
3308         }
3309
3310         /*
3311          * We need to repair backrefs first because we could change some of the
3312          * errors in the inode recs.
3313          *
3314          * We also need to go through and delete invalid backrefs first and then
3315          * add the correct ones second.  We do this because we may get EEXIST
3316          * when adding back the correct index because we hadn't yet deleted the
3317          * invalid index.
3318          *
3319          * For example, if we were missing a dir index then the directories
3320          * isize would be wrong, so if we fixed the isize to what we thought it
3321          * would be and then fixed the backref we'd still have a invalid fs, so
3322          * we need to add back the dir index and then check to see if the isize
3323          * is still wrong.
3324          */
3325         while (stage < 3) {
3326                 stage++;
3327                 if (stage == 3 && !err)
3328                         break;
3329
3330                 cache = search_cache_extent(inode_cache, 0);
3331                 while (repair && cache) {
3332                         node = container_of(cache, struct ptr_node, cache);
3333                         rec = node->data;
3334                         cache = next_cache_extent(cache);
3335
3336                         /* Need to free everything up and rescan */
3337                         if (stage == 3) {
3338                                 remove_cache_extent(inode_cache, &node->cache);
3339                                 free(node);
3340                                 free_inode_rec(rec);
3341                                 continue;
3342                         }
3343
3344                         if (list_empty(&rec->backrefs))
3345                                 continue;
3346
3347                         ret = repair_inode_backrefs(root, rec, inode_cache,
3348                                                     stage == 1);
3349                         if (ret < 0) {
3350                                 err = ret;
3351                                 stage = 2;
3352                                 break;
3353                         } if (ret > 0) {
3354                                 err = -EAGAIN;
3355                         }
3356                 }
3357         }
3358         if (err)
3359                 return err;
3360
3361         rec = get_inode_rec(inode_cache, root_dirid, 0);
3362         BUG_ON(IS_ERR(rec));
3363         if (rec) {
3364                 ret = check_root_dir(rec);
3365                 if (ret) {
3366                         fprintf(stderr, "root %llu root dir %llu error\n",
3367                                 (unsigned long long)root->root_key.objectid,
3368                                 (unsigned long long)root_dirid);
3369                         print_inode_error(root, rec);
3370                         error++;
3371                 }
3372         } else {
3373                 if (repair) {
3374                         struct btrfs_trans_handle *trans;
3375
3376                         trans = btrfs_start_transaction(root, 1);
3377                         if (IS_ERR(trans)) {
3378                                 err = PTR_ERR(trans);
3379                                 return err;
3380                         }
3381
3382                         fprintf(stderr,
3383                                 "root %llu missing its root dir, recreating\n",
3384                                 (unsigned long long)root->objectid);
3385
3386                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3387                         BUG_ON(ret);
3388
3389                         btrfs_commit_transaction(trans, root);
3390                         return -EAGAIN;
3391                 }
3392
3393                 fprintf(stderr, "root %llu root dir %llu not found\n",
3394                         (unsigned long long)root->root_key.objectid,
3395                         (unsigned long long)root_dirid);
3396         }
3397
3398         while (1) {
3399                 cache = search_cache_extent(inode_cache, 0);
3400                 if (!cache)
3401                         break;
3402                 node = container_of(cache, struct ptr_node, cache);
3403                 rec = node->data;
3404                 remove_cache_extent(inode_cache, &node->cache);
3405                 free(node);
3406                 if (rec->ino == root_dirid ||
3407                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3408                         free_inode_rec(rec);
3409                         continue;
3410                 }
3411
3412                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3413                         ret = check_orphan_item(root, rec->ino);
3414                         if (ret == 0)
3415                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3416                         if (can_free_inode_rec(rec)) {
3417                                 free_inode_rec(rec);
3418                                 continue;
3419                         }
3420                 }
3421
3422                 if (!rec->found_inode_item)
3423                         rec->errors |= I_ERR_NO_INODE_ITEM;
3424                 if (rec->found_link != rec->nlink)
3425                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3426                 if (repair) {
3427                         ret = try_repair_inode(root, rec);
3428                         if (ret == 0 && can_free_inode_rec(rec)) {
3429                                 free_inode_rec(rec);
3430                                 continue;
3431                         }
3432                         ret = 0;
3433                 }
3434
3435                 if (!(repair && ret == 0))
3436                         error++;
3437                 print_inode_error(root, rec);
3438                 list_for_each_entry(backref, &rec->backrefs, list) {
3439                         if (!backref->found_dir_item)
3440                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3441                         if (!backref->found_dir_index)
3442                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3443                         if (!backref->found_inode_ref)
3444                                 backref->errors |= REF_ERR_NO_INODE_REF;
3445                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3446                                 " namelen %u name %s filetype %d errors %x",
3447                                 (unsigned long long)backref->dir,
3448                                 (unsigned long long)backref->index,
3449                                 backref->namelen, backref->name,
3450                                 backref->filetype, backref->errors);
3451                         print_ref_error(backref->errors);
3452                 }
3453                 free_inode_rec(rec);
3454         }
3455         return (error > 0) ? -1 : 0;
3456 }
3457
3458 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3459                                         u64 objectid)
3460 {
3461         struct cache_extent *cache;
3462         struct root_record *rec = NULL;
3463         int ret;
3464
3465         cache = lookup_cache_extent(root_cache, objectid, 1);
3466         if (cache) {
3467                 rec = container_of(cache, struct root_record, cache);
3468         } else {
3469                 rec = calloc(1, sizeof(*rec));
3470                 if (!rec)
3471                         return ERR_PTR(-ENOMEM);
3472                 rec->objectid = objectid;
3473                 INIT_LIST_HEAD(&rec->backrefs);
3474                 rec->cache.start = objectid;
3475                 rec->cache.size = 1;
3476
3477                 ret = insert_cache_extent(root_cache, &rec->cache);
3478                 if (ret)
3479                         return ERR_PTR(-EEXIST);
3480         }
3481         return rec;
3482 }
3483
3484 static struct root_backref *get_root_backref(struct root_record *rec,
3485                                              u64 ref_root, u64 dir, u64 index,
3486                                              const char *name, int namelen)
3487 {
3488         struct root_backref *backref;
3489
3490         list_for_each_entry(backref, &rec->backrefs, list) {
3491                 if (backref->ref_root != ref_root || backref->dir != dir ||
3492                     backref->namelen != namelen)
3493                         continue;
3494                 if (memcmp(name, backref->name, namelen))
3495                         continue;
3496                 return backref;
3497         }
3498
3499         backref = calloc(1, sizeof(*backref) + namelen + 1);
3500         if (!backref)
3501                 return NULL;
3502         backref->ref_root = ref_root;
3503         backref->dir = dir;
3504         backref->index = index;
3505         backref->namelen = namelen;
3506         memcpy(backref->name, name, namelen);
3507         backref->name[namelen] = '\0';
3508         list_add_tail(&backref->list, &rec->backrefs);
3509         return backref;
3510 }
3511
3512 static void free_root_record(struct cache_extent *cache)
3513 {
3514         struct root_record *rec;
3515         struct root_backref *backref;
3516
3517         rec = container_of(cache, struct root_record, cache);
3518         while (!list_empty(&rec->backrefs)) {
3519                 backref = to_root_backref(rec->backrefs.next);
3520                 list_del(&backref->list);
3521                 free(backref);
3522         }
3523
3524         free(rec);
3525 }
3526
3527 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3528
3529 static int add_root_backref(struct cache_tree *root_cache,
3530                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3531                             const char *name, int namelen,
3532                             int item_type, int errors)
3533 {
3534         struct root_record *rec;
3535         struct root_backref *backref;
3536
3537         rec = get_root_rec(root_cache, root_id);
3538         BUG_ON(IS_ERR(rec));
3539         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3540         BUG_ON(!backref);
3541
3542         backref->errors |= errors;
3543
3544         if (item_type != BTRFS_DIR_ITEM_KEY) {
3545                 if (backref->found_dir_index || backref->found_back_ref ||
3546                     backref->found_forward_ref) {
3547                         if (backref->index != index)
3548                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3549                 } else {
3550                         backref->index = index;
3551                 }
3552         }
3553
3554         if (item_type == BTRFS_DIR_ITEM_KEY) {
3555                 if (backref->found_forward_ref)
3556                         rec->found_ref++;
3557                 backref->found_dir_item = 1;
3558         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3559                 backref->found_dir_index = 1;
3560         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3561                 if (backref->found_forward_ref)
3562                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3563                 else if (backref->found_dir_item)
3564                         rec->found_ref++;
3565                 backref->found_forward_ref = 1;
3566         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3567                 if (backref->found_back_ref)
3568                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3569                 backref->found_back_ref = 1;
3570         } else {
3571                 BUG_ON(1);
3572         }
3573
3574         if (backref->found_forward_ref && backref->found_dir_item)
3575                 backref->reachable = 1;
3576         return 0;
3577 }
3578
3579 static int merge_root_recs(struct btrfs_root *root,
3580                            struct cache_tree *src_cache,
3581                            struct cache_tree *dst_cache)
3582 {
3583         struct cache_extent *cache;
3584         struct ptr_node *node;
3585         struct inode_record *rec;
3586         struct inode_backref *backref;
3587         int ret = 0;
3588
3589         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3590                 free_inode_recs_tree(src_cache);
3591                 return 0;
3592         }
3593
3594         while (1) {
3595                 cache = search_cache_extent(src_cache, 0);
3596                 if (!cache)
3597                         break;
3598                 node = container_of(cache, struct ptr_node, cache);
3599                 rec = node->data;
3600                 remove_cache_extent(src_cache, &node->cache);
3601                 free(node);
3602
3603                 ret = is_child_root(root, root->objectid, rec->ino);
3604                 if (ret < 0)
3605                         break;
3606                 else if (ret == 0)
3607                         goto skip;
3608
3609                 list_for_each_entry(backref, &rec->backrefs, list) {
3610                         BUG_ON(backref->found_inode_ref);
3611                         if (backref->found_dir_item)
3612                                 add_root_backref(dst_cache, rec->ino,
3613                                         root->root_key.objectid, backref->dir,
3614                                         backref->index, backref->name,
3615                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3616                                         backref->errors);
3617                         if (backref->found_dir_index)
3618                                 add_root_backref(dst_cache, rec->ino,
3619                                         root->root_key.objectid, backref->dir,
3620                                         backref->index, backref->name,
3621                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3622                                         backref->errors);
3623                 }
3624 skip:
3625                 free_inode_rec(rec);
3626         }
3627         if (ret < 0)
3628                 return ret;
3629         return 0;
3630 }
3631
3632 static int check_root_refs(struct btrfs_root *root,
3633                            struct cache_tree *root_cache)
3634 {
3635         struct root_record *rec;
3636         struct root_record *ref_root;
3637         struct root_backref *backref;
3638         struct cache_extent *cache;
3639         int loop = 1;
3640         int ret;
3641         int error;
3642         int errors = 0;
3643
3644         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3645         BUG_ON(IS_ERR(rec));
3646         rec->found_ref = 1;
3647
3648         /* fixme: this can not detect circular references */
3649         while (loop) {
3650                 loop = 0;
3651                 cache = search_cache_extent(root_cache, 0);
3652                 while (1) {
3653                         if (!cache)
3654                                 break;
3655                         rec = container_of(cache, struct root_record, cache);
3656                         cache = next_cache_extent(cache);
3657
3658                         if (rec->found_ref == 0)
3659                                 continue;
3660
3661                         list_for_each_entry(backref, &rec->backrefs, list) {
3662                                 if (!backref->reachable)
3663                                         continue;
3664
3665                                 ref_root = get_root_rec(root_cache,
3666                                                         backref->ref_root);
3667                                 BUG_ON(IS_ERR(ref_root));
3668                                 if (ref_root->found_ref > 0)
3669                                         continue;
3670
3671                                 backref->reachable = 0;
3672                                 rec->found_ref--;
3673                                 if (rec->found_ref == 0)
3674                                         loop = 1;
3675                         }
3676                 }
3677         }
3678
3679         cache = search_cache_extent(root_cache, 0);
3680         while (1) {
3681                 if (!cache)
3682                         break;
3683                 rec = container_of(cache, struct root_record, cache);
3684                 cache = next_cache_extent(cache);
3685
3686                 if (rec->found_ref == 0 &&
3687                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3688                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3689                         ret = check_orphan_item(root->fs_info->tree_root,
3690                                                 rec->objectid);
3691                         if (ret == 0)
3692                                 continue;
3693
3694                         /*
3695                          * If we don't have a root item then we likely just have
3696                          * a dir item in a snapshot for this root but no actual
3697                          * ref key or anything so it's meaningless.
3698                          */
3699                         if (!rec->found_root_item)
3700                                 continue;
3701                         errors++;
3702                         fprintf(stderr, "fs tree %llu not referenced\n",
3703                                 (unsigned long long)rec->objectid);
3704                 }
3705
3706                 error = 0;
3707                 if (rec->found_ref > 0 && !rec->found_root_item)
3708                         error = 1;
3709                 list_for_each_entry(backref, &rec->backrefs, list) {
3710                         if (!backref->found_dir_item)
3711                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3712                         if (!backref->found_dir_index)
3713                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3714                         if (!backref->found_back_ref)
3715                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3716                         if (!backref->found_forward_ref)
3717                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3718                         if (backref->reachable && backref->errors)
3719                                 error = 1;
3720                 }
3721                 if (!error)
3722                         continue;
3723
3724                 errors++;
3725                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3726                         (unsigned long long)rec->objectid, rec->found_ref,
3727                          rec->found_root_item ? "" : "not found");
3728
3729                 list_for_each_entry(backref, &rec->backrefs, list) {
3730                         if (!backref->reachable)
3731                                 continue;
3732                         if (!backref->errors && rec->found_root_item)
3733                                 continue;
3734                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3735                                 " index %llu namelen %u name %s errors %x\n",
3736                                 (unsigned long long)backref->ref_root,
3737                                 (unsigned long long)backref->dir,
3738                                 (unsigned long long)backref->index,
3739                                 backref->namelen, backref->name,
3740                                 backref->errors);
3741                         print_ref_error(backref->errors);
3742                 }
3743         }
3744         return errors > 0 ? 1 : 0;
3745 }
3746
3747 static int process_root_ref(struct extent_buffer *eb, int slot,
3748                             struct btrfs_key *key,
3749                             struct cache_tree *root_cache)
3750 {
3751         u64 dirid;
3752         u64 index;
3753         u32 len;
3754         u32 name_len;
3755         struct btrfs_root_ref *ref;
3756         char namebuf[BTRFS_NAME_LEN];
3757         int error;
3758
3759         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3760
3761         dirid = btrfs_root_ref_dirid(eb, ref);
3762         index = btrfs_root_ref_sequence(eb, ref);
3763         name_len = btrfs_root_ref_name_len(eb, ref);
3764
3765         if (name_len <= BTRFS_NAME_LEN) {
3766                 len = name_len;
3767                 error = 0;
3768         } else {
3769                 len = BTRFS_NAME_LEN;
3770                 error = REF_ERR_NAME_TOO_LONG;
3771         }
3772         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3773
3774         if (key->type == BTRFS_ROOT_REF_KEY) {
3775                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3776                                  index, namebuf, len, key->type, error);
3777         } else {
3778                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3779                                  index, namebuf, len, key->type, error);
3780         }
3781         return 0;
3782 }
3783
3784 static void free_corrupt_block(struct cache_extent *cache)
3785 {
3786         struct btrfs_corrupt_block *corrupt;
3787
3788         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3789         free(corrupt);
3790 }
3791
3792 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3793
3794 /*
3795  * Repair the btree of the given root.
3796  *
3797  * The fix is to remove the node key in corrupt_blocks cache_tree.
3798  * and rebalance the tree.
3799  * After the fix, the btree should be writeable.
3800  */
3801 static int repair_btree(struct btrfs_root *root,
3802                         struct cache_tree *corrupt_blocks)
3803 {
3804         struct btrfs_trans_handle *trans;
3805         struct btrfs_path path;
3806         struct btrfs_corrupt_block *corrupt;
3807         struct cache_extent *cache;
3808         struct btrfs_key key;
3809         u64 offset;
3810         int level;
3811         int ret = 0;
3812
3813         if (cache_tree_empty(corrupt_blocks))
3814                 return 0;
3815
3816         trans = btrfs_start_transaction(root, 1);
3817         if (IS_ERR(trans)) {
3818                 ret = PTR_ERR(trans);
3819                 fprintf(stderr, "Error starting transaction: %s\n",
3820                         strerror(-ret));
3821                 return ret;
3822         }
3823         btrfs_init_path(&path);
3824         cache = first_cache_extent(corrupt_blocks);
3825         while (cache) {
3826                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3827                                        cache);
3828                 level = corrupt->level;
3829                 path.lowest_level = level;
3830                 key.objectid = corrupt->key.objectid;
3831                 key.type = corrupt->key.type;
3832                 key.offset = corrupt->key.offset;
3833
3834                 /*
3835                  * Here we don't want to do any tree balance, since it may
3836                  * cause a balance with corrupted brother leaf/node,
3837                  * so ins_len set to 0 here.
3838                  * Balance will be done after all corrupt node/leaf is deleted.
3839                  */
3840                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3841                 if (ret < 0)
3842                         goto out;
3843                 offset = btrfs_node_blockptr(path.nodes[level],
3844                                              path.slots[level]);
3845
3846                 /* Remove the ptr */
3847                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3848                 if (ret < 0)
3849                         goto out;
3850                 /*
3851                  * Remove the corresponding extent
3852                  * return value is not concerned.
3853                  */
3854                 btrfs_release_path(&path);
3855                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3856                                         0, root->root_key.objectid,
3857                                         level - 1, 0);
3858                 cache = next_cache_extent(cache);
3859         }
3860
3861         /* Balance the btree using btrfs_search_slot() */
3862         cache = first_cache_extent(corrupt_blocks);
3863         while (cache) {
3864                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3865                                        cache);
3866                 memcpy(&key, &corrupt->key, sizeof(key));
3867                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3868                 if (ret < 0)
3869                         goto out;
3870                 /* return will always >0 since it won't find the item */
3871                 ret = 0;
3872                 btrfs_release_path(&path);
3873                 cache = next_cache_extent(cache);
3874         }
3875 out:
3876         btrfs_commit_transaction(trans, root);
3877         btrfs_release_path(&path);
3878         return ret;
3879 }
3880
3881 static int check_fs_root(struct btrfs_root *root,
3882                          struct cache_tree *root_cache,
3883                          struct walk_control *wc)
3884 {
3885         int ret = 0;
3886         int err = 0;
3887         int wret;
3888         int level;
3889         struct btrfs_path path;
3890         struct shared_node root_node;
3891         struct root_record *rec;
3892         struct btrfs_root_item *root_item = &root->root_item;
3893         struct cache_tree corrupt_blocks;
3894         struct orphan_data_extent *orphan;
3895         struct orphan_data_extent *tmp;
3896         enum btrfs_tree_block_status status;
3897         struct node_refs nrefs;
3898
3899         /*
3900          * Reuse the corrupt_block cache tree to record corrupted tree block
3901          *
3902          * Unlike the usage in extent tree check, here we do it in a per
3903          * fs/subvol tree base.
3904          */
3905         cache_tree_init(&corrupt_blocks);
3906         root->fs_info->corrupt_blocks = &corrupt_blocks;
3907
3908         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3909                 rec = get_root_rec(root_cache, root->root_key.objectid);
3910                 BUG_ON(IS_ERR(rec));
3911                 if (btrfs_root_refs(root_item) > 0)
3912                         rec->found_root_item = 1;
3913         }
3914
3915         btrfs_init_path(&path);
3916         memset(&root_node, 0, sizeof(root_node));
3917         cache_tree_init(&root_node.root_cache);
3918         cache_tree_init(&root_node.inode_cache);
3919         memset(&nrefs, 0, sizeof(nrefs));
3920
3921         /* Move the orphan extent record to corresponding inode_record */
3922         list_for_each_entry_safe(orphan, tmp,
3923                                  &root->orphan_data_extents, list) {
3924                 struct inode_record *inode;
3925
3926                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3927                                       1);
3928                 BUG_ON(IS_ERR(inode));
3929                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3930                 list_move(&orphan->list, &inode->orphan_extents);
3931         }
3932
3933         level = btrfs_header_level(root->node);
3934         memset(wc->nodes, 0, sizeof(wc->nodes));
3935         wc->nodes[level] = &root_node;
3936         wc->active_node = level;
3937         wc->root_level = level;
3938
3939         /* We may not have checked the root block, lets do that now */
3940         if (btrfs_is_leaf(root->node))
3941                 status = btrfs_check_leaf(root, NULL, root->node);
3942         else
3943                 status = btrfs_check_node(root, NULL, root->node);
3944         if (status != BTRFS_TREE_BLOCK_CLEAN)
3945                 return -EIO;
3946
3947         if (btrfs_root_refs(root_item) > 0 ||
3948             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3949                 path.nodes[level] = root->node;
3950                 extent_buffer_get(root->node);
3951                 path.slots[level] = 0;
3952         } else {
3953                 struct btrfs_key key;
3954                 struct btrfs_disk_key found_key;
3955
3956                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3957                 level = root_item->drop_level;
3958                 path.lowest_level = level;
3959                 if (level > btrfs_header_level(root->node) ||
3960                     level >= BTRFS_MAX_LEVEL) {
3961                         error("ignoring invalid drop level: %u", level);
3962                         goto skip_walking;
3963                 }
3964                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3965                 if (wret < 0)
3966                         goto skip_walking;
3967                 btrfs_node_key(path.nodes[level], &found_key,
3968                                 path.slots[level]);
3969                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3970                                         sizeof(found_key)));
3971         }
3972
3973         while (1) {
3974                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3975                 if (wret < 0)
3976                         ret = wret;
3977                 if (wret != 0)
3978                         break;
3979
3980                 wret = walk_up_tree(root, &path, wc, &level);
3981                 if (wret < 0)
3982                         ret = wret;
3983                 if (wret != 0)
3984                         break;
3985         }
3986 skip_walking:
3987         btrfs_release_path(&path);
3988
3989         if (!cache_tree_empty(&corrupt_blocks)) {
3990                 struct cache_extent *cache;
3991                 struct btrfs_corrupt_block *corrupt;
3992
3993                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3994                        root->root_key.objectid);
3995                 cache = first_cache_extent(&corrupt_blocks);
3996                 while (cache) {
3997                         corrupt = container_of(cache,
3998                                                struct btrfs_corrupt_block,
3999                                                cache);
4000                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4001                                cache->start, corrupt->level,
4002                                corrupt->key.objectid, corrupt->key.type,
4003                                corrupt->key.offset);
4004                         cache = next_cache_extent(cache);
4005                 }
4006                 if (repair) {
4007                         printf("Try to repair the btree for root %llu\n",
4008                                root->root_key.objectid);
4009                         ret = repair_btree(root, &corrupt_blocks);
4010                         if (ret < 0)
4011                                 fprintf(stderr, "Failed to repair btree: %s\n",
4012                                         strerror(-ret));
4013                         if (!ret)
4014                                 printf("Btree for root %llu is fixed\n",
4015                                        root->root_key.objectid);
4016                 }
4017         }
4018
4019         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4020         if (err < 0)
4021                 ret = err;
4022
4023         if (root_node.current) {
4024                 root_node.current->checked = 1;
4025                 maybe_free_inode_rec(&root_node.inode_cache,
4026                                 root_node.current);
4027         }
4028
4029         err = check_inode_recs(root, &root_node.inode_cache);
4030         if (!ret)
4031                 ret = err;
4032
4033         free_corrupt_blocks_tree(&corrupt_blocks);
4034         root->fs_info->corrupt_blocks = NULL;
4035         free_orphan_data_extents(&root->orphan_data_extents);
4036         return ret;
4037 }
4038
4039 static int fs_root_objectid(u64 objectid)
4040 {
4041         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4042             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4043                 return 1;
4044         return is_fstree(objectid);
4045 }
4046
4047 static int check_fs_roots(struct btrfs_root *root,
4048                           struct cache_tree *root_cache)
4049 {
4050         struct btrfs_path path;
4051         struct btrfs_key key;
4052         struct walk_control wc;
4053         struct extent_buffer *leaf, *tree_node;
4054         struct btrfs_root *tmp_root;
4055         struct btrfs_root *tree_root = root->fs_info->tree_root;
4056         int ret;
4057         int err = 0;
4058
4059         if (ctx.progress_enabled) {
4060                 ctx.tp = TASK_FS_ROOTS;
4061                 task_start(ctx.info);
4062         }
4063
4064         /*
4065          * Just in case we made any changes to the extent tree that weren't
4066          * reflected into the free space cache yet.
4067          */
4068         if (repair)
4069                 reset_cached_block_groups(root->fs_info);
4070         memset(&wc, 0, sizeof(wc));
4071         cache_tree_init(&wc.shared);
4072         btrfs_init_path(&path);
4073
4074 again:
4075         key.offset = 0;
4076         key.objectid = 0;
4077         key.type = BTRFS_ROOT_ITEM_KEY;
4078         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4079         if (ret < 0) {
4080                 err = 1;
4081                 goto out;
4082         }
4083         tree_node = tree_root->node;
4084         while (1) {
4085                 if (tree_node != tree_root->node) {
4086                         free_root_recs_tree(root_cache);
4087                         btrfs_release_path(&path);
4088                         goto again;
4089                 }
4090                 leaf = path.nodes[0];
4091                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4092                         ret = btrfs_next_leaf(tree_root, &path);
4093                         if (ret) {
4094                                 if (ret < 0)
4095                                         err = 1;
4096                                 break;
4097                         }
4098                         leaf = path.nodes[0];
4099                 }
4100                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4101                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4102                     fs_root_objectid(key.objectid)) {
4103                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4104                                 tmp_root = btrfs_read_fs_root_no_cache(
4105                                                 root->fs_info, &key);
4106                         } else {
4107                                 key.offset = (u64)-1;
4108                                 tmp_root = btrfs_read_fs_root(
4109                                                 root->fs_info, &key);
4110                         }
4111                         if (IS_ERR(tmp_root)) {
4112                                 err = 1;
4113                                 goto next;
4114                         }
4115                         ret = check_fs_root(tmp_root, root_cache, &wc);
4116                         if (ret == -EAGAIN) {
4117                                 free_root_recs_tree(root_cache);
4118                                 btrfs_release_path(&path);
4119                                 goto again;
4120                         }
4121                         if (ret)
4122                                 err = 1;
4123                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4124                                 btrfs_free_fs_root(tmp_root);
4125                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4126                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4127                         process_root_ref(leaf, path.slots[0], &key,
4128                                          root_cache);
4129                 }
4130 next:
4131                 path.slots[0]++;
4132         }
4133 out:
4134         btrfs_release_path(&path);
4135         if (err)
4136                 free_extent_cache_tree(&wc.shared);
4137         if (!cache_tree_empty(&wc.shared))
4138                 fprintf(stderr, "warning line %d\n", __LINE__);
4139
4140         task_stop(ctx.info);
4141
4142         return err;
4143 }
4144
4145 /*
4146  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4147  * INODE_REF/INODE_EXTREF match.
4148  *
4149  * @root:       the root of the fs/file tree
4150  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4151  * @key:        the key of the DIR_ITEM/DIR_INDEX
4152  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4153  *              distinguish root_dir between normal dir/file
4154  * @name:       the name in the INODE_REF/INODE_EXTREF
4155  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4156  * @mode:       the st_mode of INODE_ITEM
4157  *
4158  * Return 0 if no error occurred.
4159  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4160  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4161  * dir/file.
4162  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4163  * not match for normal dir/file.
4164  */
4165 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4166                          struct btrfs_key *key, u64 index, char *name,
4167                          u32 namelen, u32 mode)
4168 {
4169         struct btrfs_path path;
4170         struct extent_buffer *node;
4171         struct btrfs_dir_item *di;
4172         struct btrfs_key location;
4173         char namebuf[BTRFS_NAME_LEN] = {0};
4174         u32 total;
4175         u32 cur = 0;
4176         u32 len;
4177         u32 name_len;
4178         u32 data_len;
4179         u8 filetype;
4180         int slot;
4181         int ret;
4182
4183         btrfs_init_path(&path);
4184         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4185         if (ret < 0) {
4186                 ret = DIR_ITEM_MISSING;
4187                 goto out;
4188         }
4189
4190         /* Process root dir and goto out*/
4191         if (index == 0) {
4192                 if (ret == 0) {
4193                         ret = ROOT_DIR_ERROR;
4194                         error(
4195                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4196                                 root->objectid,
4197                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4198                                         "REF" : "EXTREF",
4199                                 ref_key->objectid, ref_key->offset,
4200                                 key->type == BTRFS_DIR_ITEM_KEY ?
4201                                         "DIR_ITEM" : "DIR_INDEX");
4202                 } else {
4203                         ret = 0;
4204                 }
4205
4206                 goto out;
4207         }
4208
4209         /* Process normal file/dir */
4210         if (ret > 0) {
4211                 ret = DIR_ITEM_MISSING;
4212                 error(
4213                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4214                         root->objectid,
4215                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4216                         ref_key->objectid, ref_key->offset,
4217                         key->type == BTRFS_DIR_ITEM_KEY ?
4218                                 "DIR_ITEM" : "DIR_INDEX",
4219                         key->objectid, key->offset, namelen, name,
4220                         imode_to_type(mode));
4221                 goto out;
4222         }
4223
4224         /* Check whether inode_id/filetype/name match */
4225         node = path.nodes[0];
4226         slot = path.slots[0];
4227         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4228         total = btrfs_item_size_nr(node, slot);
4229         while (cur < total) {
4230                 ret = DIR_ITEM_MISMATCH;
4231                 name_len = btrfs_dir_name_len(node, di);
4232                 data_len = btrfs_dir_data_len(node, di);
4233
4234                 btrfs_dir_item_key_to_cpu(node, di, &location);
4235                 if (location.objectid != ref_key->objectid ||
4236                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4237                     location.offset != 0)
4238                         goto next;
4239
4240                 filetype = btrfs_dir_type(node, di);
4241                 if (imode_to_type(mode) != filetype)
4242                         goto next;
4243
4244                 if (cur + sizeof(*di) + name_len > total ||
4245                     name_len > BTRFS_NAME_LEN) {
4246                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4247                                 root->objectid,
4248                                 key->type == BTRFS_DIR_ITEM_KEY ?
4249                                 "DIR_ITEM" : "DIR_INDEX",
4250                                 key->objectid, key->offset, name_len);
4251
4252                         if (cur + sizeof(*di) > total)
4253                                 break;
4254                         len = min_t(u32, total - cur - sizeof(*di),
4255                                     BTRFS_NAME_LEN);
4256                 } else {
4257                         len = name_len;
4258                 }
4259
4260                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4261                 if (len != namelen || strncmp(namebuf, name, len))
4262                         goto next;
4263
4264                 ret = 0;
4265                 goto out;
4266 next:
4267                 len = sizeof(*di) + name_len + data_len;
4268                 di = (struct btrfs_dir_item *)((char *)di + len);
4269                 cur += len;
4270         }
4271         if (ret == DIR_ITEM_MISMATCH)
4272                 error(
4273                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4274                         root->objectid,
4275                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4276                         ref_key->objectid, ref_key->offset,
4277                         key->type == BTRFS_DIR_ITEM_KEY ?
4278                                 "DIR_ITEM" : "DIR_INDEX",
4279                         key->objectid, key->offset, namelen, name,
4280                         imode_to_type(mode));
4281 out:
4282         btrfs_release_path(&path);
4283         return ret;
4284 }
4285
4286 /*
4287  * Traverse the given INODE_REF and call find_dir_item() to find related
4288  * DIR_ITEM/DIR_INDEX.
4289  *
4290  * @root:       the root of the fs/file tree
4291  * @ref_key:    the key of the INODE_REF
4292  * @refs:       the count of INODE_REF
4293  * @mode:       the st_mode of INODE_ITEM
4294  *
4295  * Return 0 if no error occurred.
4296  */
4297 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4298                            struct extent_buffer *node, int slot, u64 *refs,
4299                            int mode)
4300 {
4301         struct btrfs_key key;
4302         struct btrfs_inode_ref *ref;
4303         char namebuf[BTRFS_NAME_LEN] = {0};
4304         u32 total;
4305         u32 cur = 0;
4306         u32 len;
4307         u32 name_len;
4308         u64 index;
4309         int ret, err = 0;
4310
4311         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4312         total = btrfs_item_size_nr(node, slot);
4313
4314 next:
4315         /* Update inode ref count */
4316         (*refs)++;
4317
4318         index = btrfs_inode_ref_index(node, ref);
4319         name_len = btrfs_inode_ref_name_len(node, ref);
4320         if (cur + sizeof(*ref) + name_len > total ||
4321             name_len > BTRFS_NAME_LEN) {
4322                 warning("root %llu INODE_REF[%llu %llu] name too long",
4323                         root->objectid, ref_key->objectid, ref_key->offset);
4324
4325                 if (total < cur + sizeof(*ref))
4326                         goto out;
4327                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4328         } else {
4329                 len = name_len;
4330         }
4331
4332         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4333
4334         /* Check root dir ref name */
4335         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4336                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4337                       root->objectid, ref_key->objectid, ref_key->offset,
4338                       namebuf);
4339                 err |= ROOT_DIR_ERROR;
4340         }
4341
4342         /* Find related DIR_INDEX */
4343         key.objectid = ref_key->offset;
4344         key.type = BTRFS_DIR_INDEX_KEY;
4345         key.offset = index;
4346         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4347         err |= ret;
4348
4349         /* Find related dir_item */
4350         key.objectid = ref_key->offset;
4351         key.type = BTRFS_DIR_ITEM_KEY;
4352         key.offset = btrfs_name_hash(namebuf, len);
4353         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4354         err |= ret;
4355
4356         len = sizeof(*ref) + name_len;
4357         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4358         cur += len;
4359         if (cur < total)
4360                 goto next;
4361
4362 out:
4363         return err;
4364 }
4365
4366 /*
4367  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4368  * DIR_ITEM/DIR_INDEX.
4369  *
4370  * @root:       the root of the fs/file tree
4371  * @ref_key:    the key of the INODE_EXTREF
4372  * @refs:       the count of INODE_EXTREF
4373  * @mode:       the st_mode of INODE_ITEM
4374  *
4375  * Return 0 if no error occurred.
4376  */
4377 static int check_inode_extref(struct btrfs_root *root,
4378                               struct btrfs_key *ref_key,
4379                               struct extent_buffer *node, int slot, u64 *refs,
4380                               int mode)
4381 {
4382         struct btrfs_key key;
4383         struct btrfs_inode_extref *extref;
4384         char namebuf[BTRFS_NAME_LEN] = {0};
4385         u32 total;
4386         u32 cur = 0;
4387         u32 len;
4388         u32 name_len;
4389         u64 index;
4390         u64 parent;
4391         int ret;
4392         int err = 0;
4393
4394         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4395         total = btrfs_item_size_nr(node, slot);
4396
4397 next:
4398         /* update inode ref count */
4399         (*refs)++;
4400         name_len = btrfs_inode_extref_name_len(node, extref);
4401         index = btrfs_inode_extref_index(node, extref);
4402         parent = btrfs_inode_extref_parent(node, extref);
4403         if (name_len <= BTRFS_NAME_LEN) {
4404                 len = name_len;
4405         } else {
4406                 len = BTRFS_NAME_LEN;
4407                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4408                         root->objectid, ref_key->objectid, ref_key->offset);
4409         }
4410         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4411
4412         /* Check root dir ref name */
4413         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4414                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4415                       root->objectid, ref_key->objectid, ref_key->offset,
4416                       namebuf);
4417                 err |= ROOT_DIR_ERROR;
4418         }
4419
4420         /* find related dir_index */
4421         key.objectid = parent;
4422         key.type = BTRFS_DIR_INDEX_KEY;
4423         key.offset = index;
4424         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4425         err |= ret;
4426
4427         /* find related dir_item */
4428         key.objectid = parent;
4429         key.type = BTRFS_DIR_ITEM_KEY;
4430         key.offset = btrfs_name_hash(namebuf, len);
4431         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4432         err |= ret;
4433
4434         len = sizeof(*extref) + name_len;
4435         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4436         cur += len;
4437
4438         if (cur < total)
4439                 goto next;
4440
4441         return err;
4442 }
4443
4444 /*
4445  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4446  * DIR_ITEM/DIR_INDEX match.
4447  *
4448  * @root:       the root of the fs/file tree
4449  * @key:        the key of the INODE_REF/INODE_EXTREF
4450  * @name:       the name in the INODE_REF/INODE_EXTREF
4451  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4452  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4453  * to (u64)-1
4454  * @ext_ref:    the EXTENDED_IREF feature
4455  *
4456  * Return 0 if no error occurred.
4457  * Return >0 for error bitmap
4458  */
4459 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4460                           char *name, int namelen, u64 index,
4461                           unsigned int ext_ref)
4462 {
4463         struct btrfs_path path;
4464         struct btrfs_inode_ref *ref;
4465         struct btrfs_inode_extref *extref;
4466         struct extent_buffer *node;
4467         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4468         u32 total;
4469         u32 cur = 0;
4470         u32 len;
4471         u32 ref_namelen;
4472         u64 ref_index;
4473         u64 parent;
4474         u64 dir_id;
4475         int slot;
4476         int ret;
4477
4478         btrfs_init_path(&path);
4479         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4480         if (ret) {
4481                 ret = INODE_REF_MISSING;
4482                 goto extref;
4483         }
4484
4485         node = path.nodes[0];
4486         slot = path.slots[0];
4487
4488         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4489         total = btrfs_item_size_nr(node, slot);
4490
4491         /* Iterate all entry of INODE_REF */
4492         while (cur < total) {
4493                 ret = INODE_REF_MISSING;
4494
4495                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4496                 ref_index = btrfs_inode_ref_index(node, ref);
4497                 if (index != (u64)-1 && index != ref_index)
4498                         goto next_ref;
4499
4500                 if (cur + sizeof(*ref) + ref_namelen > total ||
4501                     ref_namelen > BTRFS_NAME_LEN) {
4502                         warning("root %llu INODE %s[%llu %llu] name too long",
4503                                 root->objectid,
4504                                 key->type == BTRFS_INODE_REF_KEY ?
4505                                         "REF" : "EXTREF",
4506                                 key->objectid, key->offset);
4507
4508                         if (cur + sizeof(*ref) > total)
4509                                 break;
4510                         len = min_t(u32, total - cur - sizeof(*ref),
4511                                     BTRFS_NAME_LEN);
4512                 } else {
4513                         len = ref_namelen;
4514                 }
4515
4516                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4517                                    len);
4518
4519                 if (len != namelen || strncmp(ref_namebuf, name, len))
4520                         goto next_ref;
4521
4522                 ret = 0;
4523                 goto out;
4524 next_ref:
4525                 len = sizeof(*ref) + ref_namelen;
4526                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4527                 cur += len;
4528         }
4529
4530 extref:
4531         /* Skip if not support EXTENDED_IREF feature */
4532         if (!ext_ref)
4533                 goto out;
4534
4535         btrfs_release_path(&path);
4536         btrfs_init_path(&path);
4537
4538         dir_id = key->offset;
4539         key->type = BTRFS_INODE_EXTREF_KEY;
4540         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4541
4542         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4543         if (ret) {
4544                 ret = INODE_REF_MISSING;
4545                 goto out;
4546         }
4547
4548         node = path.nodes[0];
4549         slot = path.slots[0];
4550
4551         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4552         cur = 0;
4553         total = btrfs_item_size_nr(node, slot);
4554
4555         /* Iterate all entry of INODE_EXTREF */
4556         while (cur < total) {
4557                 ret = INODE_REF_MISSING;
4558
4559                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4560                 ref_index = btrfs_inode_extref_index(node, extref);
4561                 parent = btrfs_inode_extref_parent(node, extref);
4562                 if (index != (u64)-1 && index != ref_index)
4563                         goto next_extref;
4564
4565                 if (parent != dir_id)
4566                         goto next_extref;
4567
4568                 if (ref_namelen <= BTRFS_NAME_LEN) {
4569                         len = ref_namelen;
4570                 } else {
4571                         len = BTRFS_NAME_LEN;
4572                         warning("root %llu INODE %s[%llu %llu] name too long",
4573                                 root->objectid,
4574                                 key->type == BTRFS_INODE_REF_KEY ?
4575                                         "REF" : "EXTREF",
4576                                 key->objectid, key->offset);
4577                 }
4578                 read_extent_buffer(node, ref_namebuf,
4579                                    (unsigned long)(extref + 1), len);
4580
4581                 if (len != namelen || strncmp(ref_namebuf, name, len))
4582                         goto next_extref;
4583
4584                 ret = 0;
4585                 goto out;
4586
4587 next_extref:
4588                 len = sizeof(*extref) + ref_namelen;
4589                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4590                 cur += len;
4591
4592         }
4593 out:
4594         btrfs_release_path(&path);
4595         return ret;
4596 }
4597
4598 /*
4599  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4600  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4601  *
4602  * @root:       the root of the fs/file tree
 * @key:        the key of the DIR_ITEM/DIR_INDEX
4604  * @size:       the st_size of the INODE_ITEM
4605  * @ext_ref:    the EXTENDED_IREF feature
4606  *
4607  * Return 0 if no error occurred.
4608  */
4609 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4610                           struct extent_buffer *node, int slot, u64 *size,
4611                           unsigned int ext_ref)
4612 {
4613         struct btrfs_dir_item *di;
4614         struct btrfs_inode_item *ii;
4615         struct btrfs_path path;
4616         struct btrfs_key location;
4617         char namebuf[BTRFS_NAME_LEN] = {0};
4618         u32 total;
4619         u32 cur = 0;
4620         u32 len;
4621         u32 name_len;
4622         u32 data_len;
4623         u8 filetype;
4624         u32 mode;
4625         u64 index;
4626         int ret;
4627         int err = 0;
4628
4629         /*
4630          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4631          * ignore index check.
4632          */
4633         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4634
4635         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4636         total = btrfs_item_size_nr(node, slot);
4637
4638         while (cur < total) {
4639                 data_len = btrfs_dir_data_len(node, di);
4640                 if (data_len)
4641                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4642                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4643                               "DIR_ITEM" : "DIR_INDEX",
4644                               key->objectid, key->offset, data_len);
4645
4646                 name_len = btrfs_dir_name_len(node, di);
4647                 if (cur + sizeof(*di) + name_len > total ||
4648                     name_len > BTRFS_NAME_LEN) {
4649                         warning("root %llu %s[%llu %llu] name too long",
4650                                 root->objectid,
4651                                 key->type == BTRFS_DIR_ITEM_KEY ?
4652                                 "DIR_ITEM" : "DIR_INDEX",
4653                                 key->objectid, key->offset);
4654
4655                         if (cur + sizeof(*di) > total)
4656                                 break;
4657                         len = min_t(u32, total - cur - sizeof(*di),
4658                                     BTRFS_NAME_LEN);
4659                 } else {
4660                         len = name_len;
4661                 }
4662                 (*size) += name_len;
4663
4664                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4665                 filetype = btrfs_dir_type(node, di);
4666
4667                 btrfs_init_path(&path);
4668                 btrfs_dir_item_key_to_cpu(node, di, &location);
4669
4670                 /* Ignore related ROOT_ITEM check */
4671                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4672                         goto next;
4673
4674                 /* Check relative INODE_ITEM(existence/filetype) */
4675                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4676                 if (ret) {
4677                         err |= INODE_ITEM_MISSING;
4678                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4679                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4680                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4681                               key->offset, location.objectid, name_len,
4682                               namebuf, filetype);
4683                         goto next;
4684                 }
4685
4686                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4687                                     struct btrfs_inode_item);
4688                 mode = btrfs_inode_mode(path.nodes[0], ii);
4689
4690                 if (imode_to_type(mode) != filetype) {
4691                         err |= INODE_ITEM_MISMATCH;
4692                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4693                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4694                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4695                               key->offset, name_len, namebuf, filetype);
4696                 }
4697
4698                 /* Check relative INODE_REF/INODE_EXTREF */
4699                 location.type = BTRFS_INODE_REF_KEY;
4700                 location.offset = key->objectid;
4701                 ret = find_inode_ref(root, &location, namebuf, len,
4702                                        index, ext_ref);
4703                 err |= ret;
4704                 if (ret & INODE_REF_MISSING)
4705                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4706                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4707                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4708                               key->offset, name_len, namebuf, filetype);
4709
4710 next:
4711                 btrfs_release_path(&path);
4712                 len = sizeof(*di) + name_len + data_len;
4713                 di = (struct btrfs_dir_item *)((char *)di + len);
4714                 cur += len;
4715
4716                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4717                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4718                               root->objectid, key->objectid, key->offset);
4719                         break;
4720                 }
4721         }
4722
4723         return err;
4724 }
4725
4726 /*
4727  * Check file extent datasum/hole, update the size of the file extents,
4728  * check and update the last offset of the file extent.
4729  *
4730  * @root:       the root of fs/file tree.
4731  * @fkey:       the key of the file extent.
4732  * @nodatasum:  INODE_NODATASUM feature.
4733  * @size:       the sum of all EXTENT_DATA items size for this inode.
4734  * @end:        the offset of the last extent.
4735  *
4736  * Return 0 if no error occurred.
4737  */
4738 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4739                              struct extent_buffer *node, int slot,
4740                              unsigned int nodatasum, u64 *size, u64 *end)
4741 {
4742         struct btrfs_file_extent_item *fi;
4743         u64 disk_bytenr;
4744         u64 disk_num_bytes;
4745         u64 extent_num_bytes;
4746         u64 extent_offset;
4747         u64 csum_found;         /* In byte size, sectorsize aligned */
4748         u64 search_start;       /* Logical range start we search for csum */
4749         u64 search_len;         /* Logical range len we search for csum */
4750         unsigned int extent_type;
4751         unsigned int is_hole;
4752         int compressed = 0;
4753         int ret;
4754         int err = 0;
4755
4756         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4757
4758         /* Check inline extent */
4759         extent_type = btrfs_file_extent_type(node, fi);
4760         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4761                 struct btrfs_item *e = btrfs_item_nr(slot);
4762                 u32 item_inline_len;
4763
4764                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4765                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4766                 compressed = btrfs_file_extent_compression(node, fi);
4767                 if (extent_num_bytes == 0) {
4768                         error(
4769                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4770                                 root->objectid, fkey->objectid, fkey->offset);
4771                         err |= FILE_EXTENT_ERROR;
4772                 }
4773                 if (!compressed && extent_num_bytes != item_inline_len) {
4774                         error(
4775                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4776                                 root->objectid, fkey->objectid, fkey->offset,
4777                                 extent_num_bytes, item_inline_len);
4778                         err |= FILE_EXTENT_ERROR;
4779                 }
4780                 *size += extent_num_bytes;
4781                 return err;
4782         }
4783
4784         /* Check extent type */
4785         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4786                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4787                 err |= FILE_EXTENT_ERROR;
4788                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4789                       root->objectid, fkey->objectid, fkey->offset);
4790                 return err;
4791         }
4792
4793         /* Check REG_EXTENT/PREALLOC_EXTENT */
4794         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4795         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4796         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4797         extent_offset = btrfs_file_extent_offset(node, fi);
4798         compressed = btrfs_file_extent_compression(node, fi);
4799         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4800
4801         /*
4802          * Check EXTENT_DATA csum
4803          *
4804          * For plain (uncompressed) extent, we should only check the range
4805          * we're referring to, as it's possible that part of prealloc extent
4806          * has been written, and has csum:
4807          *
4808          * |<--- Original large preallocated extent A ---->|
4809          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4810          *      No csum                         Has csum
4811          *
4812          * For compressed extent, we should check the whole range.
4813          */
4814         if (!compressed) {
4815                 search_start = disk_bytenr + extent_offset;
4816                 search_len = extent_num_bytes;
4817         } else {
4818                 search_start = disk_bytenr;
4819                 search_len = disk_num_bytes;
4820         }
4821         ret = count_csum_range(root, search_start, search_len, &csum_found);
4822         if (csum_found > 0 && nodatasum) {
4823                 err |= ODD_CSUM_ITEM;
4824                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4825                       root->objectid, fkey->objectid, fkey->offset);
4826         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4827                    !is_hole && (ret < 0 || csum_found < search_len)) {
4828                 err |= CSUM_ITEM_MISSING;
4829                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4830                       root->objectid, fkey->objectid, fkey->offset,
4831                       csum_found, search_len);
4832         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4833                 err |= ODD_CSUM_ITEM;
4834                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4835                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4836         }
4837
4838         /* Check EXTENT_DATA hole */
4839         if (!no_holes && *end != fkey->offset) {
4840                 err |= FILE_EXTENT_ERROR;
4841                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4842                       root->objectid, fkey->objectid, fkey->offset);
4843         }
4844
4845         *end += extent_num_bytes;
4846         if (!is_hole)
4847                 *size += extent_num_bytes;
4848
4849         return err;
4850 }
4851
4852 /*
4853  * Check INODE_ITEM and related ITEMs (the same inode number)
4854  * 1. check link count
4855  * 2. check inode ref/extref
4856  * 3. check dir item/index
4857  *
4858  * @ext_ref:    the EXTENDED_IREF feature
4859  *
4860  * Return 0 if no error occurred.
4861  * Return >0 for error or hit the traversal is done(by error bitmap)
4862  */
4863 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4864                             unsigned int ext_ref)
4865 {
4866         struct extent_buffer *node;
4867         struct btrfs_inode_item *ii;
4868         struct btrfs_key key;
4869         u64 inode_id;
4870         u32 mode;
4871         u64 nlink;
4872         u64 nbytes;
4873         u64 isize;
4874         u64 size = 0;
4875         u64 refs = 0;
4876         u64 extent_end = 0;
4877         u64 extent_size = 0;
4878         unsigned int dir;
4879         unsigned int nodatasum;
4880         int slot;
4881         int ret;
4882         int err = 0;
4883
4884         node = path->nodes[0];
4885         slot = path->slots[0];
4886
4887         btrfs_item_key_to_cpu(node, &key, slot);
4888         inode_id = key.objectid;
4889
4890         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4891                 ret = btrfs_next_item(root, path);
4892                 if (ret > 0)
4893                         err |= LAST_ITEM;
4894                 return err;
4895         }
4896
4897         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4898         isize = btrfs_inode_size(node, ii);
4899         nbytes = btrfs_inode_nbytes(node, ii);
4900         mode = btrfs_inode_mode(node, ii);
4901         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4902         nlink = btrfs_inode_nlink(node, ii);
4903         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4904
4905         while (1) {
4906                 ret = btrfs_next_item(root, path);
4907                 if (ret < 0) {
4908                         /* out will fill 'err' rusing current statistics */
4909                         goto out;
4910                 } else if (ret > 0) {
4911                         err |= LAST_ITEM;
4912                         goto out;
4913                 }
4914
4915                 node = path->nodes[0];
4916                 slot = path->slots[0];
4917                 btrfs_item_key_to_cpu(node, &key, slot);
4918                 if (key.objectid != inode_id)
4919                         goto out;
4920
4921                 switch (key.type) {
4922                 case BTRFS_INODE_REF_KEY:
4923                         ret = check_inode_ref(root, &key, node, slot, &refs,
4924                                               mode);
4925                         err |= ret;
4926                         break;
4927                 case BTRFS_INODE_EXTREF_KEY:
4928                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4929                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4930                                         root->objectid, key.objectid,
4931                                         key.offset);
4932                         ret = check_inode_extref(root, &key, node, slot, &refs,
4933                                                  mode);
4934                         err |= ret;
4935                         break;
4936                 case BTRFS_DIR_ITEM_KEY:
4937                 case BTRFS_DIR_INDEX_KEY:
4938                         if (!dir) {
4939                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4940                                         root->objectid, inode_id,
4941                                         imode_to_type(mode), key.objectid,
4942                                         key.offset);
4943                         }
4944                         ret = check_dir_item(root, &key, node, slot, &size,
4945                                              ext_ref);
4946                         err |= ret;
4947                         break;
4948                 case BTRFS_EXTENT_DATA_KEY:
4949                         if (dir) {
4950                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4951                                         root->objectid, inode_id, key.objectid,
4952                                         key.offset);
4953                         }
4954                         ret = check_file_extent(root, &key, node, slot,
4955                                                 nodatasum, &extent_size,
4956                                                 &extent_end);
4957                         err |= ret;
4958                         break;
4959                 case BTRFS_XATTR_ITEM_KEY:
4960                         break;
4961                 default:
4962                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4963                               key.objectid, key.type, key.offset);
4964                 }
4965         }
4966
4967 out:
4968         /* verify INODE_ITEM nlink/isize/nbytes */
4969         if (dir) {
4970                 if (nlink != 1) {
4971                         err |= LINK_COUNT_ERROR;
4972                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4973                               root->objectid, inode_id, nlink);
4974                 }
4975
4976                 /*
4977                  * Just a warning, as dir inode nbytes is just an
4978                  * instructive value.
4979                  */
4980                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4981                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4982                                 root->objectid, inode_id, root->nodesize);
4983                 }
4984
4985                 if (isize != size) {
4986                         err |= ISIZE_ERROR;
4987                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4988                               root->objectid, inode_id, isize, size);
4989                 }
4990         } else {
4991                 if (nlink != refs) {
4992                         err |= LINK_COUNT_ERROR;
4993                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4994                               root->objectid, inode_id, nlink, refs);
4995                 } else if (!nlink) {
4996                         err |= ORPHAN_ITEM;
4997                 }
4998
4999                 if (!nbytes && !no_holes && extent_end < isize) {
5000                         err |= NBYTES_ERROR;
5001                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5002                               root->objectid, inode_id, isize);
5003                 }
5004
5005                 if (nbytes != extent_size) {
5006                         err |= NBYTES_ERROR;
5007                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5008                               root->objectid, inode_id, nbytes, extent_size);
5009                 }
5010         }
5011
5012         return err;
5013 }
5014
5015 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5016 {
5017         struct btrfs_path path;
5018         struct btrfs_key key;
5019         int err = 0;
5020         int ret;
5021
5022         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5023         key.type = BTRFS_INODE_ITEM_KEY;
5024         key.offset = 0;
5025
5026         /* For root being dropped, we don't need to check first inode */
5027         if (btrfs_root_refs(&root->root_item) == 0 &&
5028             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5029             key.objectid)
5030                 return 0;
5031
5032         btrfs_init_path(&path);
5033
5034         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5035         if (ret < 0)
5036                 goto out;
5037         if (ret > 0) {
5038                 ret = 0;
5039                 err |= INODE_ITEM_MISSING;
5040                 error("first inode item of root %llu is missing",
5041                       root->objectid);
5042         }
5043
5044         err |= check_inode_item(root, &path, ext_ref);
5045         err &= ~LAST_ITEM;
5046         if (err && !ret)
5047                 ret = -EIO;
5048 out:
5049         btrfs_release_path(&path);
5050         return ret;
5051 }
5052
5053 /*
5054  * Iterate all item on the tree and call check_inode_item() to check.
5055  *
5056  * @root:       the root of the tree to be checked.
5057  * @ext_ref:    the EXTENDED_IREF feature
5058  *
5059  * Return 0 if no error found.
5060  * Return <0 for error.
5061  */
5062 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5063 {
5064         struct btrfs_path path;
5065         struct node_refs nrefs;
5066         struct btrfs_root_item *root_item = &root->root_item;
5067         int ret;
5068         int level;
5069         int err = 0;
5070
5071         /*
5072          * We need to manually check the first inode item(256)
5073          * As the following traversal function will only start from
5074          * the first inode item in the leaf, if inode item(256) is missing
5075          * we will just skip it forever.
5076          */
5077         ret = check_fs_first_inode(root, ext_ref);
5078         if (ret < 0)
5079                 return ret;
5080
5081         memset(&nrefs, 0, sizeof(nrefs));
5082         level = btrfs_header_level(root->node);
5083         btrfs_init_path(&path);
5084
5085         if (btrfs_root_refs(root_item) > 0 ||
5086             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5087                 path.nodes[level] = root->node;
5088                 path.slots[level] = 0;
5089                 extent_buffer_get(root->node);
5090         } else {
5091                 struct btrfs_key key;
5092
5093                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5094                 level = root_item->drop_level;
5095                 path.lowest_level = level;
5096                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5097                 if (ret < 0)
5098                         goto out;
5099                 ret = 0;
5100         }
5101
5102         while (1) {
5103                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5104                 err |= !!ret;
5105
5106                 /* if ret is negative, walk shall stop */
5107                 if (ret < 0) {
5108                         ret = err;
5109                         break;
5110                 }
5111
5112                 ret = walk_up_tree_v2(root, &path, &level);
5113                 if (ret != 0) {
5114                         /* Normal exit, reset ret to err */
5115                         ret = err;
5116                         break;
5117                 }
5118         }
5119
5120 out:
5121         btrfs_release_path(&path);
5122         return ret;
5123 }
5124
5125 /*
5126  * Find the relative ref for root_ref and root_backref.
5127  *
5128  * @root:       the root of the root tree.
5129  * @ref_key:    the key of the root ref.
5130  *
5131  * Return 0 if no error occurred.
5132  */
5133 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5134                           struct extent_buffer *node, int slot)
5135 {
5136         struct btrfs_path path;
5137         struct btrfs_key key;
5138         struct btrfs_root_ref *ref;
5139         struct btrfs_root_ref *backref;
5140         char ref_name[BTRFS_NAME_LEN] = {0};
5141         char backref_name[BTRFS_NAME_LEN] = {0};
5142         u64 ref_dirid;
5143         u64 ref_seq;
5144         u32 ref_namelen;
5145         u64 backref_dirid;
5146         u64 backref_seq;
5147         u32 backref_namelen;
5148         u32 len;
5149         int ret;
5150         int err = 0;
5151
5152         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5153         ref_dirid = btrfs_root_ref_dirid(node, ref);
5154         ref_seq = btrfs_root_ref_sequence(node, ref);
5155         ref_namelen = btrfs_root_ref_name_len(node, ref);
5156
5157         if (ref_namelen <= BTRFS_NAME_LEN) {
5158                 len = ref_namelen;
5159         } else {
5160                 len = BTRFS_NAME_LEN;
5161                 warning("%s[%llu %llu] ref_name too long",
5162                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5163                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5164                         ref_key->offset);
5165         }
5166         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5167
5168         /* Find relative root_ref */
5169         key.objectid = ref_key->offset;
5170         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5171         key.offset = ref_key->objectid;
5172
5173         btrfs_init_path(&path);
5174         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5175         if (ret) {
5176                 err |= ROOT_REF_MISSING;
5177                 error("%s[%llu %llu] couldn't find relative ref",
5178                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5179                       "ROOT_REF" : "ROOT_BACKREF",
5180                       ref_key->objectid, ref_key->offset);
5181                 goto out;
5182         }
5183
5184         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5185                                  struct btrfs_root_ref);
5186         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5187         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5188         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5189
5190         if (backref_namelen <= BTRFS_NAME_LEN) {
5191                 len = backref_namelen;
5192         } else {
5193                 len = BTRFS_NAME_LEN;
5194                 warning("%s[%llu %llu] ref_name too long",
5195                         key.type == BTRFS_ROOT_REF_KEY ?
5196                         "ROOT_REF" : "ROOT_BACKREF",
5197                         key.objectid, key.offset);
5198         }
5199         read_extent_buffer(path.nodes[0], backref_name,
5200                            (unsigned long)(backref + 1), len);
5201
5202         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5203             ref_namelen != backref_namelen ||
5204             strncmp(ref_name, backref_name, len)) {
5205                 err |= ROOT_REF_MISMATCH;
5206                 error("%s[%llu %llu] mismatch relative ref",
5207                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5208                       "ROOT_REF" : "ROOT_BACKREF",
5209                       ref_key->objectid, ref_key->offset);
5210         }
5211 out:
5212         btrfs_release_path(&path);
5213         return err;
5214 }
5215
5216 /*
5217  * Check all fs/file tree in low_memory mode.
5218  *
5219  * 1. for fs tree root item, call check_fs_root_v2()
5220  * 2. for fs tree root ref/backref, call check_root_ref()
5221  *
5222  * Return 0 if no error occurred.
5223  */
5224 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5225 {
5226         struct btrfs_root *tree_root = fs_info->tree_root;
5227         struct btrfs_root *cur_root = NULL;
5228         struct btrfs_path path;
5229         struct btrfs_key key;
5230         struct extent_buffer *node;
5231         unsigned int ext_ref;
5232         int slot;
5233         int ret;
5234         int err = 0;
5235
5236         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5237
5238         btrfs_init_path(&path);
5239         key.objectid = BTRFS_FS_TREE_OBJECTID;
5240         key.offset = 0;
5241         key.type = BTRFS_ROOT_ITEM_KEY;
5242
5243         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5244         if (ret < 0) {
5245                 err = ret;
5246                 goto out;
5247         } else if (ret > 0) {
5248                 err = -ENOENT;
5249                 goto out;
5250         }
5251
5252         while (1) {
5253                 node = path.nodes[0];
5254                 slot = path.slots[0];
5255                 btrfs_item_key_to_cpu(node, &key, slot);
5256                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5257                         goto out;
5258                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5259                     fs_root_objectid(key.objectid)) {
5260                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5261                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5262                                                                        &key);
5263                         } else {
5264                                 key.offset = (u64)-1;
5265                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5266                         }
5267
5268                         if (IS_ERR(cur_root)) {
5269                                 error("Fail to read fs/subvol tree: %lld",
5270                                       key.objectid);
5271                                 err = -EIO;
5272                                 goto next;
5273                         }
5274
5275                         ret = check_fs_root_v2(cur_root, ext_ref);
5276                         err |= ret;
5277
5278                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5279                                 btrfs_free_fs_root(cur_root);
5280                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5281                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5282                         ret = check_root_ref(tree_root, &key, node, slot);
5283                         err |= ret;
5284                 }
5285 next:
5286                 ret = btrfs_next_item(tree_root, &path);
5287                 if (ret > 0)
5288                         goto out;
5289                 if (ret < 0) {
5290                         err = ret;
5291                         goto out;
5292                 }
5293         }
5294
5295 out:
5296         btrfs_release_path(&path);
5297         return err;
5298 }
5299
5300 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5301 {
5302         struct list_head *cur = rec->backrefs.next;
5303         struct extent_backref *back;
5304         struct tree_backref *tback;
5305         struct data_backref *dback;
5306         u64 found = 0;
5307         int err = 0;
5308
5309         while(cur != &rec->backrefs) {
5310                 back = to_extent_backref(cur);
5311                 cur = cur->next;
5312                 if (!back->found_extent_tree) {
5313                         err = 1;
5314                         if (!print_errs)
5315                                 goto out;
5316                         if (back->is_data) {
5317                                 dback = to_data_backref(back);
5318                                 fprintf(stderr, "Backref %llu %s %llu"
5319                                         " owner %llu offset %llu num_refs %lu"
5320                                         " not found in extent tree\n",
5321                                         (unsigned long long)rec->start,
5322                                         back->full_backref ?
5323                                         "parent" : "root",
5324                                         back->full_backref ?
5325                                         (unsigned long long)dback->parent:
5326                                         (unsigned long long)dback->root,
5327                                         (unsigned long long)dback->owner,
5328                                         (unsigned long long)dback->offset,
5329                                         (unsigned long)dback->num_refs);
5330                         } else {
5331                                 tback = to_tree_backref(back);
5332                                 fprintf(stderr, "Backref %llu parent %llu"
5333                                         " root %llu not found in extent tree\n",
5334                                         (unsigned long long)rec->start,
5335                                         (unsigned long long)tback->parent,
5336                                         (unsigned long long)tback->root);
5337                         }
5338                 }
5339                 if (!back->is_data && !back->found_ref) {
5340                         err = 1;
5341                         if (!print_errs)
5342                                 goto out;
5343                         tback = to_tree_backref(back);
5344                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5345                                 (unsigned long long)rec->start,
5346                                 back->full_backref ? "parent" : "root",
5347                                 back->full_backref ?
5348                                 (unsigned long long)tback->parent :
5349                                 (unsigned long long)tback->root, back);
5350                 }
5351                 if (back->is_data) {
5352                         dback = to_data_backref(back);
5353                         if (dback->found_ref != dback->num_refs) {
5354                                 err = 1;
5355                                 if (!print_errs)
5356                                         goto out;
5357                                 fprintf(stderr, "Incorrect local backref count"
5358                                         " on %llu %s %llu owner %llu"
5359                                         " offset %llu found %u wanted %u back %p\n",
5360                                         (unsigned long long)rec->start,
5361                                         back->full_backref ?
5362                                         "parent" : "root",
5363                                         back->full_backref ?
5364                                         (unsigned long long)dback->parent:
5365                                         (unsigned long long)dback->root,
5366                                         (unsigned long long)dback->owner,
5367                                         (unsigned long long)dback->offset,
5368                                         dback->found_ref, dback->num_refs, back);
5369                         }
5370                         if (dback->disk_bytenr != rec->start) {
5371                                 err = 1;
5372                                 if (!print_errs)
5373                                         goto out;
5374                                 fprintf(stderr, "Backref disk bytenr does not"
5375                                         " match extent record, bytenr=%llu, "
5376                                         "ref bytenr=%llu\n",
5377                                         (unsigned long long)rec->start,
5378                                         (unsigned long long)dback->disk_bytenr);
5379                         }
5380
5381                         if (dback->bytes != rec->nr) {
5382                                 err = 1;
5383                                 if (!print_errs)
5384                                         goto out;
5385                                 fprintf(stderr, "Backref bytes do not match "
5386                                         "extent backref, bytenr=%llu, ref "
5387                                         "bytes=%llu, backref bytes=%llu\n",
5388                                         (unsigned long long)rec->start,
5389                                         (unsigned long long)rec->nr,
5390                                         (unsigned long long)dback->bytes);
5391                         }
5392                 }
5393                 if (!back->is_data) {
5394                         found += 1;
5395                 } else {
5396                         dback = to_data_backref(back);
5397                         found += dback->found_ref;
5398                 }
5399         }
5400         if (found != rec->refs) {
5401                 err = 1;
5402                 if (!print_errs)
5403                         goto out;
5404                 fprintf(stderr, "Incorrect global backref count "
5405                         "on %llu found %llu wanted %llu\n",
5406                         (unsigned long long)rec->start,
5407                         (unsigned long long)found,
5408                         (unsigned long long)rec->refs);
5409         }
5410 out:
5411         return err;
5412 }
5413
5414 static int free_all_extent_backrefs(struct extent_record *rec)
5415 {
5416         struct extent_backref *back;
5417         struct list_head *cur;
5418         while (!list_empty(&rec->backrefs)) {
5419                 cur = rec->backrefs.next;
5420                 back = to_extent_backref(cur);
5421                 list_del(cur);
5422                 free(back);
5423         }
5424         return 0;
5425 }
5426
5427 static void free_extent_record_cache(struct cache_tree *extent_cache)
5428 {
5429         struct cache_extent *cache;
5430         struct extent_record *rec;
5431
5432         while (1) {
5433                 cache = first_cache_extent(extent_cache);
5434                 if (!cache)
5435                         break;
5436                 rec = container_of(cache, struct extent_record, cache);
5437                 remove_cache_extent(extent_cache, cache);
5438                 free_all_extent_backrefs(rec);
5439                 free(rec);
5440         }
5441 }
5442
5443 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5444                                  struct extent_record *rec)
5445 {
5446         if (rec->content_checked && rec->owner_ref_checked &&
5447             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5448             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5449             !rec->bad_full_backref && !rec->crossing_stripes &&
5450             !rec->wrong_chunk_type) {
5451                 remove_cache_extent(extent_cache, &rec->cache);
5452                 free_all_extent_backrefs(rec);
5453                 list_del_init(&rec->list);
5454                 free(rec);
5455         }
5456         return 0;
5457 }
5458
5459 static int check_owner_ref(struct btrfs_root *root,
5460                             struct extent_record *rec,
5461                             struct extent_buffer *buf)
5462 {
5463         struct extent_backref *node;
5464         struct tree_backref *back;
5465         struct btrfs_root *ref_root;
5466         struct btrfs_key key;
5467         struct btrfs_path path;
5468         struct extent_buffer *parent;
5469         int level;
5470         int found = 0;
5471         int ret;
5472
5473         list_for_each_entry(node, &rec->backrefs, list) {
5474                 if (node->is_data)
5475                         continue;
5476                 if (!node->found_ref)
5477                         continue;
5478                 if (node->full_backref)
5479                         continue;
5480                 back = to_tree_backref(node);
5481                 if (btrfs_header_owner(buf) == back->root)
5482                         return 0;
5483         }
5484         BUG_ON(rec->is_root);
5485
5486         /* try to find the block by search corresponding fs tree */
5487         key.objectid = btrfs_header_owner(buf);
5488         key.type = BTRFS_ROOT_ITEM_KEY;
5489         key.offset = (u64)-1;
5490
5491         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5492         if (IS_ERR(ref_root))
5493                 return 1;
5494
5495         level = btrfs_header_level(buf);
5496         if (level == 0)
5497                 btrfs_item_key_to_cpu(buf, &key, 0);
5498         else
5499                 btrfs_node_key_to_cpu(buf, &key, 0);
5500
5501         btrfs_init_path(&path);
5502         path.lowest_level = level + 1;
5503         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5504         if (ret < 0)
5505                 return 0;
5506
5507         parent = path.nodes[level + 1];
5508         if (parent && buf->start == btrfs_node_blockptr(parent,
5509                                                         path.slots[level + 1]))
5510                 found = 1;
5511
5512         btrfs_release_path(&path);
5513         return found ? 0 : 1;
5514 }
5515
5516 static int is_extent_tree_record(struct extent_record *rec)
5517 {
5518         struct list_head *cur = rec->backrefs.next;
5519         struct extent_backref *node;
5520         struct tree_backref *back;
5521         int is_extent = 0;
5522
5523         while(cur != &rec->backrefs) {
5524                 node = to_extent_backref(cur);
5525                 cur = cur->next;
5526                 if (node->is_data)
5527                         return 0;
5528                 back = to_tree_backref(node);
5529                 if (node->full_backref)
5530                         return 0;
5531                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5532                         is_extent = 1;
5533         }
5534         return is_extent;
5535 }
5536
5537
5538 static int record_bad_block_io(struct btrfs_fs_info *info,
5539                                struct cache_tree *extent_cache,
5540                                u64 start, u64 len)
5541 {
5542         struct extent_record *rec;
5543         struct cache_extent *cache;
5544         struct btrfs_key key;
5545
5546         cache = lookup_cache_extent(extent_cache, start, len);
5547         if (!cache)
5548                 return 0;
5549
5550         rec = container_of(cache, struct extent_record, cache);
5551         if (!is_extent_tree_record(rec))
5552                 return 0;
5553
5554         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5555         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5556 }
5557
5558 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5559                        struct extent_buffer *buf, int slot)
5560 {
5561         if (btrfs_header_level(buf)) {
5562                 struct btrfs_key_ptr ptr1, ptr2;
5563
5564                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5565                                    sizeof(struct btrfs_key_ptr));
5566                 read_extent_buffer(buf, &ptr2,
5567                                    btrfs_node_key_ptr_offset(slot + 1),
5568                                    sizeof(struct btrfs_key_ptr));
5569                 write_extent_buffer(buf, &ptr1,
5570                                     btrfs_node_key_ptr_offset(slot + 1),
5571                                     sizeof(struct btrfs_key_ptr));
5572                 write_extent_buffer(buf, &ptr2,
5573                                     btrfs_node_key_ptr_offset(slot),
5574                                     sizeof(struct btrfs_key_ptr));
5575                 if (slot == 0) {
5576                         struct btrfs_disk_key key;
5577                         btrfs_node_key(buf, &key, 0);
5578                         btrfs_fixup_low_keys(root, path, &key,
5579                                              btrfs_header_level(buf) + 1);
5580                 }
5581         } else {
5582                 struct btrfs_item *item1, *item2;
5583                 struct btrfs_key k1, k2;
5584                 char *item1_data, *item2_data;
5585                 u32 item1_offset, item2_offset, item1_size, item2_size;
5586
5587                 item1 = btrfs_item_nr(slot);
5588                 item2 = btrfs_item_nr(slot + 1);
5589                 btrfs_item_key_to_cpu(buf, &k1, slot);
5590                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5591                 item1_offset = btrfs_item_offset(buf, item1);
5592                 item2_offset = btrfs_item_offset(buf, item2);
5593                 item1_size = btrfs_item_size(buf, item1);
5594                 item2_size = btrfs_item_size(buf, item2);
5595
5596                 item1_data = malloc(item1_size);
5597                 if (!item1_data)
5598                         return -ENOMEM;
5599                 item2_data = malloc(item2_size);
5600                 if (!item2_data) {
5601                         free(item1_data);
5602                         return -ENOMEM;
5603                 }
5604
5605                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5606                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5607
5608                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5609                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5610                 free(item1_data);
5611                 free(item2_data);
5612
5613                 btrfs_set_item_offset(buf, item1, item2_offset);
5614                 btrfs_set_item_offset(buf, item2, item1_offset);
5615                 btrfs_set_item_size(buf, item1, item2_size);
5616                 btrfs_set_item_size(buf, item2, item1_size);
5617
5618                 path->slots[0] = slot;
5619                 btrfs_set_item_key_unsafe(root, path, &k2);
5620                 path->slots[0] = slot + 1;
5621                 btrfs_set_item_key_unsafe(root, path, &k1);
5622         }
5623         return 0;
5624 }
5625
5626 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5627 {
5628         struct extent_buffer *buf;
5629         struct btrfs_key k1, k2;
5630         int i;
5631         int level = path->lowest_level;
5632         int ret = -EIO;
5633
5634         buf = path->nodes[level];
5635         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5636                 if (level) {
5637                         btrfs_node_key_to_cpu(buf, &k1, i);
5638                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5639                 } else {
5640                         btrfs_item_key_to_cpu(buf, &k1, i);
5641                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5642                 }
5643                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5644                         continue;
5645                 ret = swap_values(root, path, buf, i);
5646                 if (ret)
5647                         break;
5648                 btrfs_mark_buffer_dirty(buf);
5649                 i = 0;
5650         }
5651         return ret;
5652 }
5653
5654 static int delete_bogus_item(struct btrfs_root *root,
5655                              struct btrfs_path *path,
5656                              struct extent_buffer *buf, int slot)
5657 {
5658         struct btrfs_key key;
5659         int nritems = btrfs_header_nritems(buf);
5660
5661         btrfs_item_key_to_cpu(buf, &key, slot);
5662
5663         /* These are all the keys we can deal with missing. */
5664         if (key.type != BTRFS_DIR_INDEX_KEY &&
5665             key.type != BTRFS_EXTENT_ITEM_KEY &&
5666             key.type != BTRFS_METADATA_ITEM_KEY &&
5667             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5668             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5669                 return -1;
5670
5671         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5672                (unsigned long long)key.objectid, key.type,
5673                (unsigned long long)key.offset, slot, buf->start);
5674         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5675                               btrfs_item_nr_offset(slot + 1),
5676                               sizeof(struct btrfs_item) *
5677                               (nritems - slot - 1));
5678         btrfs_set_header_nritems(buf, nritems - 1);
5679         if (slot == 0) {
5680                 struct btrfs_disk_key disk_key;
5681
5682                 btrfs_item_key(buf, &disk_key, 0);
5683                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5684         }
5685         btrfs_mark_buffer_dirty(buf);
5686         return 0;
5687 }
5688
5689 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5690 {
5691         struct extent_buffer *buf;
5692         int i;
5693         int ret = 0;
5694
5695         /* We should only get this for leaves */
5696         BUG_ON(path->lowest_level);
5697         buf = path->nodes[0];
5698 again:
5699         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5700                 unsigned int shift = 0, offset;
5701
5702                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5703                     BTRFS_LEAF_DATA_SIZE(root)) {
5704                         if (btrfs_item_end_nr(buf, i) >
5705                             BTRFS_LEAF_DATA_SIZE(root)) {
5706                                 ret = delete_bogus_item(root, path, buf, i);
5707                                 if (!ret)
5708                                         goto again;
5709                                 fprintf(stderr, "item is off the end of the "
5710                                         "leaf, can't fix\n");
5711                                 ret = -EIO;
5712                                 break;
5713                         }
5714                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5715                                 btrfs_item_end_nr(buf, i);
5716                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5717                            btrfs_item_offset_nr(buf, i - 1)) {
5718                         if (btrfs_item_end_nr(buf, i) >
5719                             btrfs_item_offset_nr(buf, i - 1)) {
5720                                 ret = delete_bogus_item(root, path, buf, i);
5721                                 if (!ret)
5722                                         goto again;
5723                                 fprintf(stderr, "items overlap, can't fix\n");
5724                                 ret = -EIO;
5725                                 break;
5726                         }
5727                         shift = btrfs_item_offset_nr(buf, i - 1) -
5728                                 btrfs_item_end_nr(buf, i);
5729                 }
5730                 if (!shift)
5731                         continue;
5732
5733                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5734                        i, shift, (unsigned long long)buf->start);
5735                 offset = btrfs_item_offset_nr(buf, i);
5736                 memmove_extent_buffer(buf,
5737                                       btrfs_leaf_data(buf) + offset + shift,
5738                                       btrfs_leaf_data(buf) + offset,
5739                                       btrfs_item_size_nr(buf, i));
5740                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5741                                       offset + shift);
5742                 btrfs_mark_buffer_dirty(buf);
5743         }
5744
5745         /*
5746          * We may have moved things, in which case we want to exit so we don't
5747          * write those changes out.  Once we have proper abort functionality in
5748          * progs this can be changed to something nicer.
5749          */
5750         BUG_ON(ret);
5751         return ret;
5752 }
5753
5754 /*
5755  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5756  * then just return -EIO.
5757  */
5758 static int try_to_fix_bad_block(struct btrfs_root *root,
5759                                 struct extent_buffer *buf,
5760                                 enum btrfs_tree_block_status status)
5761 {
5762         struct btrfs_trans_handle *trans;
5763         struct ulist *roots;
5764         struct ulist_node *node;
5765         struct btrfs_root *search_root;
5766         struct btrfs_path path;
5767         struct ulist_iterator iter;
5768         struct btrfs_key root_key, key;
5769         int ret;
5770
5771         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5772             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5773                 return -EIO;
5774
5775         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5776         if (ret)
5777                 return -EIO;
5778
5779         btrfs_init_path(&path);
5780         ULIST_ITER_INIT(&iter);
5781         while ((node = ulist_next(roots, &iter))) {
5782                 root_key.objectid = node->val;
5783                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5784                 root_key.offset = (u64)-1;
5785
5786                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5787                 if (IS_ERR(root)) {
5788                         ret = -EIO;
5789                         break;
5790                 }
5791
5792
5793                 trans = btrfs_start_transaction(search_root, 0);
5794                 if (IS_ERR(trans)) {
5795                         ret = PTR_ERR(trans);
5796                         break;
5797                 }
5798
5799                 path.lowest_level = btrfs_header_level(buf);
5800                 path.skip_check_block = 1;
5801                 if (path.lowest_level)
5802                         btrfs_node_key_to_cpu(buf, &key, 0);
5803                 else
5804                         btrfs_item_key_to_cpu(buf, &key, 0);
5805                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5806                 if (ret) {
5807                         ret = -EIO;
5808                         btrfs_commit_transaction(trans, search_root);
5809                         break;
5810                 }
5811                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5812                         ret = fix_key_order(search_root, &path);
5813                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5814                         ret = fix_item_offset(search_root, &path);
5815                 if (ret) {
5816                         btrfs_commit_transaction(trans, search_root);
5817                         break;
5818                 }
5819                 btrfs_release_path(&path);
5820                 btrfs_commit_transaction(trans, search_root);
5821         }
5822         ulist_free(roots);
5823         btrfs_release_path(&path);
5824         return ret;
5825 }
5826
5827 static int check_block(struct btrfs_root *root,
5828                        struct cache_tree *extent_cache,
5829                        struct extent_buffer *buf, u64 flags)
5830 {
5831         struct extent_record *rec;
5832         struct cache_extent *cache;
5833         struct btrfs_key key;
5834         enum btrfs_tree_block_status status;
5835         int ret = 0;
5836         int level;
5837
5838         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5839         if (!cache)
5840                 return 1;
5841         rec = container_of(cache, struct extent_record, cache);
5842         rec->generation = btrfs_header_generation(buf);
5843
5844         level = btrfs_header_level(buf);
5845         if (btrfs_header_nritems(buf) > 0) {
5846
5847                 if (level == 0)
5848                         btrfs_item_key_to_cpu(buf, &key, 0);
5849                 else
5850                         btrfs_node_key_to_cpu(buf, &key, 0);
5851
5852                 rec->info_objectid = key.objectid;
5853         }
5854         rec->info_level = level;
5855
5856         if (btrfs_is_leaf(buf))
5857                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5858         else
5859                 status = btrfs_check_node(root, &rec->parent_key, buf);
5860
5861         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5862                 if (repair)
5863                         status = try_to_fix_bad_block(root, buf, status);
5864                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5865                         ret = -EIO;
5866                         fprintf(stderr, "bad block %llu\n",
5867                                 (unsigned long long)buf->start);
5868                 } else {
5869                         /*
5870                          * Signal to callers we need to start the scan over
5871                          * again since we'll have cowed blocks.
5872                          */
5873                         ret = -EAGAIN;
5874                 }
5875         } else {
5876                 rec->content_checked = 1;
5877                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5878                         rec->owner_ref_checked = 1;
5879                 else {
5880                         ret = check_owner_ref(root, rec, buf);
5881                         if (!ret)
5882                                 rec->owner_ref_checked = 1;
5883                 }
5884         }
5885         if (!ret)
5886                 maybe_free_extent_rec(extent_cache, rec);
5887         return ret;
5888 }
5889
5890 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5891                                                 u64 parent, u64 root)
5892 {
5893         struct list_head *cur = rec->backrefs.next;
5894         struct extent_backref *node;
5895         struct tree_backref *back;
5896
5897         while(cur != &rec->backrefs) {
5898                 node = to_extent_backref(cur);
5899                 cur = cur->next;
5900                 if (node->is_data)
5901                         continue;
5902                 back = to_tree_backref(node);
5903                 if (parent > 0) {
5904                         if (!node->full_backref)
5905                                 continue;
5906                         if (parent == back->parent)
5907                                 return back;
5908                 } else {
5909                         if (node->full_backref)
5910                                 continue;
5911                         if (back->root == root)
5912                                 return back;
5913                 }
5914         }
5915         return NULL;
5916 }
5917
5918 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5919                                                 u64 parent, u64 root)
5920 {
5921         struct tree_backref *ref = malloc(sizeof(*ref));
5922
5923         if (!ref)
5924                 return NULL;
5925         memset(&ref->node, 0, sizeof(ref->node));
5926         if (parent > 0) {
5927                 ref->parent = parent;
5928                 ref->node.full_backref = 1;
5929         } else {
5930                 ref->root = root;
5931                 ref->node.full_backref = 0;
5932         }
5933         list_add_tail(&ref->node.list, &rec->backrefs);
5934
5935         return ref;
5936 }
5937
5938 static struct data_backref *find_data_backref(struct extent_record *rec,
5939                                                 u64 parent, u64 root,
5940                                                 u64 owner, u64 offset,
5941                                                 int found_ref,
5942                                                 u64 disk_bytenr, u64 bytes)
5943 {
5944         struct list_head *cur = rec->backrefs.next;
5945         struct extent_backref *node;
5946         struct data_backref *back;
5947
5948         while(cur != &rec->backrefs) {
5949                 node = to_extent_backref(cur);
5950                 cur = cur->next;
5951                 if (!node->is_data)
5952                         continue;
5953                 back = to_data_backref(node);
5954                 if (parent > 0) {
5955                         if (!node->full_backref)
5956                                 continue;
5957                         if (parent == back->parent)
5958                                 return back;
5959                 } else {
5960                         if (node->full_backref)
5961                                 continue;
5962                         if (back->root == root && back->owner == owner &&
5963                             back->offset == offset) {
5964                                 if (found_ref && node->found_ref &&
5965                                     (back->bytes != bytes ||
5966                                     back->disk_bytenr != disk_bytenr))
5967                                         continue;
5968                                 return back;
5969                         }
5970                 }
5971         }
5972         return NULL;
5973 }
5974
5975 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5976                                                 u64 parent, u64 root,
5977                                                 u64 owner, u64 offset,
5978                                                 u64 max_size)
5979 {
5980         struct data_backref *ref = malloc(sizeof(*ref));
5981
5982         if (!ref)
5983                 return NULL;
5984         memset(&ref->node, 0, sizeof(ref->node));
5985         ref->node.is_data = 1;
5986
5987         if (parent > 0) {
5988                 ref->parent = parent;
5989                 ref->owner = 0;
5990                 ref->offset = 0;
5991                 ref->node.full_backref = 1;
5992         } else {
5993                 ref->root = root;
5994                 ref->owner = owner;
5995                 ref->offset = offset;
5996                 ref->node.full_backref = 0;
5997         }
5998         ref->bytes = max_size;
5999         ref->found_ref = 0;
6000         ref->num_refs = 0;
6001         list_add_tail(&ref->node.list, &rec->backrefs);
6002         if (max_size > rec->max_size)
6003                 rec->max_size = max_size;
6004         return ref;
6005 }
6006
6007 /* Check if the type of extent matches with its chunk */
6008 static void check_extent_type(struct extent_record *rec)
6009 {
6010         struct btrfs_block_group_cache *bg_cache;
6011
6012         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6013         if (!bg_cache)
6014                 return;
6015
6016         /* data extent, check chunk directly*/
6017         if (!rec->metadata) {
6018                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6019                         rec->wrong_chunk_type = 1;
6020                 return;
6021         }
6022
6023         /* metadata extent, check the obvious case first */
6024         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6025                                  BTRFS_BLOCK_GROUP_METADATA))) {
6026                 rec->wrong_chunk_type = 1;
6027                 return;
6028         }
6029
6030         /*
6031          * Check SYSTEM extent, as it's also marked as metadata, we can only
6032          * make sure it's a SYSTEM extent by its backref
6033          */
6034         if (!list_empty(&rec->backrefs)) {
6035                 struct extent_backref *node;
6036                 struct tree_backref *tback;
6037                 u64 bg_type;
6038
6039                 node = to_extent_backref(rec->backrefs.next);
6040                 if (node->is_data) {
6041                         /* tree block shouldn't have data backref */
6042                         rec->wrong_chunk_type = 1;
6043                         return;
6044                 }
6045                 tback = container_of(node, struct tree_backref, node);
6046
6047                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6048                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6049                 else
6050                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6051                 if (!(bg_cache->flags & bg_type))
6052                         rec->wrong_chunk_type = 1;
6053         }
6054 }
6055
6056 /*
6057  * Allocate a new extent record, fill default values from @tmpl and insert int
6058  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6059  * the cache, otherwise it fails.
6060  */
6061 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6062                 struct extent_record *tmpl)
6063 {
6064         struct extent_record *rec;
6065         int ret = 0;
6066
6067         BUG_ON(tmpl->max_size == 0);
6068         rec = malloc(sizeof(*rec));
6069         if (!rec)
6070                 return -ENOMEM;
6071         rec->start = tmpl->start;
6072         rec->max_size = tmpl->max_size;
6073         rec->nr = max(tmpl->nr, tmpl->max_size);
6074         rec->found_rec = tmpl->found_rec;
6075         rec->content_checked = tmpl->content_checked;
6076         rec->owner_ref_checked = tmpl->owner_ref_checked;
6077         rec->num_duplicates = 0;
6078         rec->metadata = tmpl->metadata;
6079         rec->flag_block_full_backref = FLAG_UNSET;
6080         rec->bad_full_backref = 0;
6081         rec->crossing_stripes = 0;
6082         rec->wrong_chunk_type = 0;
6083         rec->is_root = tmpl->is_root;
6084         rec->refs = tmpl->refs;
6085         rec->extent_item_refs = tmpl->extent_item_refs;
6086         rec->parent_generation = tmpl->parent_generation;
6087         INIT_LIST_HEAD(&rec->backrefs);
6088         INIT_LIST_HEAD(&rec->dups);
6089         INIT_LIST_HEAD(&rec->list);
6090         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6091         rec->cache.start = tmpl->start;
6092         rec->cache.size = tmpl->nr;
6093         ret = insert_cache_extent(extent_cache, &rec->cache);
6094         if (ret) {
6095                 free(rec);
6096                 return ret;
6097         }
6098         bytes_used += rec->nr;
6099
6100         if (tmpl->metadata)
6101                 rec->crossing_stripes = check_crossing_stripes(global_info,
6102                                 rec->start, global_info->tree_root->nodesize);
6103         check_extent_type(rec);
6104         return ret;
6105 }
6106
6107 /*
6108  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6109  * some are hints:
6110  * - refs              - if found, increase refs
6111  * - is_root           - if found, set
6112  * - content_checked   - if found, set
6113  * - owner_ref_checked - if found, set
6114  *
6115  * If not found, create a new one, initialize and insert.
6116  */
6117 static int add_extent_rec(struct cache_tree *extent_cache,
6118                 struct extent_record *tmpl)
6119 {
6120         struct extent_record *rec;
6121         struct cache_extent *cache;
6122         int ret = 0;
6123         int dup = 0;
6124
6125         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6126         if (cache) {
6127                 rec = container_of(cache, struct extent_record, cache);
6128                 if (tmpl->refs)
6129                         rec->refs++;
6130                 if (rec->nr == 1)
6131                         rec->nr = max(tmpl->nr, tmpl->max_size);
6132
6133                 /*
6134                  * We need to make sure to reset nr to whatever the extent
6135                  * record says was the real size, this way we can compare it to
6136                  * the backrefs.
6137                  */
6138                 if (tmpl->found_rec) {
6139                         if (tmpl->start != rec->start || rec->found_rec) {
6140                                 struct extent_record *tmp;
6141
6142                                 dup = 1;
6143                                 if (list_empty(&rec->list))
6144                                         list_add_tail(&rec->list,
6145                                                       &duplicate_extents);
6146
6147                                 /*
6148                                  * We have to do this song and dance in case we
6149                                  * find an extent record that falls inside of
6150                                  * our current extent record but does not have
6151                                  * the same objectid.
6152                                  */
6153                                 tmp = malloc(sizeof(*tmp));
6154                                 if (!tmp)
6155                                         return -ENOMEM;
6156                                 tmp->start = tmpl->start;
6157                                 tmp->max_size = tmpl->max_size;
6158                                 tmp->nr = tmpl->nr;
6159                                 tmp->found_rec = 1;
6160                                 tmp->metadata = tmpl->metadata;
6161                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6162                                 INIT_LIST_HEAD(&tmp->list);
6163                                 list_add_tail(&tmp->list, &rec->dups);
6164                                 rec->num_duplicates++;
6165                         } else {
6166                                 rec->nr = tmpl->nr;
6167                                 rec->found_rec = 1;
6168                         }
6169                 }
6170
6171                 if (tmpl->extent_item_refs && !dup) {
6172                         if (rec->extent_item_refs) {
6173                                 fprintf(stderr, "block %llu rec "
6174                                         "extent_item_refs %llu, passed %llu\n",
6175                                         (unsigned long long)tmpl->start,
6176                                         (unsigned long long)
6177                                                         rec->extent_item_refs,
6178                                         (unsigned long long)tmpl->extent_item_refs);
6179                         }
6180                         rec->extent_item_refs = tmpl->extent_item_refs;
6181                 }
6182                 if (tmpl->is_root)
6183                         rec->is_root = 1;
6184                 if (tmpl->content_checked)
6185                         rec->content_checked = 1;
6186                 if (tmpl->owner_ref_checked)
6187                         rec->owner_ref_checked = 1;
6188                 memcpy(&rec->parent_key, &tmpl->parent_key,
6189                                 sizeof(tmpl->parent_key));
6190                 if (tmpl->parent_generation)
6191                         rec->parent_generation = tmpl->parent_generation;
6192                 if (rec->max_size < tmpl->max_size)
6193                         rec->max_size = tmpl->max_size;
6194
6195                 /*
6196                  * A metadata extent can't cross stripe_len boundary, otherwise
6197                  * kernel scrub won't be able to handle it.
6198                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6199                  * it.
6200                  */
6201                 if (tmpl->metadata)
6202                         rec->crossing_stripes = check_crossing_stripes(
6203                                         global_info, rec->start,
6204                                         global_info->tree_root->nodesize);
6205                 check_extent_type(rec);
6206                 maybe_free_extent_rec(extent_cache, rec);
6207                 return ret;
6208         }
6209
6210         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6211
6212         return ret;
6213 }
6214
6215 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6216                             u64 parent, u64 root, int found_ref)
6217 {
6218         struct extent_record *rec;
6219         struct tree_backref *back;
6220         struct cache_extent *cache;
6221         int ret;
6222
6223         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6224         if (!cache) {
6225                 struct extent_record tmpl;
6226
6227                 memset(&tmpl, 0, sizeof(tmpl));
6228                 tmpl.start = bytenr;
6229                 tmpl.nr = 1;
6230                 tmpl.metadata = 1;
6231                 tmpl.max_size = 1;
6232
6233                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6234                 if (ret)
6235                         return ret;
6236
6237                 /* really a bug in cache_extent implement now */
6238                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6239                 if (!cache)
6240                         return -ENOENT;
6241         }
6242
6243         rec = container_of(cache, struct extent_record, cache);
6244         if (rec->start != bytenr) {
6245                 /*
6246                  * Several cause, from unaligned bytenr to over lapping extents
6247                  */
6248                 return -EEXIST;
6249         }
6250
6251         back = find_tree_backref(rec, parent, root);
6252         if (!back) {
6253                 back = alloc_tree_backref(rec, parent, root);
6254                 if (!back)
6255                         return -ENOMEM;
6256         }
6257
6258         if (found_ref) {
6259                 if (back->node.found_ref) {
6260                         fprintf(stderr, "Extent back ref already exists "
6261                                 "for %llu parent %llu root %llu \n",
6262                                 (unsigned long long)bytenr,
6263                                 (unsigned long long)parent,
6264                                 (unsigned long long)root);
6265                 }
6266                 back->node.found_ref = 1;
6267         } else {
6268                 if (back->node.found_extent_tree) {
6269                         fprintf(stderr, "Extent back ref already exists "
6270                                 "for %llu parent %llu root %llu \n",
6271                                 (unsigned long long)bytenr,
6272                                 (unsigned long long)parent,
6273                                 (unsigned long long)root);
6274                 }
6275                 back->node.found_extent_tree = 1;
6276         }
6277         check_extent_type(rec);
6278         maybe_free_extent_rec(extent_cache, rec);
6279         return 0;
6280 }
6281
6282 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6283                             u64 parent, u64 root, u64 owner, u64 offset,
6284                             u32 num_refs, int found_ref, u64 max_size)
6285 {
6286         struct extent_record *rec;
6287         struct data_backref *back;
6288         struct cache_extent *cache;
6289         int ret;
6290
6291         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6292         if (!cache) {
6293                 struct extent_record tmpl;
6294
6295                 memset(&tmpl, 0, sizeof(tmpl));
6296                 tmpl.start = bytenr;
6297                 tmpl.nr = 1;
6298                 tmpl.max_size = max_size;
6299
6300                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6301                 if (ret)
6302                         return ret;
6303
6304                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6305                 if (!cache)
6306                         abort();
6307         }
6308
6309         rec = container_of(cache, struct extent_record, cache);
6310         if (rec->max_size < max_size)
6311                 rec->max_size = max_size;
6312
6313         /*
6314          * If found_ref is set then max_size is the real size and must match the
6315          * existing refs.  So if we have already found a ref then we need to
6316          * make sure that this ref matches the existing one, otherwise we need
6317          * to add a new backref so we can notice that the backrefs don't match
6318          * and we need to figure out who is telling the truth.  This is to
6319          * account for that awful fsync bug I introduced where we'd end up with
6320          * a btrfs_file_extent_item that would have its length include multiple
6321          * prealloc extents or point inside of a prealloc extent.
6322          */
6323         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6324                                  bytenr, max_size);
6325         if (!back) {
6326                 back = alloc_data_backref(rec, parent, root, owner, offset,
6327                                           max_size);
6328                 BUG_ON(!back);
6329         }
6330
6331         if (found_ref) {
6332                 BUG_ON(num_refs != 1);
6333                 if (back->node.found_ref)
6334                         BUG_ON(back->bytes != max_size);
6335                 back->node.found_ref = 1;
6336                 back->found_ref += 1;
6337                 back->bytes = max_size;
6338                 back->disk_bytenr = bytenr;
6339                 rec->refs += 1;
6340                 rec->content_checked = 1;
6341                 rec->owner_ref_checked = 1;
6342         } else {
6343                 if (back->node.found_extent_tree) {
6344                         fprintf(stderr, "Extent back ref already exists "
6345                                 "for %llu parent %llu root %llu "
6346                                 "owner %llu offset %llu num_refs %lu\n",
6347                                 (unsigned long long)bytenr,
6348                                 (unsigned long long)parent,
6349                                 (unsigned long long)root,
6350                                 (unsigned long long)owner,
6351                                 (unsigned long long)offset,
6352                                 (unsigned long)num_refs);
6353                 }
6354                 back->num_refs = num_refs;
6355                 back->node.found_extent_tree = 1;
6356         }
6357         maybe_free_extent_rec(extent_cache, rec);
6358         return 0;
6359 }
6360
6361 static int add_pending(struct cache_tree *pending,
6362                        struct cache_tree *seen, u64 bytenr, u32 size)
6363 {
6364         int ret;
6365         ret = add_cache_extent(seen, bytenr, size);
6366         if (ret)
6367                 return ret;
6368         add_cache_extent(pending, bytenr, size);
6369         return 0;
6370 }
6371
6372 static int pick_next_pending(struct cache_tree *pending,
6373                         struct cache_tree *reada,
6374                         struct cache_tree *nodes,
6375                         u64 last, struct block_info *bits, int bits_nr,
6376                         int *reada_bits)
6377 {
6378         unsigned long node_start = last;
6379         struct cache_extent *cache;
6380         int ret;
6381
6382         cache = search_cache_extent(reada, 0);
6383         if (cache) {
6384                 bits[0].start = cache->start;
6385                 bits[0].size = cache->size;
6386                 *reada_bits = 1;
6387                 return 1;
6388         }
6389         *reada_bits = 0;
6390         if (node_start > 32768)
6391                 node_start -= 32768;
6392
6393         cache = search_cache_extent(nodes, node_start);
6394         if (!cache)
6395                 cache = search_cache_extent(nodes, 0);
6396
6397         if (!cache) {
6398                  cache = search_cache_extent(pending, 0);
6399                  if (!cache)
6400                          return 0;
6401                  ret = 0;
6402                  do {
6403                          bits[ret].start = cache->start;
6404                          bits[ret].size = cache->size;
6405                          cache = next_cache_extent(cache);
6406                          ret++;
6407                  } while (cache && ret < bits_nr);
6408                  return ret;
6409         }
6410
6411         ret = 0;
6412         do {
6413                 bits[ret].start = cache->start;
6414                 bits[ret].size = cache->size;
6415                 cache = next_cache_extent(cache);
6416                 ret++;
6417         } while (cache && ret < bits_nr);
6418
6419         if (bits_nr - ret > 8) {
6420                 u64 lookup = bits[0].start + bits[0].size;
6421                 struct cache_extent *next;
6422                 next = search_cache_extent(pending, lookup);
6423                 while(next) {
6424                         if (next->start - lookup > 32768)
6425                                 break;
6426                         bits[ret].start = next->start;
6427                         bits[ret].size = next->size;
6428                         lookup = next->start + next->size;
6429                         ret++;
6430                         if (ret == bits_nr)
6431                                 break;
6432                         next = next_cache_extent(next);
6433                         if (!next)
6434                                 break;
6435                 }
6436         }
6437         return ret;
6438 }
6439
6440 static void free_chunk_record(struct cache_extent *cache)
6441 {
6442         struct chunk_record *rec;
6443
6444         rec = container_of(cache, struct chunk_record, cache);
6445         list_del_init(&rec->list);
6446         list_del_init(&rec->dextents);
6447         free(rec);
6448 }
6449
6450 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6451 {
6452         cache_tree_free_extents(chunk_cache, free_chunk_record);
6453 }
6454
6455 static void free_device_record(struct rb_node *node)
6456 {
6457         struct device_record *rec;
6458
6459         rec = container_of(node, struct device_record, node);
6460         free(rec);
6461 }
6462
6463 FREE_RB_BASED_TREE(device_cache, free_device_record);
6464
6465 int insert_block_group_record(struct block_group_tree *tree,
6466                               struct block_group_record *bg_rec)
6467 {
6468         int ret;
6469
6470         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6471         if (ret)
6472                 return ret;
6473
6474         list_add_tail(&bg_rec->list, &tree->block_groups);
6475         return 0;
6476 }
6477
6478 static void free_block_group_record(struct cache_extent *cache)
6479 {
6480         struct block_group_record *rec;
6481
6482         rec = container_of(cache, struct block_group_record, cache);
6483         list_del_init(&rec->list);
6484         free(rec);
6485 }
6486
6487 void free_block_group_tree(struct block_group_tree *tree)
6488 {
6489         cache_tree_free_extents(&tree->tree, free_block_group_record);
6490 }
6491
6492 int insert_device_extent_record(struct device_extent_tree *tree,
6493                                 struct device_extent_record *de_rec)
6494 {
6495         int ret;
6496
6497         /*
6498          * Device extent is a bit different from the other extents, because
6499          * the extents which belong to the different devices may have the
6500          * same start and size, so we need use the special extent cache
6501          * search/insert functions.
6502          */
6503         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6504         if (ret)
6505                 return ret;
6506
6507         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6508         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6509         return 0;
6510 }
6511
6512 static void free_device_extent_record(struct cache_extent *cache)
6513 {
6514         struct device_extent_record *rec;
6515
6516         rec = container_of(cache, struct device_extent_record, cache);
6517         if (!list_empty(&rec->chunk_list))
6518                 list_del_init(&rec->chunk_list);
6519         if (!list_empty(&rec->device_list))
6520                 list_del_init(&rec->device_list);
6521         free(rec);
6522 }
6523
6524 void free_device_extent_tree(struct device_extent_tree *tree)
6525 {
6526         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6527 }
6528
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs, all others as data backrefs carrying the item's ref count.
 * The key's objectid is the extent bytenr and its offset is the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6549
6550 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6551                                             struct btrfs_key *key,
6552                                             int slot)
6553 {
6554         struct btrfs_chunk *ptr;
6555         struct chunk_record *rec;
6556         int num_stripes, i;
6557
6558         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6559         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6560
6561         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6562         if (!rec) {
6563                 fprintf(stderr, "memory allocation failed\n");
6564                 exit(-1);
6565         }
6566
6567         INIT_LIST_HEAD(&rec->list);
6568         INIT_LIST_HEAD(&rec->dextents);
6569         rec->bg_rec = NULL;
6570
6571         rec->cache.start = key->offset;
6572         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6573
6574         rec->generation = btrfs_header_generation(leaf);
6575
6576         rec->objectid = key->objectid;
6577         rec->type = key->type;
6578         rec->offset = key->offset;
6579
6580         rec->length = rec->cache.size;
6581         rec->owner = btrfs_chunk_owner(leaf, ptr);
6582         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6583         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6584         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6585         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6586         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6587         rec->num_stripes = num_stripes;
6588         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6589
6590         for (i = 0; i < rec->num_stripes; ++i) {
6591                 rec->stripes[i].devid =
6592                         btrfs_stripe_devid_nr(leaf, ptr, i);
6593                 rec->stripes[i].offset =
6594                         btrfs_stripe_offset_nr(leaf, ptr, i);
6595                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6596                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6597                                 BTRFS_UUID_SIZE);
6598         }
6599
6600         return rec;
6601 }
6602
6603 static int process_chunk_item(struct cache_tree *chunk_cache,
6604                               struct btrfs_key *key, struct extent_buffer *eb,
6605                               int slot)
6606 {
6607         struct chunk_record *rec;
6608         struct btrfs_chunk *chunk;
6609         int ret = 0;
6610
6611         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6612         /*
6613          * Do extra check for this chunk item,
6614          *
6615          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6616          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6617          * and owner<->key_type check.
6618          */
6619         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6620                                       key->offset);
6621         if (ret < 0) {
6622                 error("chunk(%llu, %llu) is not valid, ignore it",
6623                       key->offset, btrfs_chunk_length(eb, chunk));
6624                 return 0;
6625         }
6626         rec = btrfs_new_chunk_record(eb, key, slot);
6627         ret = insert_cache_extent(chunk_cache, &rec->cache);
6628         if (ret) {
6629                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6630                         rec->offset, rec->length);
6631                 free(rec);
6632         }
6633
6634         return ret;
6635 }
6636
6637 static int process_device_item(struct rb_root *dev_cache,
6638                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6639 {
6640         struct btrfs_dev_item *ptr;
6641         struct device_record *rec;
6642         int ret = 0;
6643
6644         ptr = btrfs_item_ptr(eb,
6645                 slot, struct btrfs_dev_item);
6646
6647         rec = malloc(sizeof(*rec));
6648         if (!rec) {
6649                 fprintf(stderr, "memory allocation failed\n");
6650                 return -ENOMEM;
6651         }
6652
6653         rec->devid = key->offset;
6654         rec->generation = btrfs_header_generation(eb);
6655
6656         rec->objectid = key->objectid;
6657         rec->type = key->type;
6658         rec->offset = key->offset;
6659
6660         rec->devid = btrfs_device_id(eb, ptr);
6661         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6662         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6663
6664         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6665         if (ret) {
6666                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6667                 free(rec);
6668         }
6669
6670         return ret;
6671 }
6672
6673 struct block_group_record *
6674 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6675                              int slot)
6676 {
6677         struct btrfs_block_group_item *ptr;
6678         struct block_group_record *rec;
6679
6680         rec = calloc(1, sizeof(*rec));
6681         if (!rec) {
6682                 fprintf(stderr, "memory allocation failed\n");
6683                 exit(-1);
6684         }
6685
6686         rec->cache.start = key->objectid;
6687         rec->cache.size = key->offset;
6688
6689         rec->generation = btrfs_header_generation(leaf);
6690
6691         rec->objectid = key->objectid;
6692         rec->type = key->type;
6693         rec->offset = key->offset;
6694
6695         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6696         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6697
6698         INIT_LIST_HEAD(&rec->list);
6699
6700         return rec;
6701 }
6702
6703 static int process_block_group_item(struct block_group_tree *block_group_cache,
6704                                     struct btrfs_key *key,
6705                                     struct extent_buffer *eb, int slot)
6706 {
6707         struct block_group_record *rec;
6708         int ret = 0;
6709
6710         rec = btrfs_new_block_group_record(eb, key, slot);
6711         ret = insert_block_group_record(block_group_cache, rec);
6712         if (ret) {
6713                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6714                         rec->objectid, rec->offset);
6715                 free(rec);
6716         }
6717
6718         return ret;
6719 }
6720
6721 struct device_extent_record *
6722 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6723                                struct btrfs_key *key, int slot)
6724 {
6725         struct device_extent_record *rec;
6726         struct btrfs_dev_extent *ptr;
6727
6728         rec = calloc(1, sizeof(*rec));
6729         if (!rec) {
6730                 fprintf(stderr, "memory allocation failed\n");
6731                 exit(-1);
6732         }
6733
6734         rec->cache.objectid = key->objectid;
6735         rec->cache.start = key->offset;
6736
6737         rec->generation = btrfs_header_generation(leaf);
6738
6739         rec->objectid = key->objectid;
6740         rec->type = key->type;
6741         rec->offset = key->offset;
6742
6743         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6744         rec->chunk_objecteid =
6745                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6746         rec->chunk_offset =
6747                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6748         rec->length = btrfs_dev_extent_length(leaf, ptr);
6749         rec->cache.size = rec->length;
6750
6751         INIT_LIST_HEAD(&rec->chunk_list);
6752         INIT_LIST_HEAD(&rec->device_list);
6753
6754         return rec;
6755 }
6756
6757 static int
6758 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6759                            struct btrfs_key *key, struct extent_buffer *eb,
6760                            int slot)
6761 {
6762         struct device_extent_record *rec;
6763         int ret;
6764
6765         rec = btrfs_new_device_extent_record(eb, key, slot);
6766         ret = insert_device_extent_record(dev_extent_cache, rec);
6767         if (ret) {
6768                 fprintf(stderr,
6769                         "Device extent[%llu, %llu, %llu] existed.\n",
6770                         rec->objectid, rec->offset, rec->length);
6771                 free(rec);
6772         }
6773
6774         return ret;
6775 }
6776
6777 static int process_extent_item(struct btrfs_root *root,
6778                                struct cache_tree *extent_cache,
6779                                struct extent_buffer *eb, int slot)
6780 {
6781         struct btrfs_extent_item *ei;
6782         struct btrfs_extent_inline_ref *iref;
6783         struct btrfs_extent_data_ref *dref;
6784         struct btrfs_shared_data_ref *sref;
6785         struct btrfs_key key;
6786         struct extent_record tmpl;
6787         unsigned long end;
6788         unsigned long ptr;
6789         int ret;
6790         int type;
6791         u32 item_size = btrfs_item_size_nr(eb, slot);
6792         u64 refs = 0;
6793         u64 offset;
6794         u64 num_bytes;
6795         int metadata = 0;
6796
6797         btrfs_item_key_to_cpu(eb, &key, slot);
6798
6799         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6800                 metadata = 1;
6801                 num_bytes = root->nodesize;
6802         } else {
6803                 num_bytes = key.offset;
6804         }
6805
6806         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6807                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6808                       key.objectid, root->sectorsize);
6809                 return -EIO;
6810         }
6811         if (item_size < sizeof(*ei)) {
6812 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6813                 struct btrfs_extent_item_v0 *ei0;
6814                 BUG_ON(item_size != sizeof(*ei0));
6815                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6816                 refs = btrfs_extent_refs_v0(eb, ei0);
6817 #else
6818                 BUG();
6819 #endif
6820                 memset(&tmpl, 0, sizeof(tmpl));
6821                 tmpl.start = key.objectid;
6822                 tmpl.nr = num_bytes;
6823                 tmpl.extent_item_refs = refs;
6824                 tmpl.metadata = metadata;
6825                 tmpl.found_rec = 1;
6826                 tmpl.max_size = num_bytes;
6827
6828                 return add_extent_rec(extent_cache, &tmpl);
6829         }
6830
6831         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6832         refs = btrfs_extent_refs(eb, ei);
6833         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6834                 metadata = 1;
6835         else
6836                 metadata = 0;
6837         if (metadata && num_bytes != root->nodesize) {
6838                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6839                       num_bytes, root->nodesize);
6840                 return -EIO;
6841         }
6842         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6843                 error("ignore invalid data extent, length %llu is not aligned to %u",
6844                       num_bytes, root->sectorsize);
6845                 return -EIO;
6846         }
6847
6848         memset(&tmpl, 0, sizeof(tmpl));
6849         tmpl.start = key.objectid;
6850         tmpl.nr = num_bytes;
6851         tmpl.extent_item_refs = refs;
6852         tmpl.metadata = metadata;
6853         tmpl.found_rec = 1;
6854         tmpl.max_size = num_bytes;
6855         add_extent_rec(extent_cache, &tmpl);
6856
6857         ptr = (unsigned long)(ei + 1);
6858         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6859             key.type == BTRFS_EXTENT_ITEM_KEY)
6860                 ptr += sizeof(struct btrfs_tree_block_info);
6861
6862         end = (unsigned long)ei + item_size;
6863         while (ptr < end) {
6864                 iref = (struct btrfs_extent_inline_ref *)ptr;
6865                 type = btrfs_extent_inline_ref_type(eb, iref);
6866                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6867                 switch (type) {
6868                 case BTRFS_TREE_BLOCK_REF_KEY:
6869                         ret = add_tree_backref(extent_cache, key.objectid,
6870                                         0, offset, 0);
6871                         if (ret < 0)
6872                                 error(
6873                         "add_tree_backref failed (extent items tree block): %s",
6874                                       strerror(-ret));
6875                         break;
6876                 case BTRFS_SHARED_BLOCK_REF_KEY:
6877                         ret = add_tree_backref(extent_cache, key.objectid,
6878                                         offset, 0, 0);
6879                         if (ret < 0)
6880                                 error(
6881                         "add_tree_backref failed (extent items shared block): %s",
6882                                       strerror(-ret));
6883                         break;
6884                 case BTRFS_EXTENT_DATA_REF_KEY:
6885                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6886                         add_data_backref(extent_cache, key.objectid, 0,
6887                                         btrfs_extent_data_ref_root(eb, dref),
6888                                         btrfs_extent_data_ref_objectid(eb,
6889                                                                        dref),
6890                                         btrfs_extent_data_ref_offset(eb, dref),
6891                                         btrfs_extent_data_ref_count(eb, dref),
6892                                         0, num_bytes);
6893                         break;
6894                 case BTRFS_SHARED_DATA_REF_KEY:
6895                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6896                         add_data_backref(extent_cache, key.objectid, offset,
6897                                         0, 0, 0,
6898                                         btrfs_shared_data_ref_count(eb, sref),
6899                                         0, num_bytes);
6900                         break;
6901                 default:
6902                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6903                                 key.objectid, key.type, num_bytes);
6904                         goto out;
6905                 }
6906                 ptr += btrfs_extent_inline_ref_size(type);
6907         }
6908         WARN_ON(ptr > end);
6909 out:
6910         return 0;
6911 }
6912
6913 static int check_cache_range(struct btrfs_root *root,
6914                              struct btrfs_block_group_cache *cache,
6915                              u64 offset, u64 bytes)
6916 {
6917         struct btrfs_free_space *entry;
6918         u64 *logical;
6919         u64 bytenr;
6920         int stripe_len;
6921         int i, nr, ret;
6922
6923         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6924                 bytenr = btrfs_sb_offset(i);
6925                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6926                                        cache->key.objectid, bytenr, 0,
6927                                        &logical, &nr, &stripe_len);
6928                 if (ret)
6929                         return ret;
6930
6931                 while (nr--) {
6932                         if (logical[nr] + stripe_len <= offset)
6933                                 continue;
6934                         if (offset + bytes <= logical[nr])
6935                                 continue;
6936                         if (logical[nr] == offset) {
6937                                 if (stripe_len >= bytes) {
6938                                         free(logical);
6939                                         return 0;
6940                                 }
6941                                 bytes -= stripe_len;
6942                                 offset += stripe_len;
6943                         } else if (logical[nr] < offset) {
6944                                 if (logical[nr] + stripe_len >=
6945                                     offset + bytes) {
6946                                         free(logical);
6947                                         return 0;
6948                                 }
6949                                 bytes = (offset + bytes) -
6950                                         (logical[nr] + stripe_len);
6951                                 offset = logical[nr] + stripe_len;
6952                         } else {
6953                                 /*
6954                                  * Could be tricky, the super may land in the
6955                                  * middle of the area we're checking.  First
6956                                  * check the easiest case, it's at the end.
6957                                  */
6958                                 if (logical[nr] + stripe_len >=
6959                                     bytes + offset) {
6960                                         bytes = logical[nr] - offset;
6961                                         continue;
6962                                 }
6963
6964                                 /* Check the left side */
6965                                 ret = check_cache_range(root, cache,
6966                                                         offset,
6967                                                         logical[nr] - offset);
6968                                 if (ret) {
6969                                         free(logical);
6970                                         return ret;
6971                                 }
6972
6973                                 /* Now we continue with the right side */
6974                                 bytes = (offset + bytes) -
6975                                         (logical[nr] + stripe_len);
6976                                 offset = logical[nr] + stripe_len;
6977                         }
6978                 }
6979
6980                 free(logical);
6981         }
6982
6983         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6984         if (!entry) {
6985                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6986                         offset, offset+bytes);
6987                 return -EINVAL;
6988         }
6989
6990         if (entry->offset != offset) {
6991                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6992                         entry->offset);
6993                 return -EINVAL;
6994         }
6995
6996         if (entry->bytes != bytes) {
6997                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6998                         bytes, entry->bytes, offset);
6999                 return -EINVAL;
7000         }
7001
7002         unlink_free_space(cache->free_space_ctl, entry);
7003         free(entry);
7004         return 0;
7005 }
7006
7007 static int verify_space_cache(struct btrfs_root *root,
7008                               struct btrfs_block_group_cache *cache)
7009 {
7010         struct btrfs_path path;
7011         struct extent_buffer *leaf;
7012         struct btrfs_key key;
7013         u64 last;
7014         int ret = 0;
7015
7016         root = root->fs_info->extent_root;
7017
7018         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7019
7020         btrfs_init_path(&path);
7021         key.objectid = last;
7022         key.offset = 0;
7023         key.type = BTRFS_EXTENT_ITEM_KEY;
7024         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7025         if (ret < 0)
7026                 goto out;
7027         ret = 0;
7028         while (1) {
7029                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7030                         ret = btrfs_next_leaf(root, &path);
7031                         if (ret < 0)
7032                                 goto out;
7033                         if (ret > 0) {
7034                                 ret = 0;
7035                                 break;
7036                         }
7037                 }
7038                 leaf = path.nodes[0];
7039                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7040                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7041                         break;
7042                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7043                     key.type != BTRFS_METADATA_ITEM_KEY) {
7044                         path.slots[0]++;
7045                         continue;
7046                 }
7047
7048                 if (last == key.objectid) {
7049                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7050                                 last = key.objectid + key.offset;
7051                         else
7052                                 last = key.objectid + root->nodesize;
7053                         path.slots[0]++;
7054                         continue;
7055                 }
7056
7057                 ret = check_cache_range(root, cache, last,
7058                                         key.objectid - last);
7059                 if (ret)
7060                         break;
7061                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7062                         last = key.objectid + key.offset;
7063                 else
7064                         last = key.objectid + root->nodesize;
7065                 path.slots[0]++;
7066         }
7067
7068         if (last < cache->key.objectid + cache->key.offset)
7069                 ret = check_cache_range(root, cache, last,
7070                                         cache->key.objectid +
7071                                         cache->key.offset - last);
7072
7073 out:
7074         btrfs_release_path(&path);
7075
7076         if (!ret &&
7077             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7078                 fprintf(stderr, "There are still entries left in the space "
7079                         "cache\n");
7080                 ret = -EINVAL;
7081         }
7082
7083         return ret;
7084 }
7085
7086 static int check_space_cache(struct btrfs_root *root)
7087 {
7088         struct btrfs_block_group_cache *cache;
7089         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7090         int ret;
7091         int error = 0;
7092
7093         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7094             btrfs_super_generation(root->fs_info->super_copy) !=
7095             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7096                 printf("cache and super generation don't match, space cache "
7097                        "will be invalidated\n");
7098                 return 0;
7099         }
7100
7101         if (ctx.progress_enabled) {
7102                 ctx.tp = TASK_FREE_SPACE;
7103                 task_start(ctx.info);
7104         }
7105
7106         while (1) {
7107                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7108                 if (!cache)
7109                         break;
7110
7111                 start = cache->key.objectid + cache->key.offset;
7112                 if (!cache->free_space_ctl) {
7113                         if (btrfs_init_free_space_ctl(cache,
7114                                                       root->sectorsize)) {
7115                                 ret = -ENOMEM;
7116                                 break;
7117                         }
7118                 } else {
7119                         btrfs_remove_free_space_cache(cache);
7120                 }
7121
7122                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7123                         ret = exclude_super_stripes(root, cache);
7124                         if (ret) {
7125                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7126                                         strerror(-ret));
7127                                 error++;
7128                                 continue;
7129                         }
7130                         ret = load_free_space_tree(root->fs_info, cache);
7131                         free_excluded_extents(root, cache);
7132                         if (ret < 0) {
7133                                 fprintf(stderr, "could not load free space tree: %s\n",
7134                                         strerror(-ret));
7135                                 error++;
7136                                 continue;
7137                         }
7138                         error += ret;
7139                 } else {
7140                         ret = load_free_space_cache(root->fs_info, cache);
7141                         if (!ret)
7142                                 continue;
7143                 }
7144
7145                 ret = verify_space_cache(root, cache);
7146                 if (ret) {
7147                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7148                                 cache->key.objectid);
7149                         error++;
7150                 }
7151         }
7152
7153         task_stop(ctx.info);
7154
7155         return error ? -EINVAL : 0;
7156 }
7157
7158 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7159                         u64 num_bytes, unsigned long leaf_offset,
7160                         struct extent_buffer *eb) {
7161
7162         u64 offset = 0;
7163         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7164         char *data;
7165         unsigned long csum_offset;
7166         u32 csum;
7167         u32 csum_expected;
7168         u64 read_len;
7169         u64 data_checked = 0;
7170         u64 tmp;
7171         int ret = 0;
7172         int mirror;
7173         int num_copies;
7174
7175         if (num_bytes % root->sectorsize)
7176                 return -EINVAL;
7177
7178         data = malloc(num_bytes);
7179         if (!data)
7180                 return -ENOMEM;
7181
7182         while (offset < num_bytes) {
7183                 mirror = 0;
7184 again:
7185                 read_len = num_bytes - offset;
7186                 /* read as much space once a time */
7187                 ret = read_extent_data(root, data + offset,
7188                                 bytenr + offset, &read_len, mirror);
7189                 if (ret)
7190                         goto out;
7191                 data_checked = 0;
7192                 /* verify every 4k data's checksum */
7193                 while (data_checked < read_len) {
7194                         csum = ~(u32)0;
7195                         tmp = offset + data_checked;
7196
7197                         csum = btrfs_csum_data((char *)data + tmp,
7198                                                csum, root->sectorsize);
7199                         btrfs_csum_final(csum, (u8 *)&csum);
7200
7201                         csum_offset = leaf_offset +
7202                                  tmp / root->sectorsize * csum_size;
7203                         read_extent_buffer(eb, (char *)&csum_expected,
7204                                            csum_offset, csum_size);
7205                         /* try another mirror */
7206                         if (csum != csum_expected) {
7207                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7208                                                 mirror, bytenr + tmp,
7209                                                 csum, csum_expected);
7210                                 num_copies = btrfs_num_copies(
7211                                                 &root->fs_info->mapping_tree,
7212                                                 bytenr, num_bytes);
7213                                 if (mirror < num_copies - 1) {
7214                                         mirror += 1;
7215                                         goto again;
7216                                 }
7217                         }
7218                         data_checked += root->sectorsize;
7219                 }
7220                 offset += read_len;
7221         }
7222 out:
7223         free(data);
7224         return ret;
7225 }
7226
7227 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7228                                u64 num_bytes)
7229 {
7230         struct btrfs_path path;
7231         struct extent_buffer *leaf;
7232         struct btrfs_key key;
7233         int ret;
7234
7235         btrfs_init_path(&path);
7236         key.objectid = bytenr;
7237         key.type = BTRFS_EXTENT_ITEM_KEY;
7238         key.offset = (u64)-1;
7239
7240 again:
7241         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7242                                 0, 0);
7243         if (ret < 0) {
7244                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7245                 btrfs_release_path(&path);
7246                 return ret;
7247         } else if (ret) {
7248                 if (path.slots[0] > 0) {
7249                         path.slots[0]--;
7250                 } else {
7251                         ret = btrfs_prev_leaf(root, &path);
7252                         if (ret < 0) {
7253                                 goto out;
7254                         } else if (ret > 0) {
7255                                 ret = 0;
7256                                 goto out;
7257                         }
7258                 }
7259         }
7260
7261         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7262
7263         /*
7264          * Block group items come before extent items if they have the same
7265          * bytenr, so walk back one more just in case.  Dear future traveller,
7266          * first congrats on mastering time travel.  Now if it's not too much
7267          * trouble could you go back to 2006 and tell Chris to make the
7268          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7269          * EXTENT_ITEM_KEY please?
7270          */
7271         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7272                 if (path.slots[0] > 0) {
7273                         path.slots[0]--;
7274                 } else {
7275                         ret = btrfs_prev_leaf(root, &path);
7276                         if (ret < 0) {
7277                                 goto out;
7278                         } else if (ret > 0) {
7279                                 ret = 0;
7280                                 goto out;
7281                         }
7282                 }
7283                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7284         }
7285
7286         while (num_bytes) {
7287                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7288                         ret = btrfs_next_leaf(root, &path);
7289                         if (ret < 0) {
7290                                 fprintf(stderr, "Error going to next leaf "
7291                                         "%d\n", ret);
7292                                 btrfs_release_path(&path);
7293                                 return ret;
7294                         } else if (ret) {
7295                                 break;
7296                         }
7297                 }
7298                 leaf = path.nodes[0];
7299                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7300                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7301                         path.slots[0]++;
7302                         continue;
7303                 }
7304                 if (key.objectid + key.offset < bytenr) {
7305                         path.slots[0]++;
7306                         continue;
7307                 }
7308                 if (key.objectid > bytenr + num_bytes)
7309                         break;
7310
7311                 if (key.objectid == bytenr) {
7312                         if (key.offset >= num_bytes) {
7313                                 num_bytes = 0;
7314                                 break;
7315                         }
7316                         num_bytes -= key.offset;
7317                         bytenr += key.offset;
7318                 } else if (key.objectid < bytenr) {
7319                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7320                                 num_bytes = 0;
7321                                 break;
7322                         }
7323                         num_bytes = (bytenr + num_bytes) -
7324                                 (key.objectid + key.offset);
7325                         bytenr = key.objectid + key.offset;
7326                 } else {
7327                         if (key.objectid + key.offset < bytenr + num_bytes) {
7328                                 u64 new_start = key.objectid + key.offset;
7329                                 u64 new_bytes = bytenr + num_bytes - new_start;
7330
7331                                 /*
7332                                  * Weird case, the extent is in the middle of
7333                                  * our range, we'll have to search one side
7334                                  * and then the other.  Not sure if this happens
7335                                  * in real life, but no harm in coding it up
7336                                  * anyway just in case.
7337                                  */
7338                                 btrfs_release_path(&path);
7339                                 ret = check_extent_exists(root, new_start,
7340                                                           new_bytes);
7341                                 if (ret) {
7342                                         fprintf(stderr, "Right section didn't "
7343                                                 "have a record\n");
7344                                         break;
7345                                 }
7346                                 num_bytes = key.objectid - bytenr;
7347                                 goto again;
7348                         }
7349                         num_bytes = key.objectid - bytenr;
7350                 }
7351                 path.slots[0]++;
7352         }
7353         ret = 0;
7354
7355 out:
7356         if (num_bytes && !ret) {
7357                 fprintf(stderr, "There are no extents for csum range "
7358                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7359                 ret = 1;
7360         }
7361
7362         btrfs_release_path(&path);
7363         return ret;
7364 }
7365
7366 static int check_csums(struct btrfs_root *root)
7367 {
7368         struct btrfs_path path;
7369         struct extent_buffer *leaf;
7370         struct btrfs_key key;
7371         u64 offset = 0, num_bytes = 0;
7372         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7373         int errors = 0;
7374         int ret;
7375         u64 data_len;
7376         unsigned long leaf_offset;
7377
7378         root = root->fs_info->csum_root;
7379         if (!extent_buffer_uptodate(root->node)) {
7380                 fprintf(stderr, "No valid csum tree found\n");
7381                 return -ENOENT;
7382         }
7383
7384         btrfs_init_path(&path);
7385         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7386         key.type = BTRFS_EXTENT_CSUM_KEY;
7387         key.offset = 0;
7388         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7389         if (ret < 0) {
7390                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7391                 btrfs_release_path(&path);
7392                 return ret;
7393         }
7394
7395         if (ret > 0 && path.slots[0])
7396                 path.slots[0]--;
7397         ret = 0;
7398
7399         while (1) {
7400                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7401                         ret = btrfs_next_leaf(root, &path);
7402                         if (ret < 0) {
7403                                 fprintf(stderr, "Error going to next leaf "
7404                                         "%d\n", ret);
7405                                 break;
7406                         }
7407                         if (ret)
7408                                 break;
7409                 }
7410                 leaf = path.nodes[0];
7411
7412                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7413                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7414                         path.slots[0]++;
7415                         continue;
7416                 }
7417
7418                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7419                               csum_size) * root->sectorsize;
7420                 if (!check_data_csum)
7421                         goto skip_csum_check;
7422                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7423                 ret = check_extent_csums(root, key.offset, data_len,
7424                                          leaf_offset, leaf);
7425                 if (ret)
7426                         break;
7427 skip_csum_check:
7428                 if (!num_bytes) {
7429                         offset = key.offset;
7430                 } else if (key.offset != offset + num_bytes) {
7431                         ret = check_extent_exists(root, offset, num_bytes);
7432                         if (ret) {
7433                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7434                                         "there is no extent record\n",
7435                                         offset, offset+num_bytes);
7436                                 errors++;
7437                         }
7438                         offset = key.offset;
7439                         num_bytes = 0;
7440                 }
7441                 num_bytes += data_len;
7442                 path.slots[0]++;
7443         }
7444
7445         btrfs_release_path(&path);
7446         return errors;
7447 }
7448
7449 static int is_dropped_key(struct btrfs_key *key,
7450                           struct btrfs_key *drop_key) {
7451         if (key->objectid < drop_key->objectid)
7452                 return 1;
7453         else if (key->objectid == drop_key->objectid) {
7454                 if (key->type < drop_key->type)
7455                         return 1;
7456                 else if (key->type == drop_key->type) {
7457                         if (key->offset < drop_key->offset)
7458                                 return 1;
7459                 }
7460         }
7461         return 0;
7462 }
7463
7464 /*
7465  * Here are the rules for FULL_BACKREF.
7466  *
7467  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7468  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7469  *      FULL_BACKREF set.
7470  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7471  *    if it happened after the relocation occurred since we'll have dropped the
7472  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7473  *    have no real way to know for sure.
7474  *
7475  * We process the blocks one root at a time, and we start from the lowest root
7476  * objectid and go to the highest.  So we can just lookup the owner backref for
7477  * the record and if we don't find it then we know it doesn't exist and we have
7478  * a FULL BACKREF.
7479  *
7480  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7481  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7482  * be set or not and then we can check later once we've gathered all the refs.
7483  */
7484 static int calc_extent_flag(struct cache_tree *extent_cache,
7485                            struct extent_buffer *buf,
7486                            struct root_item_record *ri,
7487                            u64 *flags)
7488 {
7489         struct extent_record *rec;
7490         struct cache_extent *cache;
7491         struct tree_backref *tback;
7492         u64 owner = 0;
7493
7494         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7495         /* we have added this extent before */
7496         if (!cache)
7497                 return -ENOENT;
7498
7499         rec = container_of(cache, struct extent_record, cache);
7500
7501         /*
7502          * Except file/reloc tree, we can not have
7503          * FULL BACKREF MODE
7504          */
7505         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7506                 goto normal;
7507         /*
7508          * root node
7509          */
7510         if (buf->start == ri->bytenr)
7511                 goto normal;
7512
7513         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7514                 goto full_backref;
7515
7516         owner = btrfs_header_owner(buf);
7517         if (owner == ri->objectid)
7518                 goto normal;
7519
7520         tback = find_tree_backref(rec, 0, owner);
7521         if (!tback)
7522                 goto full_backref;
7523 normal:
7524         *flags = 0;
7525         if (rec->flag_block_full_backref != FLAG_UNSET &&
7526             rec->flag_block_full_backref != 0)
7527                 rec->bad_full_backref = 1;
7528         return 0;
7529 full_backref:
7530         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7531         if (rec->flag_block_full_backref != FLAG_UNSET &&
7532             rec->flag_block_full_backref != 1)
7533                 rec->bad_full_backref = 1;
7534         return 0;
7535 }
7536
7537 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7538 {
7539         fprintf(stderr, "Invalid key type(");
7540         print_key_type(stderr, 0, key_type);
7541         fprintf(stderr, ") found in root(");
7542         print_objectid(stderr, rootid, 0);
7543         fprintf(stderr, ")\n");
7544 }
7545
7546 /*
7547  * Check if the key is valid with its extent buffer.
7548  *
7549  * This is a early check in case invalid key exists in a extent buffer
7550  * This is not comprehensive yet, but should prevent wrong key/item passed
7551  * further
7552  */
7553 static int check_type_with_root(u64 rootid, u8 key_type)
7554 {
7555         switch (key_type) {
7556         /* Only valid in chunk tree */
7557         case BTRFS_DEV_ITEM_KEY:
7558         case BTRFS_CHUNK_ITEM_KEY:
7559                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7560                         goto err;
7561                 break;
7562         /* valid in csum and log tree */
7563         case BTRFS_CSUM_TREE_OBJECTID:
7564                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7565                       is_fstree(rootid)))
7566                         goto err;
7567                 break;
7568         case BTRFS_EXTENT_ITEM_KEY:
7569         case BTRFS_METADATA_ITEM_KEY:
7570         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7571                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7572                         goto err;
7573                 break;
7574         case BTRFS_ROOT_ITEM_KEY:
7575                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7576                         goto err;
7577                 break;
7578         case BTRFS_DEV_EXTENT_KEY:
7579                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7580                         goto err;
7581                 break;
7582         }
7583         return 0;
7584 err:
7585         report_mismatch_key_root(key_type, rootid);
7586         return -EINVAL;
7587 }
7588
7589 static int run_next_block(struct btrfs_root *root,
7590                           struct block_info *bits,
7591                           int bits_nr,
7592                           u64 *last,
7593                           struct cache_tree *pending,
7594                           struct cache_tree *seen,
7595                           struct cache_tree *reada,
7596                           struct cache_tree *nodes,
7597                           struct cache_tree *extent_cache,
7598                           struct cache_tree *chunk_cache,
7599                           struct rb_root *dev_cache,
7600                           struct block_group_tree *block_group_cache,
7601                           struct device_extent_tree *dev_extent_cache,
7602                           struct root_item_record *ri)
7603 {
7604         struct extent_buffer *buf;
7605         struct extent_record *rec = NULL;
7606         u64 bytenr;
7607         u32 size;
7608         u64 parent;
7609         u64 owner;
7610         u64 flags;
7611         u64 ptr;
7612         u64 gen = 0;
7613         int ret = 0;
7614         int i;
7615         int nritems;
7616         struct btrfs_key key;
7617         struct cache_extent *cache;
7618         int reada_bits;
7619
7620         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7621                                     bits_nr, &reada_bits);
7622         if (nritems == 0)
7623                 return 1;
7624
7625         if (!reada_bits) {
7626                 for(i = 0; i < nritems; i++) {
7627                         ret = add_cache_extent(reada, bits[i].start,
7628                                                bits[i].size);
7629                         if (ret == -EEXIST)
7630                                 continue;
7631
7632                         /* fixme, get the parent transid */
7633                         readahead_tree_block(root, bits[i].start,
7634                                              bits[i].size, 0);
7635                 }
7636         }
7637         *last = bits[0].start;
7638         bytenr = bits[0].start;
7639         size = bits[0].size;
7640
7641         cache = lookup_cache_extent(pending, bytenr, size);
7642         if (cache) {
7643                 remove_cache_extent(pending, cache);
7644                 free(cache);
7645         }
7646         cache = lookup_cache_extent(reada, bytenr, size);
7647         if (cache) {
7648                 remove_cache_extent(reada, cache);
7649                 free(cache);
7650         }
7651         cache = lookup_cache_extent(nodes, bytenr, size);
7652         if (cache) {
7653                 remove_cache_extent(nodes, cache);
7654                 free(cache);
7655         }
7656         cache = lookup_cache_extent(extent_cache, bytenr, size);
7657         if (cache) {
7658                 rec = container_of(cache, struct extent_record, cache);
7659                 gen = rec->parent_generation;
7660         }
7661
7662         /* fixme, get the real parent transid */
7663         buf = read_tree_block(root, bytenr, size, gen);
7664         if (!extent_buffer_uptodate(buf)) {
7665                 record_bad_block_io(root->fs_info,
7666                                     extent_cache, bytenr, size);
7667                 goto out;
7668         }
7669
7670         nritems = btrfs_header_nritems(buf);
7671
7672         flags = 0;
7673         if (!init_extent_tree) {
7674                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7675                                        btrfs_header_level(buf), 1, NULL,
7676                                        &flags);
7677                 if (ret < 0) {
7678                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7679                         if (ret < 0) {
7680                                 fprintf(stderr, "Couldn't calc extent flags\n");
7681                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7682                         }
7683                 }
7684         } else {
7685                 flags = 0;
7686                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7687                 if (ret < 0) {
7688                         fprintf(stderr, "Couldn't calc extent flags\n");
7689                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7690                 }
7691         }
7692
7693         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7694                 if (ri != NULL &&
7695                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7696                     ri->objectid == btrfs_header_owner(buf)) {
7697                         /*
7698                          * Ok we got to this block from it's original owner and
7699                          * we have FULL_BACKREF set.  Relocation can leave
7700                          * converted blocks over so this is altogether possible,
7701                          * however it's not possible if the generation > the
7702                          * last snapshot, so check for this case.
7703                          */
7704                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7705                             btrfs_header_generation(buf) > ri->last_snapshot) {
7706                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7707                                 rec->bad_full_backref = 1;
7708                         }
7709                 }
7710         } else {
7711                 if (ri != NULL &&
7712                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7713                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7714                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7715                         rec->bad_full_backref = 1;
7716                 }
7717         }
7718
7719         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7720                 rec->flag_block_full_backref = 1;
7721                 parent = bytenr;
7722                 owner = 0;
7723         } else {
7724                 rec->flag_block_full_backref = 0;
7725                 parent = 0;
7726                 owner = btrfs_header_owner(buf);
7727         }
7728
7729         ret = check_block(root, extent_cache, buf, flags);
7730         if (ret)
7731                 goto out;
7732
7733         if (btrfs_is_leaf(buf)) {
7734                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7735                 for (i = 0; i < nritems; i++) {
7736                         struct btrfs_file_extent_item *fi;
7737                         btrfs_item_key_to_cpu(buf, &key, i);
7738                         /*
7739                          * Check key type against the leaf owner.
7740                          * Could filter quite a lot of early error if
7741                          * owner is correct
7742                          */
7743                         if (check_type_with_root(btrfs_header_owner(buf),
7744                                                  key.type)) {
7745                                 fprintf(stderr, "ignoring invalid key\n");
7746                                 continue;
7747                         }
7748                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7749                                 process_extent_item(root, extent_cache, buf,
7750                                                     i);
7751                                 continue;
7752                         }
7753                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7754                                 process_extent_item(root, extent_cache, buf,
7755                                                     i);
7756                                 continue;
7757                         }
7758                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7759                                 total_csum_bytes +=
7760                                         btrfs_item_size_nr(buf, i);
7761                                 continue;
7762                         }
7763                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7764                                 process_chunk_item(chunk_cache, &key, buf, i);
7765                                 continue;
7766                         }
7767                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7768                                 process_device_item(dev_cache, &key, buf, i);
7769                                 continue;
7770                         }
7771                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7772                                 process_block_group_item(block_group_cache,
7773                                         &key, buf, i);
7774                                 continue;
7775                         }
7776                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7777                                 process_device_extent_item(dev_extent_cache,
7778                                         &key, buf, i);
7779                                 continue;
7780
7781                         }
7782                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7783 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7784                                 process_extent_ref_v0(extent_cache, buf, i);
7785 #else
7786                                 BUG();
7787 #endif
7788                                 continue;
7789                         }
7790
7791                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7792                                 ret = add_tree_backref(extent_cache,
7793                                                 key.objectid, 0, key.offset, 0);
7794                                 if (ret < 0)
7795                                         error(
7796                                 "add_tree_backref failed (leaf tree block): %s",
7797                                               strerror(-ret));
7798                                 continue;
7799                         }
7800                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7801                                 ret = add_tree_backref(extent_cache,
7802                                                 key.objectid, key.offset, 0, 0);
7803                                 if (ret < 0)
7804                                         error(
7805                                 "add_tree_backref failed (leaf shared block): %s",
7806                                               strerror(-ret));
7807                                 continue;
7808                         }
7809                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7810                                 struct btrfs_extent_data_ref *ref;
7811                                 ref = btrfs_item_ptr(buf, i,
7812                                                 struct btrfs_extent_data_ref);
7813                                 add_data_backref(extent_cache,
7814                                         key.objectid, 0,
7815                                         btrfs_extent_data_ref_root(buf, ref),
7816                                         btrfs_extent_data_ref_objectid(buf,
7817                                                                        ref),
7818                                         btrfs_extent_data_ref_offset(buf, ref),
7819                                         btrfs_extent_data_ref_count(buf, ref),
7820                                         0, root->sectorsize);
7821                                 continue;
7822                         }
7823                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7824                                 struct btrfs_shared_data_ref *ref;
7825                                 ref = btrfs_item_ptr(buf, i,
7826                                                 struct btrfs_shared_data_ref);
7827                                 add_data_backref(extent_cache,
7828                                         key.objectid, key.offset, 0, 0, 0,
7829                                         btrfs_shared_data_ref_count(buf, ref),
7830                                         0, root->sectorsize);
7831                                 continue;
7832                         }
7833                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7834                                 struct bad_item *bad;
7835
7836                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7837                                         continue;
7838                                 if (!owner)
7839                                         continue;
7840                                 bad = malloc(sizeof(struct bad_item));
7841                                 if (!bad)
7842                                         continue;
7843                                 INIT_LIST_HEAD(&bad->list);
7844                                 memcpy(&bad->key, &key,
7845                                        sizeof(struct btrfs_key));
7846                                 bad->root_id = owner;
7847                                 list_add_tail(&bad->list, &delete_items);
7848                                 continue;
7849                         }
7850                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7851                                 continue;
7852                         fi = btrfs_item_ptr(buf, i,
7853                                             struct btrfs_file_extent_item);
7854                         if (btrfs_file_extent_type(buf, fi) ==
7855                             BTRFS_FILE_EXTENT_INLINE)
7856                                 continue;
7857                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7858                                 continue;
7859
7860                         data_bytes_allocated +=
7861                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7862                         if (data_bytes_allocated < root->sectorsize) {
7863                                 abort();
7864                         }
7865                         data_bytes_referenced +=
7866                                 btrfs_file_extent_num_bytes(buf, fi);
7867                         add_data_backref(extent_cache,
7868                                 btrfs_file_extent_disk_bytenr(buf, fi),
7869                                 parent, owner, key.objectid, key.offset -
7870                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7871                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7872                 }
7873         } else {
7874                 int level;
7875                 struct btrfs_key first_key;
7876
7877                 first_key.objectid = 0;
7878
7879                 if (nritems > 0)
7880                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7881                 level = btrfs_header_level(buf);
7882                 for (i = 0; i < nritems; i++) {
7883                         struct extent_record tmpl;
7884
7885                         ptr = btrfs_node_blockptr(buf, i);
7886                         size = root->nodesize;
7887                         btrfs_node_key_to_cpu(buf, &key, i);
7888                         if (ri != NULL) {
7889                                 if ((level == ri->drop_level)
7890                                     && is_dropped_key(&key, &ri->drop_key)) {
7891                                         continue;
7892                                 }
7893                         }
7894
7895                         memset(&tmpl, 0, sizeof(tmpl));
7896                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7897                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7898                         tmpl.start = ptr;
7899                         tmpl.nr = size;
7900                         tmpl.refs = 1;
7901                         tmpl.metadata = 1;
7902                         tmpl.max_size = size;
7903                         ret = add_extent_rec(extent_cache, &tmpl);
7904                         if (ret < 0)
7905                                 goto out;
7906
7907                         ret = add_tree_backref(extent_cache, ptr, parent,
7908                                         owner, 1);
7909                         if (ret < 0) {
7910                                 error(
7911                                 "add_tree_backref failed (non-leaf block): %s",
7912                                       strerror(-ret));
7913                                 continue;
7914                         }
7915
7916                         if (level > 1) {
7917                                 add_pending(nodes, seen, ptr, size);
7918                         } else {
7919                                 add_pending(pending, seen, ptr, size);
7920                         }
7921                 }
7922                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7923                                       nritems) * sizeof(struct btrfs_key_ptr);
7924         }
7925         total_btree_bytes += buf->len;
7926         if (fs_root_objectid(btrfs_header_owner(buf)))
7927                 total_fs_tree_bytes += buf->len;
7928         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7929                 total_extent_tree_bytes += buf->len;
7930         if (!found_old_backref &&
7931             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7932             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7933             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7934                 found_old_backref = 1;
7935 out:
7936         free_extent_buffer(buf);
7937         return ret;
7938 }
7939
7940 static int add_root_to_pending(struct extent_buffer *buf,
7941                                struct cache_tree *extent_cache,
7942                                struct cache_tree *pending,
7943                                struct cache_tree *seen,
7944                                struct cache_tree *nodes,
7945                                u64 objectid)
7946 {
7947         struct extent_record tmpl;
7948         int ret;
7949
7950         if (btrfs_header_level(buf) > 0)
7951                 add_pending(nodes, seen, buf->start, buf->len);
7952         else
7953                 add_pending(pending, seen, buf->start, buf->len);
7954
7955         memset(&tmpl, 0, sizeof(tmpl));
7956         tmpl.start = buf->start;
7957         tmpl.nr = buf->len;
7958         tmpl.is_root = 1;
7959         tmpl.refs = 1;
7960         tmpl.metadata = 1;
7961         tmpl.max_size = buf->len;
7962         add_extent_rec(extent_cache, &tmpl);
7963
7964         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7965             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7966                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7967                                 0, 1);
7968         else
7969                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7970                                 1);
7971         return ret;
7972 }
7973
7974 /* as we fix the tree, we might be deleting blocks that
7975  * we're tracking for repair.  This hook makes sure we
7976  * remove any backrefs for blocks as we are fixing them.
7977  */
7978 static int free_extent_hook(struct btrfs_trans_handle *trans,
7979                             struct btrfs_root *root,
7980                             u64 bytenr, u64 num_bytes, u64 parent,
7981                             u64 root_objectid, u64 owner, u64 offset,
7982                             int refs_to_drop)
7983 {
7984         struct extent_record *rec;
7985         struct cache_extent *cache;
7986         int is_data;
7987         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7988
7989         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7990         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7991         if (!cache)
7992                 return 0;
7993
7994         rec = container_of(cache, struct extent_record, cache);
7995         if (is_data) {
7996                 struct data_backref *back;
7997                 back = find_data_backref(rec, parent, root_objectid, owner,
7998                                          offset, 1, bytenr, num_bytes);
7999                 if (!back)
8000                         goto out;
8001                 if (back->node.found_ref) {
8002                         back->found_ref -= refs_to_drop;
8003                         if (rec->refs)
8004                                 rec->refs -= refs_to_drop;
8005                 }
8006                 if (back->node.found_extent_tree) {
8007                         back->num_refs -= refs_to_drop;
8008                         if (rec->extent_item_refs)
8009                                 rec->extent_item_refs -= refs_to_drop;
8010                 }
8011                 if (back->found_ref == 0)
8012                         back->node.found_ref = 0;
8013                 if (back->num_refs == 0)
8014                         back->node.found_extent_tree = 0;
8015
8016                 if (!back->node.found_extent_tree && back->node.found_ref) {
8017                         list_del(&back->node.list);
8018                         free(back);
8019                 }
8020         } else {
8021                 struct tree_backref *back;
8022                 back = find_tree_backref(rec, parent, root_objectid);
8023                 if (!back)
8024                         goto out;
8025                 if (back->node.found_ref) {
8026                         if (rec->refs)
8027                                 rec->refs--;
8028                         back->node.found_ref = 0;
8029                 }
8030                 if (back->node.found_extent_tree) {
8031                         if (rec->extent_item_refs)
8032                                 rec->extent_item_refs--;
8033                         back->node.found_extent_tree = 0;
8034                 }
8035                 if (!back->node.found_extent_tree && back->node.found_ref) {
8036                         list_del(&back->node.list);
8037                         free(back);
8038                 }
8039         }
8040         maybe_free_extent_rec(extent_cache, rec);
8041 out:
8042         return 0;
8043 }
8044
8045 static int delete_extent_records(struct btrfs_trans_handle *trans,
8046                                  struct btrfs_root *root,
8047                                  struct btrfs_path *path,
8048                                  u64 bytenr)
8049 {
8050         struct btrfs_key key;
8051         struct btrfs_key found_key;
8052         struct extent_buffer *leaf;
8053         int ret;
8054         int slot;
8055
8056
8057         key.objectid = bytenr;
8058         key.type = (u8)-1;
8059         key.offset = (u64)-1;
8060
8061         while(1) {
8062                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8063                                         &key, path, 0, 1);
8064                 if (ret < 0)
8065                         break;
8066
8067                 if (ret > 0) {
8068                         ret = 0;
8069                         if (path->slots[0] == 0)
8070                                 break;
8071                         path->slots[0]--;
8072                 }
8073                 ret = 0;
8074
8075                 leaf = path->nodes[0];
8076                 slot = path->slots[0];
8077
8078                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8079                 if (found_key.objectid != bytenr)
8080                         break;
8081
8082                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8083                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8084                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8085                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8086                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8087                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8088                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8089                         btrfs_release_path(path);
8090                         if (found_key.type == 0) {
8091                                 if (found_key.offset == 0)
8092                                         break;
8093                                 key.offset = found_key.offset - 1;
8094                                 key.type = found_key.type;
8095                         }
8096                         key.type = found_key.type - 1;
8097                         key.offset = (u64)-1;
8098                         continue;
8099                 }
8100
8101                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8102                         found_key.objectid, found_key.type, found_key.offset);
8103
8104                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8105                 if (ret)
8106                         break;
8107                 btrfs_release_path(path);
8108
8109                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8110                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8111                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8112                                 found_key.offset : root->nodesize;
8113
8114                         ret = btrfs_update_block_group(trans, root, bytenr,
8115                                                        bytes, 0, 0);
8116                         if (ret)
8117                                 break;
8118                 }
8119         }
8120
8121         btrfs_release_path(path);
8122         return ret;
8123 }
8124
8125 /*
8126  * for a single backref, this will allocate a new extent
8127  * and add the backref to it.
8128  */
8129 static int record_extent(struct btrfs_trans_handle *trans,
8130                          struct btrfs_fs_info *info,
8131                          struct btrfs_path *path,
8132                          struct extent_record *rec,
8133                          struct extent_backref *back,
8134                          int allocated, u64 flags)
8135 {
8136         int ret = 0;
8137         struct btrfs_root *extent_root = info->extent_root;
8138         struct extent_buffer *leaf;
8139         struct btrfs_key ins_key;
8140         struct btrfs_extent_item *ei;
8141         struct data_backref *dback;
8142         struct btrfs_tree_block_info *bi;
8143
8144         if (!back->is_data)
8145                 rec->max_size = max_t(u64, rec->max_size,
8146                                     info->extent_root->nodesize);
8147
8148         if (!allocated) {
8149                 u32 item_size = sizeof(*ei);
8150
8151                 if (!back->is_data)
8152                         item_size += sizeof(*bi);
8153
8154                 ins_key.objectid = rec->start;
8155                 ins_key.offset = rec->max_size;
8156                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8157
8158                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8159                                         &ins_key, item_size);
8160                 if (ret)
8161                         goto fail;
8162
8163                 leaf = path->nodes[0];
8164                 ei = btrfs_item_ptr(leaf, path->slots[0],
8165                                     struct btrfs_extent_item);
8166
8167                 btrfs_set_extent_refs(leaf, ei, 0);
8168                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8169
8170                 if (back->is_data) {
8171                         btrfs_set_extent_flags(leaf, ei,
8172                                                BTRFS_EXTENT_FLAG_DATA);
8173                 } else {
8174                         struct btrfs_disk_key copy_key;;
8175
8176                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8177                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8178                                              sizeof(*bi));
8179
8180                         btrfs_set_disk_key_objectid(&copy_key,
8181                                                     rec->info_objectid);
8182                         btrfs_set_disk_key_type(&copy_key, 0);
8183                         btrfs_set_disk_key_offset(&copy_key, 0);
8184
8185                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8186                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8187
8188                         btrfs_set_extent_flags(leaf, ei,
8189                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8190                 }
8191
8192                 btrfs_mark_buffer_dirty(leaf);
8193                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8194                                                rec->max_size, 1, 0);
8195                 if (ret)
8196                         goto fail;
8197                 btrfs_release_path(path);
8198         }
8199
8200         if (back->is_data) {
8201                 u64 parent;
8202                 int i;
8203
8204                 dback = to_data_backref(back);
8205                 if (back->full_backref)
8206                         parent = dback->parent;
8207                 else
8208                         parent = 0;
8209
8210                 for (i = 0; i < dback->found_ref; i++) {
8211                         /* if parent != 0, we're doing a full backref
8212                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8213                          * just makes the backref allocator create a data
8214                          * backref
8215                          */
8216                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8217                                                    rec->start, rec->max_size,
8218                                                    parent,
8219                                                    dback->root,
8220                                                    parent ?
8221                                                    BTRFS_FIRST_FREE_OBJECTID :
8222                                                    dback->owner,
8223                                                    dback->offset);
8224                         if (ret)
8225                                 break;
8226                 }
8227                 fprintf(stderr, "adding new data backref"
8228                                 " on %llu %s %llu owner %llu"
8229                                 " offset %llu found %d\n",
8230                                 (unsigned long long)rec->start,
8231                                 back->full_backref ?
8232                                 "parent" : "root",
8233                                 back->full_backref ?
8234                                 (unsigned long long)parent :
8235                                 (unsigned long long)dback->root,
8236                                 (unsigned long long)dback->owner,
8237                                 (unsigned long long)dback->offset,
8238                                 dback->found_ref);
8239         } else {
8240                 u64 parent;
8241                 struct tree_backref *tback;
8242
8243                 tback = to_tree_backref(back);
8244                 if (back->full_backref)
8245                         parent = tback->parent;
8246                 else
8247                         parent = 0;
8248
8249                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8250                                            rec->start, rec->max_size,
8251                                            parent, tback->root, 0, 0);
8252                 fprintf(stderr, "adding new tree backref on "
8253                         "start %llu len %llu parent %llu root %llu\n",
8254                         rec->start, rec->max_size, parent, tback->root);
8255         }
8256 fail:
8257         btrfs_release_path(path);
8258         return ret;
8259 }
8260
8261 static struct extent_entry *find_entry(struct list_head *entries,
8262                                        u64 bytenr, u64 bytes)
8263 {
8264         struct extent_entry *entry = NULL;
8265
8266         list_for_each_entry(entry, entries, list) {
8267                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8268                         return entry;
8269         }
8270
8271         return NULL;
8272 }
8273
8274 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8275 {
8276         struct extent_entry *entry, *best = NULL, *prev = NULL;
8277
8278         list_for_each_entry(entry, entries, list) {
8279                 /*
8280                  * If there are as many broken entries as entries then we know
8281                  * not to trust this particular entry.
8282                  */
8283                 if (entry->broken == entry->count)
8284                         continue;
8285
8286                 /*
8287                  * Special case, when there are only two entries and 'best' is
8288                  * the first one
8289                  */
8290                 if (!prev) {
8291                         best = entry;
8292                         prev = entry;
8293                         continue;
8294                 }
8295
8296                 /*
8297                  * If our current entry == best then we can't be sure our best
8298                  * is really the best, so we need to keep searching.
8299                  */
8300                 if (best && best->count == entry->count) {
8301                         prev = entry;
8302                         best = NULL;
8303                         continue;
8304                 }
8305
8306                 /* Prev == entry, not good enough, have to keep searching */
8307                 if (!prev->broken && prev->count == entry->count)
8308                         continue;
8309
8310                 if (!best)
8311                         best = (prev->count > entry->count) ? prev : entry;
8312                 else if (best->count < entry->count)
8313                         best = entry;
8314                 prev = entry;
8315         }
8316
8317         return best;
8318 }
8319
8320 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8321                       struct data_backref *dback, struct extent_entry *entry)
8322 {
8323         struct btrfs_trans_handle *trans;
8324         struct btrfs_root *root;
8325         struct btrfs_file_extent_item *fi;
8326         struct extent_buffer *leaf;
8327         struct btrfs_key key;
8328         u64 bytenr, bytes;
8329         int ret, err;
8330
8331         key.objectid = dback->root;
8332         key.type = BTRFS_ROOT_ITEM_KEY;
8333         key.offset = (u64)-1;
8334         root = btrfs_read_fs_root(info, &key);
8335         if (IS_ERR(root)) {
8336                 fprintf(stderr, "Couldn't find root for our ref\n");
8337                 return -EINVAL;
8338         }
8339
8340         /*
8341          * The backref points to the original offset of the extent if it was
8342          * split, so we need to search down to the offset we have and then walk
8343          * forward until we find the backref we're looking for.
8344          */
8345         key.objectid = dback->owner;
8346         key.type = BTRFS_EXTENT_DATA_KEY;
8347         key.offset = dback->offset;
8348         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8349         if (ret < 0) {
8350                 fprintf(stderr, "Error looking up ref %d\n", ret);
8351                 return ret;
8352         }
8353
8354         while (1) {
8355                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8356                         ret = btrfs_next_leaf(root, path);
8357                         if (ret) {
8358                                 fprintf(stderr, "Couldn't find our ref, next\n");
8359                                 return -EINVAL;
8360                         }
8361                 }
8362                 leaf = path->nodes[0];
8363                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8364                 if (key.objectid != dback->owner ||
8365                     key.type != BTRFS_EXTENT_DATA_KEY) {
8366                         fprintf(stderr, "Couldn't find our ref, search\n");
8367                         return -EINVAL;
8368                 }
8369                 fi = btrfs_item_ptr(leaf, path->slots[0],
8370                                     struct btrfs_file_extent_item);
8371                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8372                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8373
8374                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8375                         break;
8376                 path->slots[0]++;
8377         }
8378
8379         btrfs_release_path(path);
8380
8381         trans = btrfs_start_transaction(root, 1);
8382         if (IS_ERR(trans))
8383                 return PTR_ERR(trans);
8384
8385         /*
8386          * Ok we have the key of the file extent we want to fix, now we can cow
8387          * down to the thing and fix it.
8388          */
8389         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8390         if (ret < 0) {
8391                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8392                         key.objectid, key.type, key.offset, ret);
8393                 goto out;
8394         }
8395         if (ret > 0) {
8396                 fprintf(stderr, "Well that's odd, we just found this key "
8397                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8398                         key.offset);
8399                 ret = -EINVAL;
8400                 goto out;
8401         }
8402         leaf = path->nodes[0];
8403         fi = btrfs_item_ptr(leaf, path->slots[0],
8404                             struct btrfs_file_extent_item);
8405
8406         if (btrfs_file_extent_compression(leaf, fi) &&
8407             dback->disk_bytenr != entry->bytenr) {
8408                 fprintf(stderr, "Ref doesn't match the record start and is "
8409                         "compressed, please take a btrfs-image of this file "
8410                         "system and send it to a btrfs developer so they can "
8411                         "complete this functionality for bytenr %Lu\n",
8412                         dback->disk_bytenr);
8413                 ret = -EINVAL;
8414                 goto out;
8415         }
8416
8417         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8418                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8419         } else if (dback->disk_bytenr > entry->bytenr) {
8420                 u64 off_diff, offset;
8421
8422                 off_diff = dback->disk_bytenr - entry->bytenr;
8423                 offset = btrfs_file_extent_offset(leaf, fi);
8424                 if (dback->disk_bytenr + offset +
8425                     btrfs_file_extent_num_bytes(leaf, fi) >
8426                     entry->bytenr + entry->bytes) {
8427                         fprintf(stderr, "Ref is past the entry end, please "
8428                                 "take a btrfs-image of this file system and "
8429                                 "send it to a btrfs developer, ref %Lu\n",
8430                                 dback->disk_bytenr);
8431                         ret = -EINVAL;
8432                         goto out;
8433                 }
8434                 offset += off_diff;
8435                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8436                 btrfs_set_file_extent_offset(leaf, fi, offset);
8437         } else if (dback->disk_bytenr < entry->bytenr) {
8438                 u64 offset;
8439
8440                 offset = btrfs_file_extent_offset(leaf, fi);
8441                 if (dback->disk_bytenr + offset < entry->bytenr) {
8442                         fprintf(stderr, "Ref is before the entry start, please"
8443                                 " take a btrfs-image of this file system and "
8444                                 "send it to a btrfs developer, ref %Lu\n",
8445                                 dback->disk_bytenr);
8446                         ret = -EINVAL;
8447                         goto out;
8448                 }
8449
8450                 offset += dback->disk_bytenr;
8451                 offset -= entry->bytenr;
8452                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8453                 btrfs_set_file_extent_offset(leaf, fi, offset);
8454         }
8455
8456         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8457
8458         /*
8459          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8460          * only do this if we aren't using compression, otherwise it's a
8461          * trickier case.
8462          */
8463         if (!btrfs_file_extent_compression(leaf, fi))
8464                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8465         else
8466                 printf("ram bytes may be wrong?\n");
8467         btrfs_mark_buffer_dirty(leaf);
8468 out:
8469         err = btrfs_commit_transaction(trans, root);
8470         btrfs_release_path(path);
8471         return ret ? ret : err;
8472 }
8473
8474 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8475                            struct extent_record *rec)
8476 {
8477         struct extent_backref *back;
8478         struct data_backref *dback;
8479         struct extent_entry *entry, *best = NULL;
8480         LIST_HEAD(entries);
8481         int nr_entries = 0;
8482         int broken_entries = 0;
8483         int ret = 0;
8484         short mismatch = 0;
8485
8486         /*
8487          * Metadata is easy and the backrefs should always agree on bytenr and
8488          * size, if not we've got bigger issues.
8489          */
8490         if (rec->metadata)
8491                 return 0;
8492
8493         list_for_each_entry(back, &rec->backrefs, list) {
8494                 if (back->full_backref || !back->is_data)
8495                         continue;
8496
8497                 dback = to_data_backref(back);
8498
8499                 /*
8500                  * We only pay attention to backrefs that we found a real
8501                  * backref for.
8502                  */
8503                 if (dback->found_ref == 0)
8504                         continue;
8505
8506                 /*
8507                  * For now we only catch when the bytes don't match, not the
8508                  * bytenr.  We can easily do this at the same time, but I want
8509                  * to have a fs image to test on before we just add repair
8510                  * functionality willy-nilly so we know we won't screw up the
8511                  * repair.
8512                  */
8513
8514                 entry = find_entry(&entries, dback->disk_bytenr,
8515                                    dback->bytes);
8516                 if (!entry) {
8517                         entry = malloc(sizeof(struct extent_entry));
8518                         if (!entry) {
8519                                 ret = -ENOMEM;
8520                                 goto out;
8521                         }
8522                         memset(entry, 0, sizeof(*entry));
8523                         entry->bytenr = dback->disk_bytenr;
8524                         entry->bytes = dback->bytes;
8525                         list_add_tail(&entry->list, &entries);
8526                         nr_entries++;
8527                 }
8528
8529                 /*
8530                  * If we only have on entry we may think the entries agree when
8531                  * in reality they don't so we have to do some extra checking.
8532                  */
8533                 if (dback->disk_bytenr != rec->start ||
8534                     dback->bytes != rec->nr || back->broken)
8535                         mismatch = 1;
8536
8537                 if (back->broken) {
8538                         entry->broken++;
8539                         broken_entries++;
8540                 }
8541
8542                 entry->count++;
8543         }
8544
8545         /* Yay all the backrefs agree, carry on good sir */
8546         if (nr_entries <= 1 && !mismatch)
8547                 goto out;
8548
8549         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8550                 "%Lu\n", rec->start);
8551
8552         /*
8553          * First we want to see if the backrefs can agree amongst themselves who
8554          * is right, so figure out which one of the entries has the highest
8555          * count.
8556          */
8557         best = find_most_right_entry(&entries);
8558
8559         /*
8560          * Ok so we may have an even split between what the backrefs think, so
8561          * this is where we use the extent ref to see what it thinks.
8562          */
8563         if (!best) {
8564                 entry = find_entry(&entries, rec->start, rec->nr);
8565                 if (!entry && (!broken_entries || !rec->found_rec)) {
8566                         fprintf(stderr, "Backrefs don't agree with each other "
8567                                 "and extent record doesn't agree with anybody,"
8568                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8569                                 rec->start, rec->nr);
8570                         ret = -EINVAL;
8571                         goto out;
8572                 } else if (!entry) {
8573                         /*
8574                          * Ok our backrefs were broken, we'll assume this is the
8575                          * correct value and add an entry for this range.
8576                          */
8577                         entry = malloc(sizeof(struct extent_entry));
8578                         if (!entry) {
8579                                 ret = -ENOMEM;
8580                                 goto out;
8581                         }
8582                         memset(entry, 0, sizeof(*entry));
8583                         entry->bytenr = rec->start;
8584                         entry->bytes = rec->nr;
8585                         list_add_tail(&entry->list, &entries);
8586                         nr_entries++;
8587                 }
8588                 entry->count++;
8589                 best = find_most_right_entry(&entries);
8590                 if (!best) {
8591                         fprintf(stderr, "Backrefs and extent record evenly "
8592                                 "split on who is right, this is going to "
8593                                 "require user input to fix bytenr %Lu bytes "
8594                                 "%Lu\n", rec->start, rec->nr);
8595                         ret = -EINVAL;
8596                         goto out;
8597                 }
8598         }
8599
8600         /*
8601          * I don't think this can happen currently as we'll abort() if we catch
8602          * this case higher up, but in case somebody removes that we still can't
8603          * deal with it properly here yet, so just bail out of that's the case.
8604          */
8605         if (best->bytenr != rec->start) {
8606                 fprintf(stderr, "Extent start and backref starts don't match, "
8607                         "please use btrfs-image on this file system and send "
8608                         "it to a btrfs developer so they can make fsck fix "
8609                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8610                         rec->start, rec->nr);
8611                 ret = -EINVAL;
8612                 goto out;
8613         }
8614
8615         /*
8616          * Ok great we all agreed on an extent record, let's go find the real
8617          * references and fix up the ones that don't match.
8618          */
8619         list_for_each_entry(back, &rec->backrefs, list) {
8620                 if (back->full_backref || !back->is_data)
8621                         continue;
8622
8623                 dback = to_data_backref(back);
8624
8625                 /*
8626                  * Still ignoring backrefs that don't have a real ref attached
8627                  * to them.
8628                  */
8629                 if (dback->found_ref == 0)
8630                         continue;
8631
8632                 if (dback->bytes == best->bytes &&
8633                     dback->disk_bytenr == best->bytenr)
8634                         continue;
8635
8636                 ret = repair_ref(info, path, dback, best);
8637                 if (ret)
8638                         goto out;
8639         }
8640
8641         /*
8642          * Ok we messed with the actual refs, which means we need to drop our
8643          * entire cache and go back and rescan.  I know this is a huge pain and
8644          * adds a lot of extra work, but it's the only way to be safe.  Once all
8645          * the backrefs agree we may not need to do anything to the extent
8646          * record itself.
8647          */
8648         ret = -EAGAIN;
8649 out:
8650         while (!list_empty(&entries)) {
8651                 entry = list_entry(entries.next, struct extent_entry, list);
8652                 list_del_init(&entry->list);
8653                 free(entry);
8654         }
8655         return ret;
8656 }
8657
8658 static int process_duplicates(struct cache_tree *extent_cache,
8659                               struct extent_record *rec)
8660 {
8661         struct extent_record *good, *tmp;
8662         struct cache_extent *cache;
8663         int ret;
8664
8665         /*
8666          * If we found a extent record for this extent then return, or if we
8667          * have more than one duplicate we are likely going to need to delete
8668          * something.
8669          */
8670         if (rec->found_rec || rec->num_duplicates > 1)
8671                 return 0;
8672
8673         /* Shouldn't happen but just in case */
8674         BUG_ON(!rec->num_duplicates);
8675
8676         /*
8677          * So this happens if we end up with a backref that doesn't match the
8678          * actual extent entry.  So either the backref is bad or the extent
8679          * entry is bad.  Either way we want to have the extent_record actually
8680          * reflect what we found in the extent_tree, so we need to take the
8681          * duplicate out and use that as the extent_record since the only way we
8682          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8683          */
8684         remove_cache_extent(extent_cache, &rec->cache);
8685
8686         good = to_extent_record(rec->dups.next);
8687         list_del_init(&good->list);
8688         INIT_LIST_HEAD(&good->backrefs);
8689         INIT_LIST_HEAD(&good->dups);
8690         good->cache.start = good->start;
8691         good->cache.size = good->nr;
8692         good->content_checked = 0;
8693         good->owner_ref_checked = 0;
8694         good->num_duplicates = 0;
8695         good->refs = rec->refs;
8696         list_splice_init(&rec->backrefs, &good->backrefs);
8697         while (1) {
8698                 cache = lookup_cache_extent(extent_cache, good->start,
8699                                             good->nr);
8700                 if (!cache)
8701                         break;
8702                 tmp = container_of(cache, struct extent_record, cache);
8703
8704                 /*
8705                  * If we find another overlapping extent and it's found_rec is
8706                  * set then it's a duplicate and we need to try and delete
8707                  * something.
8708                  */
8709                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8710                         if (list_empty(&good->list))
8711                                 list_add_tail(&good->list,
8712                                               &duplicate_extents);
8713                         good->num_duplicates += tmp->num_duplicates + 1;
8714                         list_splice_init(&tmp->dups, &good->dups);
8715                         list_del_init(&tmp->list);
8716                         list_add_tail(&tmp->list, &good->dups);
8717                         remove_cache_extent(extent_cache, &tmp->cache);
8718                         continue;
8719                 }
8720
8721                 /*
8722                  * Ok we have another non extent item backed extent rec, so lets
8723                  * just add it to this extent and carry on like we did above.
8724                  */
8725                 good->refs += tmp->refs;
8726                 list_splice_init(&tmp->backrefs, &good->backrefs);
8727                 remove_cache_extent(extent_cache, &tmp->cache);
8728                 free(tmp);
8729         }
8730         ret = insert_cache_extent(extent_cache, &good->cache);
8731         BUG_ON(ret);
8732         free(rec);
8733         return good->num_duplicates ? 0 : 1;
8734 }
8735
8736 static int delete_duplicate_records(struct btrfs_root *root,
8737                                     struct extent_record *rec)
8738 {
8739         struct btrfs_trans_handle *trans;
8740         LIST_HEAD(delete_list);
8741         struct btrfs_path path;
8742         struct extent_record *tmp, *good, *n;
8743         int nr_del = 0;
8744         int ret = 0, err;
8745         struct btrfs_key key;
8746
8747         btrfs_init_path(&path);
8748
8749         good = rec;
8750         /* Find the record that covers all of the duplicates. */
8751         list_for_each_entry(tmp, &rec->dups, list) {
8752                 if (good->start < tmp->start)
8753                         continue;
8754                 if (good->nr > tmp->nr)
8755                         continue;
8756
8757                 if (tmp->start + tmp->nr < good->start + good->nr) {
8758                         fprintf(stderr, "Ok we have overlapping extents that "
8759                                 "aren't completely covered by each other, this "
8760                                 "is going to require more careful thought.  "
8761                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8762                                 tmp->start, tmp->nr, good->start, good->nr);
8763                         abort();
8764                 }
8765                 good = tmp;
8766         }
8767
8768         if (good != rec)
8769                 list_add_tail(&rec->list, &delete_list);
8770
8771         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8772                 if (tmp == good)
8773                         continue;
8774                 list_move_tail(&tmp->list, &delete_list);
8775         }
8776
8777         root = root->fs_info->extent_root;
8778         trans = btrfs_start_transaction(root, 1);
8779         if (IS_ERR(trans)) {
8780                 ret = PTR_ERR(trans);
8781                 goto out;
8782         }
8783
8784         list_for_each_entry(tmp, &delete_list, list) {
8785                 if (tmp->found_rec == 0)
8786                         continue;
8787                 key.objectid = tmp->start;
8788                 key.type = BTRFS_EXTENT_ITEM_KEY;
8789                 key.offset = tmp->nr;
8790
8791                 /* Shouldn't happen but just in case */
8792                 if (tmp->metadata) {
8793                         fprintf(stderr, "Well this shouldn't happen, extent "
8794                                 "record overlaps but is metadata? "
8795                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8796                         abort();
8797                 }
8798
8799                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8800                 if (ret) {
8801                         if (ret > 0)
8802                                 ret = -EINVAL;
8803                         break;
8804                 }
8805                 ret = btrfs_del_item(trans, root, &path);
8806                 if (ret)
8807                         break;
8808                 btrfs_release_path(&path);
8809                 nr_del++;
8810         }
8811         err = btrfs_commit_transaction(trans, root);
8812         if (err && !ret)
8813                 ret = err;
8814 out:
8815         while (!list_empty(&delete_list)) {
8816                 tmp = to_extent_record(delete_list.next);
8817                 list_del_init(&tmp->list);
8818                 if (tmp == rec)
8819                         continue;
8820                 free(tmp);
8821         }
8822
8823         while (!list_empty(&rec->dups)) {
8824                 tmp = to_extent_record(rec->dups.next);
8825                 list_del_init(&tmp->list);
8826                 free(tmp);
8827         }
8828
8829         btrfs_release_path(&path);
8830
8831         if (!ret && !nr_del)
8832                 rec->num_duplicates = 0;
8833
8834         return ret ? ret : nr_del;
8835 }
8836
8837 static int find_possible_backrefs(struct btrfs_fs_info *info,
8838                                   struct btrfs_path *path,
8839                                   struct cache_tree *extent_cache,
8840                                   struct extent_record *rec)
8841 {
8842         struct btrfs_root *root;
8843         struct extent_backref *back;
8844         struct data_backref *dback;
8845         struct cache_extent *cache;
8846         struct btrfs_file_extent_item *fi;
8847         struct btrfs_key key;
8848         u64 bytenr, bytes;
8849         int ret;
8850
8851         list_for_each_entry(back, &rec->backrefs, list) {
8852                 /* Don't care about full backrefs (poor unloved backrefs) */
8853                 if (back->full_backref || !back->is_data)
8854                         continue;
8855
8856                 dback = to_data_backref(back);
8857
8858                 /* We found this one, we don't need to do a lookup */
8859                 if (dback->found_ref)
8860                         continue;
8861
8862                 key.objectid = dback->root;
8863                 key.type = BTRFS_ROOT_ITEM_KEY;
8864                 key.offset = (u64)-1;
8865
8866                 root = btrfs_read_fs_root(info, &key);
8867
8868                 /* No root, definitely a bad ref, skip */
8869                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8870                         continue;
8871                 /* Other err, exit */
8872                 if (IS_ERR(root))
8873                         return PTR_ERR(root);
8874
8875                 key.objectid = dback->owner;
8876                 key.type = BTRFS_EXTENT_DATA_KEY;
8877                 key.offset = dback->offset;
8878                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8879                 if (ret) {
8880                         btrfs_release_path(path);
8881                         if (ret < 0)
8882                                 return ret;
8883                         /* Didn't find it, we can carry on */
8884                         ret = 0;
8885                         continue;
8886                 }
8887
8888                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8889                                     struct btrfs_file_extent_item);
8890                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8891                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8892                 btrfs_release_path(path);
8893                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8894                 if (cache) {
8895                         struct extent_record *tmp;
8896                         tmp = container_of(cache, struct extent_record, cache);
8897
8898                         /*
8899                          * If we found an extent record for the bytenr for this
8900                          * particular backref then we can't add it to our
8901                          * current extent record.  We only want to add backrefs
8902                          * that don't have a corresponding extent item in the
8903                          * extent tree since they likely belong to this record
8904                          * and we need to fix it if it doesn't match bytenrs.
8905                          */
8906                         if  (tmp->found_rec)
8907                                 continue;
8908                 }
8909
8910                 dback->found_ref += 1;
8911                 dback->disk_bytenr = bytenr;
8912                 dback->bytes = bytes;
8913
8914                 /*
8915                  * Set this so the verify backref code knows not to trust the
8916                  * values in this backref.
8917                  */
8918                 back->broken = 1;
8919         }
8920
8921         return 0;
8922 }
8923
8924 /*
8925  * Record orphan data ref into corresponding root.
8926  *
8927  * Return 0 if the extent item contains data ref and recorded.
8928  * Return 1 if the extent item contains no useful data ref
8929  *   On that case, it may contains only shared_dataref or metadata backref
8930  *   or the file extent exists(this should be handled by the extent bytenr
8931  *   recovery routine)
8932  * Return <0 if something goes wrong.
8933  */
8934 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8935                                       struct extent_record *rec)
8936 {
8937         struct btrfs_key key;
8938         struct btrfs_root *dest_root;
8939         struct extent_backref *back;
8940         struct data_backref *dback;
8941         struct orphan_data_extent *orphan;
8942         struct btrfs_path path;
8943         int recorded_data_ref = 0;
8944         int ret = 0;
8945
8946         if (rec->metadata)
8947                 return 1;
8948         btrfs_init_path(&path);
8949         list_for_each_entry(back, &rec->backrefs, list) {
8950                 if (back->full_backref || !back->is_data ||
8951                     !back->found_extent_tree)
8952                         continue;
8953                 dback = to_data_backref(back);
8954                 if (dback->found_ref)
8955                         continue;
8956                 key.objectid = dback->root;
8957                 key.type = BTRFS_ROOT_ITEM_KEY;
8958                 key.offset = (u64)-1;
8959
8960                 dest_root = btrfs_read_fs_root(fs_info, &key);
8961
8962                 /* For non-exist root we just skip it */
8963                 if (IS_ERR(dest_root) || !dest_root)
8964                         continue;
8965
8966                 key.objectid = dback->owner;
8967                 key.type = BTRFS_EXTENT_DATA_KEY;
8968                 key.offset = dback->offset;
8969
8970                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8971                 btrfs_release_path(&path);
8972                 /*
8973                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8974                  * we need to record it for inode/file extent rebuild.
8975                  * For ret > 0, we record it only for file extent rebuild.
8976                  * For ret == 0, the file extent exists but only bytenr
8977                  * mismatch, let the original bytenr fix routine to handle,
8978                  * don't record it.
8979                  */
8980                 if (ret == 0)
8981                         continue;
8982                 ret = 0;
8983                 orphan = malloc(sizeof(*orphan));
8984                 if (!orphan) {
8985                         ret = -ENOMEM;
8986                         goto out;
8987                 }
8988                 INIT_LIST_HEAD(&orphan->list);
8989                 orphan->root = dback->root;
8990                 orphan->objectid = dback->owner;
8991                 orphan->offset = dback->offset;
8992                 orphan->disk_bytenr = rec->cache.start;
8993                 orphan->disk_len = rec->cache.size;
8994                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8995                 recorded_data_ref = 1;
8996         }
8997 out:
8998         btrfs_release_path(&path);
8999         if (!ret)
9000                 return !recorded_data_ref;
9001         else
9002                 return ret;
9003 }
9004
9005 /*
9006  * when an incorrect extent item is found, this will delete
9007  * all of the existing entries for it and recreate them
9008  * based on what the tree scan found.
9009  */
9010 static int fixup_extent_refs(struct btrfs_fs_info *info,
9011                              struct cache_tree *extent_cache,
9012                              struct extent_record *rec)
9013 {
9014         struct btrfs_trans_handle *trans = NULL;
9015         int ret;
9016         struct btrfs_path path;
9017         struct list_head *cur = rec->backrefs.next;
9018         struct cache_extent *cache;
9019         struct extent_backref *back;
9020         int allocated = 0;
9021         u64 flags = 0;
9022
9023         if (rec->flag_block_full_backref)
9024                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9025
9026         btrfs_init_path(&path);
9027         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9028                 /*
9029                  * Sometimes the backrefs themselves are so broken they don't
9030                  * get attached to any meaningful rec, so first go back and
9031                  * check any of our backrefs that we couldn't find and throw
9032                  * them into the list if we find the backref so that
9033                  * verify_backrefs can figure out what to do.
9034                  */
9035                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9036                 if (ret < 0)
9037                         goto out;
9038         }
9039
9040         /* step one, make sure all of the backrefs agree */
9041         ret = verify_backrefs(info, &path, rec);
9042         if (ret < 0)
9043                 goto out;
9044
9045         trans = btrfs_start_transaction(info->extent_root, 1);
9046         if (IS_ERR(trans)) {
9047                 ret = PTR_ERR(trans);
9048                 goto out;
9049         }
9050
9051         /* step two, delete all the existing records */
9052         ret = delete_extent_records(trans, info->extent_root, &path,
9053                                     rec->start);
9054
9055         if (ret < 0)
9056                 goto out;
9057
9058         /* was this block corrupt?  If so, don't add references to it */
9059         cache = lookup_cache_extent(info->corrupt_blocks,
9060                                     rec->start, rec->max_size);
9061         if (cache) {
9062                 ret = 0;
9063                 goto out;
9064         }
9065
9066         /* step three, recreate all the refs we did find */
9067         while(cur != &rec->backrefs) {
9068                 back = to_extent_backref(cur);
9069                 cur = cur->next;
9070
9071                 /*
9072                  * if we didn't find any references, don't create a
9073                  * new extent record
9074                  */
9075                 if (!back->found_ref)
9076                         continue;
9077
9078                 rec->bad_full_backref = 0;
9079                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9080                 allocated = 1;
9081
9082                 if (ret)
9083                         goto out;
9084         }
9085 out:
9086         if (trans) {
9087                 int err = btrfs_commit_transaction(trans, info->extent_root);
9088                 if (!ret)
9089                         ret = err;
9090         }
9091
9092         if (!ret)
9093                 fprintf(stderr, "Repaired extent references for %llu\n",
9094                                 (unsigned long long)rec->start);
9095
9096         btrfs_release_path(&path);
9097         return ret;
9098 }
9099
9100 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9101                               struct extent_record *rec)
9102 {
9103         struct btrfs_trans_handle *trans;
9104         struct btrfs_root *root = fs_info->extent_root;
9105         struct btrfs_path path;
9106         struct btrfs_extent_item *ei;
9107         struct btrfs_key key;
9108         u64 flags;
9109         int ret = 0;
9110
9111         key.objectid = rec->start;
9112         if (rec->metadata) {
9113                 key.type = BTRFS_METADATA_ITEM_KEY;
9114                 key.offset = rec->info_level;
9115         } else {
9116                 key.type = BTRFS_EXTENT_ITEM_KEY;
9117                 key.offset = rec->max_size;
9118         }
9119
9120         trans = btrfs_start_transaction(root, 0);
9121         if (IS_ERR(trans))
9122                 return PTR_ERR(trans);
9123
9124         btrfs_init_path(&path);
9125         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9126         if (ret < 0) {
9127                 btrfs_release_path(&path);
9128                 btrfs_commit_transaction(trans, root);
9129                 return ret;
9130         } else if (ret) {
9131                 fprintf(stderr, "Didn't find extent for %llu\n",
9132                         (unsigned long long)rec->start);
9133                 btrfs_release_path(&path);
9134                 btrfs_commit_transaction(trans, root);
9135                 return -ENOENT;
9136         }
9137
9138         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9139                             struct btrfs_extent_item);
9140         flags = btrfs_extent_flags(path.nodes[0], ei);
9141         if (rec->flag_block_full_backref) {
9142                 fprintf(stderr, "setting full backref on %llu\n",
9143                         (unsigned long long)key.objectid);
9144                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9145         } else {
9146                 fprintf(stderr, "clearing full backref on %llu\n",
9147                         (unsigned long long)key.objectid);
9148                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9149         }
9150         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9151         btrfs_mark_buffer_dirty(path.nodes[0]);
9152         btrfs_release_path(&path);
9153         ret = btrfs_commit_transaction(trans, root);
9154         if (!ret)
9155                 fprintf(stderr, "Repaired extent flags for %llu\n",
9156                                 (unsigned long long)rec->start);
9157
9158         return ret;
9159 }
9160
9161 /* right now we only prune from the extent allocation tree */
9162 static int prune_one_block(struct btrfs_trans_handle *trans,
9163                            struct btrfs_fs_info *info,
9164                            struct btrfs_corrupt_block *corrupt)
9165 {
9166         int ret;
9167         struct btrfs_path path;
9168         struct extent_buffer *eb;
9169         u64 found;
9170         int slot;
9171         int nritems;
9172         int level = corrupt->level + 1;
9173
9174         btrfs_init_path(&path);
9175 again:
9176         /* we want to stop at the parent to our busted block */
9177         path.lowest_level = level;
9178
9179         ret = btrfs_search_slot(trans, info->extent_root,
9180                                 &corrupt->key, &path, -1, 1);
9181
9182         if (ret < 0)
9183                 goto out;
9184
9185         eb = path.nodes[level];
9186         if (!eb) {
9187                 ret = -ENOENT;
9188                 goto out;
9189         }
9190
9191         /*
9192          * hopefully the search gave us the block we want to prune,
9193          * lets try that first
9194          */
9195         slot = path.slots[level];
9196         found =  btrfs_node_blockptr(eb, slot);
9197         if (found == corrupt->cache.start)
9198                 goto del_ptr;
9199
9200         nritems = btrfs_header_nritems(eb);
9201
9202         /* the search failed, lets scan this node and hope we find it */
9203         for (slot = 0; slot < nritems; slot++) {
9204                 found =  btrfs_node_blockptr(eb, slot);
9205                 if (found == corrupt->cache.start)
9206                         goto del_ptr;
9207         }
9208         /*
9209          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9210          * to this block
9211          */
9212         if (eb == info->extent_root->node) {
9213                 ret = -ENOENT;
9214                 goto out;
9215         } else {
9216                 level++;
9217                 btrfs_release_path(&path);
9218                 goto again;
9219         }
9220
9221 del_ptr:
9222         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9223         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9224
9225 out:
9226         btrfs_release_path(&path);
9227         return ret;
9228 }
9229
9230 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9231 {
9232         struct btrfs_trans_handle *trans = NULL;
9233         struct cache_extent *cache;
9234         struct btrfs_corrupt_block *corrupt;
9235
9236         while (1) {
9237                 cache = search_cache_extent(info->corrupt_blocks, 0);
9238                 if (!cache)
9239                         break;
9240                 if (!trans) {
9241                         trans = btrfs_start_transaction(info->extent_root, 1);
9242                         if (IS_ERR(trans))
9243                                 return PTR_ERR(trans);
9244                 }
9245                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9246                 prune_one_block(trans, info, corrupt);
9247                 remove_cache_extent(info->corrupt_blocks, cache);
9248         }
9249         if (trans)
9250                 return btrfs_commit_transaction(trans, info->extent_root);
9251         return 0;
9252 }
9253
9254 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9255 {
9256         struct btrfs_block_group_cache *cache;
9257         u64 start, end;
9258         int ret;
9259
9260         while (1) {
9261                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9262                                             &start, &end, EXTENT_DIRTY);
9263                 if (ret)
9264                         break;
9265                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9266         }
9267
9268         start = 0;
9269         while (1) {
9270                 cache = btrfs_lookup_first_block_group(fs_info, start);
9271                 if (!cache)
9272                         break;
9273                 if (cache->cached)
9274                         cache->cached = 0;
9275                 start = cache->key.objectid + cache->key.offset;
9276         }
9277 }
9278
9279 static int check_extent_refs(struct btrfs_root *root,
9280                              struct cache_tree *extent_cache)
9281 {
9282         struct extent_record *rec;
9283         struct cache_extent *cache;
9284         int ret = 0;
9285         int had_dups = 0;
9286
9287         if (repair) {
9288                 /*
9289                  * if we're doing a repair, we have to make sure
9290                  * we don't allocate from the problem extents.
9291                  * In the worst case, this will be all the
9292                  * extents in the FS
9293                  */
9294                 cache = search_cache_extent(extent_cache, 0);
9295                 while(cache) {
9296                         rec = container_of(cache, struct extent_record, cache);
9297                         set_extent_dirty(root->fs_info->excluded_extents,
9298                                          rec->start,
9299                                          rec->start + rec->max_size - 1);
9300                         cache = next_cache_extent(cache);
9301                 }
9302
9303                 /* pin down all the corrupted blocks too */
9304                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9305                 while(cache) {
9306                         set_extent_dirty(root->fs_info->excluded_extents,
9307                                          cache->start,
9308                                          cache->start + cache->size - 1);
9309                         cache = next_cache_extent(cache);
9310                 }
9311                 prune_corrupt_blocks(root->fs_info);
9312                 reset_cached_block_groups(root->fs_info);
9313         }
9314
9315         reset_cached_block_groups(root->fs_info);
9316
9317         /*
9318          * We need to delete any duplicate entries we find first otherwise we
9319          * could mess up the extent tree when we have backrefs that actually
9320          * belong to a different extent item and not the weird duplicate one.
9321          */
9322         while (repair && !list_empty(&duplicate_extents)) {
9323                 rec = to_extent_record(duplicate_extents.next);
9324                 list_del_init(&rec->list);
9325
9326                 /* Sometimes we can find a backref before we find an actual
9327                  * extent, so we need to process it a little bit to see if there
9328                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9329                  * if this is a backref screwup.  If we need to delete stuff
9330                  * process_duplicates() will return 0, otherwise it will return
9331                  * 1 and we
9332                  */
9333                 if (process_duplicates(extent_cache, rec))
9334                         continue;
9335                 ret = delete_duplicate_records(root, rec);
9336                 if (ret < 0)
9337                         return ret;
9338                 /*
9339                  * delete_duplicate_records will return the number of entries
9340                  * deleted, so if it's greater than 0 then we know we actually
9341                  * did something and we need to remove.
9342                  */
9343                 if (ret)
9344                         had_dups = 1;
9345         }
9346
9347         if (had_dups)
9348                 return -EAGAIN;
9349
9350         while(1) {
9351                 int cur_err = 0;
9352                 int fix = 0;
9353
9354                 cache = search_cache_extent(extent_cache, 0);
9355                 if (!cache)
9356                         break;
9357                 rec = container_of(cache, struct extent_record, cache);
9358                 if (rec->num_duplicates) {
9359                         fprintf(stderr, "extent item %llu has multiple extent "
9360                                 "items\n", (unsigned long long)rec->start);
9361                         cur_err = 1;
9362                 }
9363
9364                 if (rec->refs != rec->extent_item_refs) {
9365                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9366                                 (unsigned long long)rec->start,
9367                                 (unsigned long long)rec->nr);
9368                         fprintf(stderr, "extent item %llu, found %llu\n",
9369                                 (unsigned long long)rec->extent_item_refs,
9370                                 (unsigned long long)rec->refs);
9371                         ret = record_orphan_data_extents(root->fs_info, rec);
9372                         if (ret < 0)
9373                                 goto repair_abort;
9374                         fix = ret;
9375                         cur_err = 1;
9376                 }
9377                 if (all_backpointers_checked(rec, 1)) {
9378                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9379                                 (unsigned long long)rec->start,
9380                                 (unsigned long long)rec->nr);
9381                         fix = 1;
9382                         cur_err = 1;
9383                 }
9384                 if (!rec->owner_ref_checked) {
9385                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9386                                 (unsigned long long)rec->start,
9387                                 (unsigned long long)rec->nr);
9388                         fix = 1;
9389                         cur_err = 1;
9390                 }
9391
9392                 if (repair && fix) {
9393                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9394                         if (ret)
9395                                 goto repair_abort;
9396                 }
9397
9398
9399                 if (rec->bad_full_backref) {
9400                         fprintf(stderr, "bad full backref, on [%llu]\n",
9401                                 (unsigned long long)rec->start);
9402                         if (repair) {
9403                                 ret = fixup_extent_flags(root->fs_info, rec);
9404                                 if (ret)
9405                                         goto repair_abort;
9406                                 fix = 1;
9407                         }
9408                         cur_err = 1;
9409                 }
9410                 /*
9411                  * Although it's not a extent ref's problem, we reuse this
9412                  * routine for error reporting.
9413                  * No repair function yet.
9414                  */
9415                 if (rec->crossing_stripes) {
9416                         fprintf(stderr,
9417                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9418                                 rec->start, rec->start + rec->max_size);
9419                         cur_err = 1;
9420                 }
9421
9422                 if (rec->wrong_chunk_type) {
9423                         fprintf(stderr,
9424                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9425                                 rec->start, rec->start + rec->max_size);
9426                         cur_err = 1;
9427                 }
9428
9429                 remove_cache_extent(extent_cache, cache);
9430                 free_all_extent_backrefs(rec);
9431                 if (!init_extent_tree && repair && (!cur_err || fix))
9432                         clear_extent_dirty(root->fs_info->excluded_extents,
9433                                            rec->start,
9434                                            rec->start + rec->max_size - 1);
9435                 free(rec);
9436         }
9437 repair_abort:
9438         if (repair) {
9439                 if (ret && ret != -EAGAIN) {
9440                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9441                         exit(1);
9442                 } else if (!ret) {
9443                         struct btrfs_trans_handle *trans;
9444
9445                         root = root->fs_info->extent_root;
9446                         trans = btrfs_start_transaction(root, 1);
9447                         if (IS_ERR(trans)) {
9448                                 ret = PTR_ERR(trans);
9449                                 goto repair_abort;
9450                         }
9451
9452                         btrfs_fix_block_accounting(trans, root);
9453                         ret = btrfs_commit_transaction(trans, root);
9454                         if (ret)
9455                                 goto repair_abort;
9456                 }
9457                 return ret;
9458         }
9459         return 0;
9460 }
9461
9462 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9463 {
9464         u64 stripe_size;
9465
9466         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9467                 stripe_size = length;
9468                 stripe_size /= num_stripes;
9469         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9470                 stripe_size = length * 2;
9471                 stripe_size /= num_stripes;
9472         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9473                 stripe_size = length;
9474                 stripe_size /= (num_stripes - 1);
9475         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9476                 stripe_size = length;
9477                 stripe_size /= (num_stripes - 2);
9478         } else {
9479                 stripe_size = length;
9480         }
9481         return stripe_size;
9482 }
9483
9484 /*
9485  * Check the chunk with its block group/dev list ref:
9486  * Return 0 if all refs seems valid.
9487  * Return 1 if part of refs seems valid, need later check for rebuild ref
9488  * like missing block group and needs to search extent tree to rebuild them.
9489  * Return -1 if essential refs are missing and unable to rebuild.
9490  */
9491 static int check_chunk_refs(struct chunk_record *chunk_rec,
9492                             struct block_group_tree *block_group_cache,
9493                             struct device_extent_tree *dev_extent_cache,
9494                             int silent)
9495 {
9496         struct cache_extent *block_group_item;
9497         struct block_group_record *block_group_rec;
9498         struct cache_extent *dev_extent_item;
9499         struct device_extent_record *dev_extent_rec;
9500         u64 devid;
9501         u64 offset;
9502         u64 length;
9503         int metadump_v2 = 0;
9504         int i;
9505         int ret = 0;
9506
9507         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9508                                                chunk_rec->offset,
9509                                                chunk_rec->length);
9510         if (block_group_item) {
9511                 block_group_rec = container_of(block_group_item,
9512                                                struct block_group_record,
9513                                                cache);
9514                 if (chunk_rec->length != block_group_rec->offset ||
9515                     chunk_rec->offset != block_group_rec->objectid ||
9516                     (!metadump_v2 &&
9517                      chunk_rec->type_flags != block_group_rec->flags)) {
9518                         if (!silent)
9519                                 fprintf(stderr,
9520                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9521                                         chunk_rec->objectid,
9522                                         chunk_rec->type,
9523                                         chunk_rec->offset,
9524                                         chunk_rec->length,
9525                                         chunk_rec->offset,
9526                                         chunk_rec->type_flags,
9527                                         block_group_rec->objectid,
9528                                         block_group_rec->type,
9529                                         block_group_rec->offset,
9530                                         block_group_rec->offset,
9531                                         block_group_rec->objectid,
9532                                         block_group_rec->flags);
9533                         ret = -1;
9534                 } else {
9535                         list_del_init(&block_group_rec->list);
9536                         chunk_rec->bg_rec = block_group_rec;
9537                 }
9538         } else {
9539                 if (!silent)
9540                         fprintf(stderr,
9541                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9542                                 chunk_rec->objectid,
9543                                 chunk_rec->type,
9544                                 chunk_rec->offset,
9545                                 chunk_rec->length,
9546                                 chunk_rec->offset,
9547                                 chunk_rec->type_flags);
9548                 ret = 1;
9549         }
9550
9551         if (metadump_v2)
9552                 return ret;
9553
9554         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9555                                     chunk_rec->num_stripes);
9556         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9557                 devid = chunk_rec->stripes[i].devid;
9558                 offset = chunk_rec->stripes[i].offset;
9559                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9560                                                        devid, offset, length);
9561                 if (dev_extent_item) {
9562                         dev_extent_rec = container_of(dev_extent_item,
9563                                                 struct device_extent_record,
9564                                                 cache);
9565                         if (dev_extent_rec->objectid != devid ||
9566                             dev_extent_rec->offset != offset ||
9567                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9568                             dev_extent_rec->length != length) {
9569                                 if (!silent)
9570                                         fprintf(stderr,
9571                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9572                                                 chunk_rec->objectid,
9573                                                 chunk_rec->type,
9574                                                 chunk_rec->offset,
9575                                                 chunk_rec->stripes[i].devid,
9576                                                 chunk_rec->stripes[i].offset,
9577                                                 dev_extent_rec->objectid,
9578                                                 dev_extent_rec->offset,
9579                                                 dev_extent_rec->length);
9580                                 ret = -1;
9581                         } else {
9582                                 list_move(&dev_extent_rec->chunk_list,
9583                                           &chunk_rec->dextents);
9584                         }
9585                 } else {
9586                         if (!silent)
9587                                 fprintf(stderr,
9588                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9589                                         chunk_rec->objectid,
9590                                         chunk_rec->type,
9591                                         chunk_rec->offset,
9592                                         chunk_rec->stripes[i].devid,
9593                                         chunk_rec->stripes[i].offset);
9594                         ret = -1;
9595                 }
9596         }
9597         return ret;
9598 }
9599
9600 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9601 int check_chunks(struct cache_tree *chunk_cache,
9602                  struct block_group_tree *block_group_cache,
9603                  struct device_extent_tree *dev_extent_cache,
9604                  struct list_head *good, struct list_head *bad,
9605                  struct list_head *rebuild, int silent)
9606 {
9607         struct cache_extent *chunk_item;
9608         struct chunk_record *chunk_rec;
9609         struct block_group_record *bg_rec;
9610         struct device_extent_record *dext_rec;
9611         int err;
9612         int ret = 0;
9613
9614         chunk_item = first_cache_extent(chunk_cache);
9615         while (chunk_item) {
9616                 chunk_rec = container_of(chunk_item, struct chunk_record,
9617                                          cache);
9618                 err = check_chunk_refs(chunk_rec, block_group_cache,
9619                                        dev_extent_cache, silent);
9620                 if (err < 0)
9621                         ret = err;
9622                 if (err == 0 && good)
9623                         list_add_tail(&chunk_rec->list, good);
9624                 if (err > 0 && rebuild)
9625                         list_add_tail(&chunk_rec->list, rebuild);
9626                 if (err < 0 && bad)
9627                         list_add_tail(&chunk_rec->list, bad);
9628                 chunk_item = next_cache_extent(chunk_item);
9629         }
9630
9631         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9632                 if (!silent)
9633                         fprintf(stderr,
9634                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9635                                 bg_rec->objectid,
9636                                 bg_rec->offset,
9637                                 bg_rec->flags);
9638                 if (!ret)
9639                         ret = 1;
9640         }
9641
9642         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9643                             chunk_list) {
9644                 if (!silent)
9645                         fprintf(stderr,
9646                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9647                                 dext_rec->objectid,
9648                                 dext_rec->offset,
9649                                 dext_rec->length);
9650                 if (!ret)
9651                         ret = 1;
9652         }
9653         return ret;
9654 }
9655
9656
9657 static int check_device_used(struct device_record *dev_rec,
9658                              struct device_extent_tree *dext_cache)
9659 {
9660         struct cache_extent *cache;
9661         struct device_extent_record *dev_extent_rec;
9662         u64 total_byte = 0;
9663
9664         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9665         while (cache) {
9666                 dev_extent_rec = container_of(cache,
9667                                               struct device_extent_record,
9668                                               cache);
9669                 if (dev_extent_rec->objectid != dev_rec->devid)
9670                         break;
9671
9672                 list_del_init(&dev_extent_rec->device_list);
9673                 total_byte += dev_extent_rec->length;
9674                 cache = next_cache_extent(cache);
9675         }
9676
9677         if (total_byte != dev_rec->byte_used) {
9678                 fprintf(stderr,
9679                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9680                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9681                         dev_rec->type, dev_rec->offset);
9682                 return -1;
9683         } else {
9684                 return 0;
9685         }
9686 }
9687
9688 /* check btrfs_dev_item -> btrfs_dev_extent */
9689 static int check_devices(struct rb_root *dev_cache,
9690                          struct device_extent_tree *dev_extent_cache)
9691 {
9692         struct rb_node *dev_node;
9693         struct device_record *dev_rec;
9694         struct device_extent_record *dext_rec;
9695         int err;
9696         int ret = 0;
9697
9698         dev_node = rb_first(dev_cache);
9699         while (dev_node) {
9700                 dev_rec = container_of(dev_node, struct device_record, node);
9701                 err = check_device_used(dev_rec, dev_extent_cache);
9702                 if (err)
9703                         ret = err;
9704
9705                 dev_node = rb_next(dev_node);
9706         }
9707         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9708                             device_list) {
9709                 fprintf(stderr,
9710                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9711                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9712                 if (!ret)
9713                         ret = 1;
9714         }
9715         return ret;
9716 }
9717
9718 static int add_root_item_to_list(struct list_head *head,
9719                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9720                                   u8 level, u8 drop_level,
9721                                   int level_size, struct btrfs_key *drop_key)
9722 {
9723
9724         struct root_item_record *ri_rec;
9725         ri_rec = malloc(sizeof(*ri_rec));
9726         if (!ri_rec)
9727                 return -ENOMEM;
9728         ri_rec->bytenr = bytenr;
9729         ri_rec->objectid = objectid;
9730         ri_rec->level = level;
9731         ri_rec->level_size = level_size;
9732         ri_rec->drop_level = drop_level;
9733         ri_rec->last_snapshot = last_snapshot;
9734         if (drop_key)
9735                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9736         list_add_tail(&ri_rec->list, head);
9737
9738         return 0;
9739 }
9740
9741 static void free_root_item_list(struct list_head *list)
9742 {
9743         struct root_item_record *ri_rec;
9744
9745         while (!list_empty(list)) {
9746                 ri_rec = list_first_entry(list, struct root_item_record,
9747                                           list);
9748                 list_del_init(&ri_rec->list);
9749                 free(ri_rec);
9750         }
9751 }
9752
9753 static int deal_root_from_list(struct list_head *list,
9754                                struct btrfs_root *root,
9755                                struct block_info *bits,
9756                                int bits_nr,
9757                                struct cache_tree *pending,
9758                                struct cache_tree *seen,
9759                                struct cache_tree *reada,
9760                                struct cache_tree *nodes,
9761                                struct cache_tree *extent_cache,
9762                                struct cache_tree *chunk_cache,
9763                                struct rb_root *dev_cache,
9764                                struct block_group_tree *block_group_cache,
9765                                struct device_extent_tree *dev_extent_cache)
9766 {
9767         int ret = 0;
9768         u64 last;
9769
9770         while (!list_empty(list)) {
9771                 struct root_item_record *rec;
9772                 struct extent_buffer *buf;
9773                 rec = list_entry(list->next,
9774                                  struct root_item_record, list);
9775                 last = 0;
9776                 buf = read_tree_block(root->fs_info->tree_root,
9777                                       rec->bytenr, rec->level_size, 0);
9778                 if (!extent_buffer_uptodate(buf)) {
9779                         free_extent_buffer(buf);
9780                         ret = -EIO;
9781                         break;
9782                 }
9783                 ret = add_root_to_pending(buf, extent_cache, pending,
9784                                     seen, nodes, rec->objectid);
9785                 if (ret < 0)
9786                         break;
9787                 /*
9788                  * To rebuild extent tree, we need deal with snapshot
9789                  * one by one, otherwise we deal with node firstly which
9790                  * can maximize readahead.
9791                  */
9792                 while (1) {
9793                         ret = run_next_block(root, bits, bits_nr, &last,
9794                                              pending, seen, reada, nodes,
9795                                              extent_cache, chunk_cache,
9796                                              dev_cache, block_group_cache,
9797                                              dev_extent_cache, rec);
9798                         if (ret != 0)
9799                                 break;
9800                 }
9801                 free_extent_buffer(buf);
9802                 list_del(&rec->list);
9803                 free(rec);
9804                 if (ret < 0)
9805                         break;
9806         }
9807         while (ret >= 0) {
9808                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9809                                      reada, nodes, extent_cache, chunk_cache,
9810                                      dev_cache, block_group_cache,
9811                                      dev_extent_cache, NULL);
9812                 if (ret != 0) {
9813                         if (ret > 0)
9814                                 ret = 0;
9815                         break;
9816                 }
9817         }
9818         return ret;
9819 }
9820
9821 static int check_chunks_and_extents(struct btrfs_root *root)
9822 {
9823         struct rb_root dev_cache;
9824         struct cache_tree chunk_cache;
9825         struct block_group_tree block_group_cache;
9826         struct device_extent_tree dev_extent_cache;
9827         struct cache_tree extent_cache;
9828         struct cache_tree seen;
9829         struct cache_tree pending;
9830         struct cache_tree reada;
9831         struct cache_tree nodes;
9832         struct extent_io_tree excluded_extents;
9833         struct cache_tree corrupt_blocks;
9834         struct btrfs_path path;
9835         struct btrfs_key key;
9836         struct btrfs_key found_key;
9837         int ret, err = 0;
9838         struct block_info *bits;
9839         int bits_nr;
9840         struct extent_buffer *leaf;
9841         int slot;
9842         struct btrfs_root_item ri;
9843         struct list_head dropping_trees;
9844         struct list_head normal_trees;
9845         struct btrfs_root *root1;
9846         u64 objectid;
9847         u32 level_size;
9848         u8 level;
9849
9850         dev_cache = RB_ROOT;
9851         cache_tree_init(&chunk_cache);
9852         block_group_tree_init(&block_group_cache);
9853         device_extent_tree_init(&dev_extent_cache);
9854
9855         cache_tree_init(&extent_cache);
9856         cache_tree_init(&seen);
9857         cache_tree_init(&pending);
9858         cache_tree_init(&nodes);
9859         cache_tree_init(&reada);
9860         cache_tree_init(&corrupt_blocks);
9861         extent_io_tree_init(&excluded_extents);
9862         INIT_LIST_HEAD(&dropping_trees);
9863         INIT_LIST_HEAD(&normal_trees);
9864
9865         if (repair) {
9866                 root->fs_info->excluded_extents = &excluded_extents;
9867                 root->fs_info->fsck_extent_cache = &extent_cache;
9868                 root->fs_info->free_extent_hook = free_extent_hook;
9869                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9870         }
9871
9872         bits_nr = 1024;
9873         bits = malloc(bits_nr * sizeof(struct block_info));
9874         if (!bits) {
9875                 perror("malloc");
9876                 exit(1);
9877         }
9878
9879         if (ctx.progress_enabled) {
9880                 ctx.tp = TASK_EXTENTS;
9881                 task_start(ctx.info);
9882         }
9883
9884 again:
9885         root1 = root->fs_info->tree_root;
9886         level = btrfs_header_level(root1->node);
9887         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9888                                     root1->node->start, 0, level, 0,
9889                                     root1->nodesize, NULL);
9890         if (ret < 0)
9891                 goto out;
9892         root1 = root->fs_info->chunk_root;
9893         level = btrfs_header_level(root1->node);
9894         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9895                                     root1->node->start, 0, level, 0,
9896                                     root1->nodesize, NULL);
9897         if (ret < 0)
9898                 goto out;
9899         btrfs_init_path(&path);
9900         key.offset = 0;
9901         key.objectid = 0;
9902         key.type = BTRFS_ROOT_ITEM_KEY;
9903         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9904                                         &key, &path, 0, 0);
9905         if (ret < 0)
9906                 goto out;
9907         while(1) {
9908                 leaf = path.nodes[0];
9909                 slot = path.slots[0];
9910                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9911                         ret = btrfs_next_leaf(root, &path);
9912                         if (ret != 0)
9913                                 break;
9914                         leaf = path.nodes[0];
9915                         slot = path.slots[0];
9916                 }
9917                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9918                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9919                         unsigned long offset;
9920                         u64 last_snapshot;
9921
9922                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9923                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9924                         last_snapshot = btrfs_root_last_snapshot(&ri);
9925                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9926                                 level = btrfs_root_level(&ri);
9927                                 level_size = root->nodesize;
9928                                 ret = add_root_item_to_list(&normal_trees,
9929                                                 found_key.objectid,
9930                                                 btrfs_root_bytenr(&ri),
9931                                                 last_snapshot, level,
9932                                                 0, level_size, NULL);
9933                                 if (ret < 0)
9934                                         goto out;
9935                         } else {
9936                                 level = btrfs_root_level(&ri);
9937                                 level_size = root->nodesize;
9938                                 objectid = found_key.objectid;
9939                                 btrfs_disk_key_to_cpu(&found_key,
9940                                                       &ri.drop_progress);
9941                                 ret = add_root_item_to_list(&dropping_trees,
9942                                                 objectid,
9943                                                 btrfs_root_bytenr(&ri),
9944                                                 last_snapshot, level,
9945                                                 ri.drop_level,
9946                                                 level_size, &found_key);
9947                                 if (ret < 0)
9948                                         goto out;
9949                         }
9950                 }
9951                 path.slots[0]++;
9952         }
9953         btrfs_release_path(&path);
9954
9955         /*
9956          * check_block can return -EAGAIN if it fixes something, please keep
9957          * this in mind when dealing with return values from these functions, if
9958          * we get -EAGAIN we want to fall through and restart the loop.
9959          */
9960         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9961                                   &seen, &reada, &nodes, &extent_cache,
9962                                   &chunk_cache, &dev_cache, &block_group_cache,
9963                                   &dev_extent_cache);
9964         if (ret < 0) {
9965                 if (ret == -EAGAIN)
9966                         goto loop;
9967                 goto out;
9968         }
9969         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9970                                   &pending, &seen, &reada, &nodes,
9971                                   &extent_cache, &chunk_cache, &dev_cache,
9972                                   &block_group_cache, &dev_extent_cache);
9973         if (ret < 0) {
9974                 if (ret == -EAGAIN)
9975                         goto loop;
9976                 goto out;
9977         }
9978
9979         ret = check_chunks(&chunk_cache, &block_group_cache,
9980                            &dev_extent_cache, NULL, NULL, NULL, 0);
9981         if (ret) {
9982                 if (ret == -EAGAIN)
9983                         goto loop;
9984                 err = ret;
9985         }
9986
9987         ret = check_extent_refs(root, &extent_cache);
9988         if (ret < 0) {
9989                 if (ret == -EAGAIN)
9990                         goto loop;
9991                 goto out;
9992         }
9993
9994         ret = check_devices(&dev_cache, &dev_extent_cache);
9995         if (ret && err)
9996                 ret = err;
9997
9998 out:
9999         task_stop(ctx.info);
10000         if (repair) {
10001                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10002                 extent_io_tree_cleanup(&excluded_extents);
10003                 root->fs_info->fsck_extent_cache = NULL;
10004                 root->fs_info->free_extent_hook = NULL;
10005                 root->fs_info->corrupt_blocks = NULL;
10006                 root->fs_info->excluded_extents = NULL;
10007         }
10008         free(bits);
10009         free_chunk_cache_tree(&chunk_cache);
10010         free_device_cache_tree(&dev_cache);
10011         free_block_group_tree(&block_group_cache);
10012         free_device_extent_tree(&dev_extent_cache);
10013         free_extent_cache_tree(&seen);
10014         free_extent_cache_tree(&pending);
10015         free_extent_cache_tree(&reada);
10016         free_extent_cache_tree(&nodes);
10017         free_root_item_list(&normal_trees);
10018         free_root_item_list(&dropping_trees);
10019         return ret;
10020 loop:
10021         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10022         free_extent_cache_tree(&seen);
10023         free_extent_cache_tree(&pending);
10024         free_extent_cache_tree(&reada);
10025         free_extent_cache_tree(&nodes);
10026         free_chunk_cache_tree(&chunk_cache);
10027         free_block_group_tree(&block_group_cache);
10028         free_device_cache_tree(&dev_cache);
10029         free_device_extent_tree(&dev_extent_cache);
10030         free_extent_record_cache(&extent_cache);
10031         free_root_item_list(&normal_trees);
10032         free_root_item_list(&dropping_trees);
10033         extent_io_tree_cleanup(&excluded_extents);
10034         goto again;
10035 }
10036
10037 /*
10038  * Check backrefs of a tree block given by @bytenr or @eb.
10039  *
10040  * @root:       the root containing the @bytenr or @eb
10041  * @eb:         tree block extent buffer, can be NULL
10042  * @bytenr:     bytenr of the tree block to search
10043  * @level:      tree level of the tree block
10044  * @owner:      owner of the tree block
10045  *
10046  * Return >0 for any error found and output error message
10047  * Return 0 for no error found
10048  */
10049 static int check_tree_block_ref(struct btrfs_root *root,
10050                                 struct extent_buffer *eb, u64 bytenr,
10051                                 int level, u64 owner)
10052 {
10053         struct btrfs_key key;
10054         struct btrfs_root *extent_root = root->fs_info->extent_root;
10055         struct btrfs_path path;
10056         struct btrfs_extent_item *ei;
10057         struct btrfs_extent_inline_ref *iref;
10058         struct extent_buffer *leaf;
10059         unsigned long end;
10060         unsigned long ptr;
10061         int slot;
10062         int skinny_level;
10063         int type;
10064         u32 nodesize = root->nodesize;
10065         u32 item_size;
10066         u64 offset;
10067         int tree_reloc_root = 0;
10068         int found_ref = 0;
10069         int err = 0;
10070         int ret;
10071
10072         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10073             btrfs_header_bytenr(root->node) == bytenr)
10074                 tree_reloc_root = 1;
10075
10076         btrfs_init_path(&path);
10077         key.objectid = bytenr;
10078         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10079                 key.type = BTRFS_METADATA_ITEM_KEY;
10080         else
10081                 key.type = BTRFS_EXTENT_ITEM_KEY;
10082         key.offset = (u64)-1;
10083
10084         /* Search for the backref in extent tree */
10085         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10086         if (ret < 0) {
10087                 err |= BACKREF_MISSING;
10088                 goto out;
10089         }
10090         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10091         if (ret) {
10092                 err |= BACKREF_MISSING;
10093                 goto out;
10094         }
10095
10096         leaf = path.nodes[0];
10097         slot = path.slots[0];
10098         btrfs_item_key_to_cpu(leaf, &key, slot);
10099
10100         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10101
10102         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10103                 skinny_level = (int)key.offset;
10104                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10105         } else {
10106                 struct btrfs_tree_block_info *info;
10107
10108                 info = (struct btrfs_tree_block_info *)(ei + 1);
10109                 skinny_level = btrfs_tree_block_level(leaf, info);
10110                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10111         }
10112
10113         if (eb) {
10114                 u64 header_gen;
10115                 u64 extent_gen;
10116
10117                 if (!(btrfs_extent_flags(leaf, ei) &
10118                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10119                         error(
10120                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10121                                 key.objectid, nodesize,
10122                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10123                         err = BACKREF_MISMATCH;
10124                 }
10125                 header_gen = btrfs_header_generation(eb);
10126                 extent_gen = btrfs_extent_generation(leaf, ei);
10127                 if (header_gen != extent_gen) {
10128                         error(
10129         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10130                                 key.objectid, nodesize, header_gen,
10131                                 extent_gen);
10132                         err = BACKREF_MISMATCH;
10133                 }
10134                 if (level != skinny_level) {
10135                         error(
10136                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10137                                 key.objectid, nodesize, level, skinny_level);
10138                         err = BACKREF_MISMATCH;
10139                 }
10140                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10141                         error(
10142                         "extent[%llu %u] is referred by other roots than %llu",
10143                                 key.objectid, nodesize, root->objectid);
10144                         err = BACKREF_MISMATCH;
10145                 }
10146         }
10147
10148         /*
10149          * Iterate the extent/metadata item to find the exact backref
10150          */
10151         item_size = btrfs_item_size_nr(leaf, slot);
10152         ptr = (unsigned long)iref;
10153         end = (unsigned long)ei + item_size;
10154         while (ptr < end) {
10155                 iref = (struct btrfs_extent_inline_ref *)ptr;
10156                 type = btrfs_extent_inline_ref_type(leaf, iref);
10157                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10158
10159                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10160                         (offset == root->objectid || offset == owner)) {
10161                         found_ref = 1;
10162                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10163                         /*
10164                          * Backref of tree reloc root points to itself, no need
10165                          * to check backref any more.
10166                          */
10167                         if (tree_reloc_root)
10168                                 found_ref = 1;
10169                         else
10170                         /* Check if the backref points to valid referencer */
10171                                 found_ref = !check_tree_block_ref(root, NULL,
10172                                                 offset, level + 1, owner);
10173                 }
10174
10175                 if (found_ref)
10176                         break;
10177                 ptr += btrfs_extent_inline_ref_size(type);
10178         }
10179
10180         /*
10181          * Inlined extent item doesn't have what we need, check
10182          * TREE_BLOCK_REF_KEY
10183          */
10184         if (!found_ref) {
10185                 btrfs_release_path(&path);
10186                 key.objectid = bytenr;
10187                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10188                 key.offset = root->objectid;
10189
10190                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10191                 if (!ret)
10192                         found_ref = 1;
10193         }
10194         if (!found_ref)
10195                 err |= BACKREF_MISSING;
10196 out:
10197         btrfs_release_path(&path);
10198         if (eb && (err & BACKREF_MISSING))
10199                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10200                         bytenr, nodesize, owner, level);
10201         return err;
10202 }
10203
10204 /*
10205  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10206  *
10207  * Return >0 any error found and output error message
10208  * Return 0 for no error found
10209  */
10210 static int check_extent_data_item(struct btrfs_root *root,
10211                                   struct extent_buffer *eb, int slot)
10212 {
10213         struct btrfs_file_extent_item *fi;
10214         struct btrfs_path path;
10215         struct btrfs_root *extent_root = root->fs_info->extent_root;
10216         struct btrfs_key fi_key;
10217         struct btrfs_key dbref_key;
10218         struct extent_buffer *leaf;
10219         struct btrfs_extent_item *ei;
10220         struct btrfs_extent_inline_ref *iref;
10221         struct btrfs_extent_data_ref *dref;
10222         u64 owner;
10223         u64 disk_bytenr;
10224         u64 disk_num_bytes;
10225         u64 extent_num_bytes;
10226         u64 extent_flags;
10227         u32 item_size;
10228         unsigned long end;
10229         unsigned long ptr;
10230         int type;
10231         u64 ref_root;
10232         int found_dbackref = 0;
10233         int err = 0;
10234         int ret;
10235
10236         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10237         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10238
10239         /* Nothing to check for hole and inline data extents */
10240         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10241             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10242                 return 0;
10243
10244         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10245         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10246         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10247
10248         /* Check unaligned disk_num_bytes and num_bytes */
10249         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10250                 error(
10251 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10252                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10253                         root->sectorsize);
10254                 err |= BYTES_UNALIGNED;
10255         } else {
10256                 data_bytes_allocated += disk_num_bytes;
10257         }
10258         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10259                 error(
10260 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10261                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10262                         root->sectorsize);
10263                 err |= BYTES_UNALIGNED;
10264         } else {
10265                 data_bytes_referenced += extent_num_bytes;
10266         }
10267         owner = btrfs_header_owner(eb);
10268
10269         /* Check the extent item of the file extent in extent tree */
10270         btrfs_init_path(&path);
10271         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10272         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10273         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10274
10275         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10276         if (ret)
10277                 goto out;
10278
10279         leaf = path.nodes[0];
10280         slot = path.slots[0];
10281         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10282
10283         extent_flags = btrfs_extent_flags(leaf, ei);
10284
10285         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10286                 error(
10287                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10288                     disk_bytenr, disk_num_bytes,
10289                     BTRFS_EXTENT_FLAG_DATA);
10290                 err |= BACKREF_MISMATCH;
10291         }
10292
10293         /* Check data backref inside that extent item */
10294         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10295         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10296         ptr = (unsigned long)iref;
10297         end = (unsigned long)ei + item_size;
10298         while (ptr < end) {
10299                 iref = (struct btrfs_extent_inline_ref *)ptr;
10300                 type = btrfs_extent_inline_ref_type(leaf, iref);
10301                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10302
10303                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10304                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10305                         if (ref_root == owner || ref_root == root->objectid)
10306                                 found_dbackref = 1;
10307                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10308                         found_dbackref = !check_tree_block_ref(root, NULL,
10309                                 btrfs_extent_inline_ref_offset(leaf, iref),
10310                                 0, owner);
10311                 }
10312
10313                 if (found_dbackref)
10314                         break;
10315                 ptr += btrfs_extent_inline_ref_size(type);
10316         }
10317
10318         if (!found_dbackref) {
10319                 btrfs_release_path(&path);
10320
10321                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10322                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10323                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10324                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10325                                 fi_key.objectid, fi_key.offset);
10326
10327                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10328                                         &dbref_key, &path, 0, 0);
10329                 if (!ret) {
10330                         found_dbackref = 1;
10331                         goto out;
10332                 }
10333
10334                 btrfs_release_path(&path);
10335
10336                 /*
10337                  * Neither inlined nor EXTENT_DATA_REF found, try
10338                  * SHARED_DATA_REF as last chance.
10339                  */
10340                 dbref_key.objectid = disk_bytenr;
10341                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10342                 dbref_key.offset = eb->start;
10343
10344                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10345                                         &dbref_key, &path, 0, 0);
10346                 if (!ret) {
10347                         found_dbackref = 1;
10348                         goto out;
10349                 }
10350         }
10351
10352 out:
10353         if (!found_dbackref)
10354                 err |= BACKREF_MISSING;
10355         btrfs_release_path(&path);
10356         if (err & BACKREF_MISSING) {
10357                 error("data extent[%llu %llu] backref lost",
10358                       disk_bytenr, disk_num_bytes);
10359         }
10360         return err;
10361 }
10362
10363 /*
10364  * Get real tree block level for the case like shared block
10365  * Return >= 0 as tree level
10366  * Return <0 for error
10367  */
10368 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10369 {
10370         struct extent_buffer *eb;
10371         struct btrfs_path path;
10372         struct btrfs_key key;
10373         struct btrfs_extent_item *ei;
10374         u64 flags;
10375         u64 transid;
10376         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10377         u8 backref_level;
10378         u8 header_level;
10379         int ret;
10380
10381         /* Search extent tree for extent generation and level */
10382         key.objectid = bytenr;
10383         key.type = BTRFS_METADATA_ITEM_KEY;
10384         key.offset = (u64)-1;
10385
10386         btrfs_init_path(&path);
10387         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10388         if (ret < 0)
10389                 goto release_out;
10390         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10391         if (ret < 0)
10392                 goto release_out;
10393         if (ret > 0) {
10394                 ret = -ENOENT;
10395                 goto release_out;
10396         }
10397
10398         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10399         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10400                             struct btrfs_extent_item);
10401         flags = btrfs_extent_flags(path.nodes[0], ei);
10402         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10403                 ret = -ENOENT;
10404                 goto release_out;
10405         }
10406
10407         /* Get transid for later read_tree_block() check */
10408         transid = btrfs_extent_generation(path.nodes[0], ei);
10409
10410         /* Get backref level as one source */
10411         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10412                 backref_level = key.offset;
10413         } else {
10414                 struct btrfs_tree_block_info *info;
10415
10416                 info = (struct btrfs_tree_block_info *)(ei + 1);
10417                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10418         }
10419         btrfs_release_path(&path);
10420
10421         /* Get level from tree block as an alternative source */
10422         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10423         if (!extent_buffer_uptodate(eb)) {
10424                 free_extent_buffer(eb);
10425                 return -EIO;
10426         }
10427         header_level = btrfs_header_level(eb);
10428         free_extent_buffer(eb);
10429
10430         if (header_level != backref_level)
10431                 return -EIO;
10432         return header_level;
10433
10434 release_out:
10435         btrfs_release_path(&path);
10436         return ret;
10437 }
10438
10439 /*
10440  * Check if a tree block backref is valid (points to a valid tree block)
10441  * if level == -1, level will be resolved
10442  * Return >0 for any error found and print error message
10443  */
10444 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10445                                     u64 bytenr, int level)
10446 {
10447         struct btrfs_root *root;
10448         struct btrfs_key key;
10449         struct btrfs_path path;
10450         struct extent_buffer *eb;
10451         struct extent_buffer *node;
10452         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10453         int err = 0;
10454         int ret;
10455
10456         /* Query level for level == -1 special case */
10457         if (level == -1)
10458                 level = query_tree_block_level(fs_info, bytenr);
10459         if (level < 0) {
10460                 err |= REFERENCER_MISSING;
10461                 goto out;
10462         }
10463
10464         key.objectid = root_id;
10465         key.type = BTRFS_ROOT_ITEM_KEY;
10466         key.offset = (u64)-1;
10467
10468         root = btrfs_read_fs_root(fs_info, &key);
10469         if (IS_ERR(root)) {
10470                 err |= REFERENCER_MISSING;
10471                 goto out;
10472         }
10473
10474         /* Read out the tree block to get item/node key */
10475         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10476         if (!extent_buffer_uptodate(eb)) {
10477                 err |= REFERENCER_MISSING;
10478                 free_extent_buffer(eb);
10479                 goto out;
10480         }
10481
10482         /* Empty tree, no need to check key */
10483         if (!btrfs_header_nritems(eb) && !level) {
10484                 free_extent_buffer(eb);
10485                 goto out;
10486         }
10487
10488         if (level)
10489                 btrfs_node_key_to_cpu(eb, &key, 0);
10490         else
10491                 btrfs_item_key_to_cpu(eb, &key, 0);
10492
10493         free_extent_buffer(eb);
10494
10495         btrfs_init_path(&path);
10496         path.lowest_level = level;
10497         /* Search with the first key, to ensure we can reach it */
10498         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10499         if (ret < 0) {
10500                 err |= REFERENCER_MISSING;
10501                 goto release_out;
10502         }
10503
10504         node = path.nodes[level];
10505         if (btrfs_header_bytenr(node) != bytenr) {
10506                 error(
10507         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10508                         bytenr, nodesize, bytenr,
10509                         btrfs_header_bytenr(node));
10510                 err |= REFERENCER_MISMATCH;
10511         }
10512         if (btrfs_header_level(node) != level) {
10513                 error(
10514         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10515                         bytenr, nodesize, level,
10516                         btrfs_header_level(node));
10517                 err |= REFERENCER_MISMATCH;
10518         }
10519
10520 release_out:
10521         btrfs_release_path(&path);
10522 out:
10523         if (err & REFERENCER_MISSING) {
10524                 if (level < 0)
10525                         error("extent [%llu %d] lost referencer (owner: %llu)",
10526                                 bytenr, nodesize, root_id);
10527                 else
10528                         error(
10529                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10530                                 bytenr, nodesize, root_id, level);
10531         }
10532
10533         return err;
10534 }
10535
10536 /*
10537  * Check if tree block @eb is tree reloc root.
10538  * Return 0 if it's not or any problem happens
10539  * Return 1 if it's a tree reloc root
10540  */
10541 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10542                                  struct extent_buffer *eb)
10543 {
10544         struct btrfs_root *tree_reloc_root;
10545         struct btrfs_key key;
10546         u64 bytenr = btrfs_header_bytenr(eb);
10547         u64 owner = btrfs_header_owner(eb);
10548         int ret = 0;
10549
10550         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10551         key.offset = owner;
10552         key.type = BTRFS_ROOT_ITEM_KEY;
10553
10554         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10555         if (IS_ERR(tree_reloc_root))
10556                 return 0;
10557
10558         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10559                 ret = 1;
10560         btrfs_free_fs_root(tree_reloc_root);
10561         return ret;
10562 }
10563
10564 /*
10565  * Check referencer for shared block backref
10566  * If level == -1, this function will resolve the level.
10567  */
10568 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10569                                      u64 parent, u64 bytenr, int level)
10570 {
10571         struct extent_buffer *eb;
10572         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10573         u32 nr;
10574         int found_parent = 0;
10575         int i;
10576
10577         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10578         if (!extent_buffer_uptodate(eb))
10579                 goto out;
10580
10581         if (level == -1)
10582                 level = query_tree_block_level(fs_info, bytenr);
10583         if (level < 0)
10584                 goto out;
10585
10586         /* It's possible it's a tree reloc root */
10587         if (parent == bytenr) {
10588                 if (is_tree_reloc_root(fs_info, eb))
10589                         found_parent = 1;
10590                 goto out;
10591         }
10592
10593         if (level + 1 != btrfs_header_level(eb))
10594                 goto out;
10595
10596         nr = btrfs_header_nritems(eb);
10597         for (i = 0; i < nr; i++) {
10598                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10599                         found_parent = 1;
10600                         break;
10601                 }
10602         }
10603 out:
10604         free_extent_buffer(eb);
10605         if (!found_parent) {
10606                 error(
10607         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10608                         bytenr, nodesize, parent, level);
10609                 return REFERENCER_MISSING;
10610         }
10611         return 0;
10612 }
10613
10614 /*
10615  * Check referencer for normal (inlined) data ref
10616  * If len == 0, it will be resolved by searching in extent tree
10617  */
10618 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10619                                      u64 root_id, u64 objectid, u64 offset,
10620                                      u64 bytenr, u64 len, u32 count)
10621 {
10622         struct btrfs_root *root;
10623         struct btrfs_root *extent_root = fs_info->extent_root;
10624         struct btrfs_key key;
10625         struct btrfs_path path;
10626         struct extent_buffer *leaf;
10627         struct btrfs_file_extent_item *fi;
10628         u32 found_count = 0;
10629         int slot;
10630         int ret = 0;
10631
10632         if (!len) {
10633                 key.objectid = bytenr;
10634                 key.type = BTRFS_EXTENT_ITEM_KEY;
10635                 key.offset = (u64)-1;
10636
10637                 btrfs_init_path(&path);
10638                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10639                 if (ret < 0)
10640                         goto out;
10641                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10642                 if (ret)
10643                         goto out;
10644                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10645                 if (key.objectid != bytenr ||
10646                     key.type != BTRFS_EXTENT_ITEM_KEY)
10647                         goto out;
10648                 len = key.offset;
10649                 btrfs_release_path(&path);
10650         }
10651         key.objectid = root_id;
10652         key.type = BTRFS_ROOT_ITEM_KEY;
10653         key.offset = (u64)-1;
10654         btrfs_init_path(&path);
10655
10656         root = btrfs_read_fs_root(fs_info, &key);
10657         if (IS_ERR(root))
10658                 goto out;
10659
10660         key.objectid = objectid;
10661         key.type = BTRFS_EXTENT_DATA_KEY;
10662         /*
10663          * It can be nasty as data backref offset is
10664          * file offset - file extent offset, which is smaller or
10665          * equal to original backref offset.  The only special case is
10666          * overflow.  So we need to special check and do further search.
10667          */
10668         key.offset = offset & (1ULL << 63) ? 0 : offset;
10669
10670         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10671         if (ret < 0)
10672                 goto out;
10673
10674         /*
10675          * Search afterwards to get correct one
10676          * NOTE: As we must do a comprehensive check on the data backref to
10677          * make sure the dref count also matches, we must iterate all file
10678          * extents for that inode.
10679          */
10680         while (1) {
10681                 leaf = path.nodes[0];
10682                 slot = path.slots[0];
10683
10684                 if (slot >= btrfs_header_nritems(leaf))
10685                         goto next;
10686                 btrfs_item_key_to_cpu(leaf, &key, slot);
10687                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10688                         break;
10689                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10690                 /*
10691                  * Except normal disk bytenr and disk num bytes, we still
10692                  * need to do extra check on dbackref offset as
10693                  * dbackref offset = file_offset - file_extent_offset
10694                  */
10695                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10696                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10697                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10698                     offset)
10699                         found_count++;
10700
10701 next:
10702                 ret = btrfs_next_item(root, &path);
10703                 if (ret)
10704                         break;
10705         }
10706 out:
10707         btrfs_release_path(&path);
10708         if (found_count != count) {
10709                 error(
10710 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10711                         bytenr, len, root_id, objectid, offset, count, found_count);
10712                 return REFERENCER_MISSING;
10713         }
10714         return 0;
10715 }
10716
10717 /*
10718  * Check if the referencer of a shared data backref exists
10719  */
10720 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10721                                      u64 parent, u64 bytenr)
10722 {
10723         struct extent_buffer *eb;
10724         struct btrfs_key key;
10725         struct btrfs_file_extent_item *fi;
10726         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10727         u32 nr;
10728         int found_parent = 0;
10729         int i;
10730
10731         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10732         if (!extent_buffer_uptodate(eb))
10733                 goto out;
10734
10735         nr = btrfs_header_nritems(eb);
10736         for (i = 0; i < nr; i++) {
10737                 btrfs_item_key_to_cpu(eb, &key, i);
10738                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10739                         continue;
10740
10741                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10742                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10743                         continue;
10744
10745                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10746                         found_parent = 1;
10747                         break;
10748                 }
10749         }
10750
10751 out:
10752         free_extent_buffer(eb);
10753         if (!found_parent) {
10754                 error("shared extent %llu referencer lost (parent: %llu)",
10755                         bytenr, parent);
10756                 return REFERENCER_MISSING;
10757         }
10758         return 0;
10759 }
10760
10761 /*
10762  * This function will check a given extent item, including its backref and
10763  * itself (like crossing stripe boundary and type)
10764  *
10765  * Since we don't use extent_record anymore, introduce new error bit
10766  */
10767 static int check_extent_item(struct btrfs_fs_info *fs_info,
10768                              struct extent_buffer *eb, int slot)
10769 {
10770         struct btrfs_extent_item *ei;
10771         struct btrfs_extent_inline_ref *iref;
10772         struct btrfs_extent_data_ref *dref;
10773         unsigned long end;
10774         unsigned long ptr;
10775         int type;
10776         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10777         u32 item_size = btrfs_item_size_nr(eb, slot);
10778         u64 flags;
10779         u64 offset;
10780         int metadata = 0;
10781         int level;
10782         struct btrfs_key key;
10783         int ret;
10784         int err = 0;
10785
10786         btrfs_item_key_to_cpu(eb, &key, slot);
10787         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10788                 bytes_used += key.offset;
10789         else
10790                 bytes_used += nodesize;
10791
10792         if (item_size < sizeof(*ei)) {
10793                 /*
10794                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10795                  * old thing when on disk format is still un-determined.
10796                  * No need to care about it anymore
10797                  */
10798                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10799                 return -ENOTTY;
10800         }
10801
10802         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10803         flags = btrfs_extent_flags(eb, ei);
10804
10805         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10806                 metadata = 1;
10807         if (metadata && check_crossing_stripes(global_info, key.objectid,
10808                                                eb->len)) {
10809                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10810                       key.objectid, key.objectid + nodesize);
10811                 err |= CROSSING_STRIPE_BOUNDARY;
10812         }
10813
10814         ptr = (unsigned long)(ei + 1);
10815
10816         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10817                 /* Old EXTENT_ITEM metadata */
10818                 struct btrfs_tree_block_info *info;
10819
10820                 info = (struct btrfs_tree_block_info *)ptr;
10821                 level = btrfs_tree_block_level(eb, info);
10822                 ptr += sizeof(struct btrfs_tree_block_info);
10823         } else {
10824                 /* New METADATA_ITEM */
10825                 level = key.offset;
10826         }
10827         end = (unsigned long)ei + item_size;
10828
10829 next:
10830         /* Reached extent item end normally */
10831         if (ptr == end)
10832                 goto out;
10833
10834         /* Beyond extent item end, wrong item size */
10835         if (ptr > end) {
10836                 err |= ITEM_SIZE_MISMATCH;
10837                 error("extent item at bytenr %llu slot %d has wrong size",
10838                         eb->start, slot);
10839                 goto out;
10840         }
10841
10842         /* Now check every backref in this extent item */
10843         iref = (struct btrfs_extent_inline_ref *)ptr;
10844         type = btrfs_extent_inline_ref_type(eb, iref);
10845         offset = btrfs_extent_inline_ref_offset(eb, iref);
10846         switch (type) {
10847         case BTRFS_TREE_BLOCK_REF_KEY:
10848                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10849                                                level);
10850                 err |= ret;
10851                 break;
10852         case BTRFS_SHARED_BLOCK_REF_KEY:
10853                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10854                                                  level);
10855                 err |= ret;
10856                 break;
10857         case BTRFS_EXTENT_DATA_REF_KEY:
10858                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10859                 ret = check_extent_data_backref(fs_info,
10860                                 btrfs_extent_data_ref_root(eb, dref),
10861                                 btrfs_extent_data_ref_objectid(eb, dref),
10862                                 btrfs_extent_data_ref_offset(eb, dref),
10863                                 key.objectid, key.offset,
10864                                 btrfs_extent_data_ref_count(eb, dref));
10865                 err |= ret;
10866                 break;
10867         case BTRFS_SHARED_DATA_REF_KEY:
10868                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10869                 err |= ret;
10870                 break;
10871         default:
10872                 error("extent[%llu %d %llu] has unknown ref type: %d",
10873                         key.objectid, key.type, key.offset, type);
10874                 err |= UNKNOWN_TYPE;
10875                 goto out;
10876         }
10877
10878         ptr += btrfs_extent_inline_ref_size(type);
10879         goto next;
10880
10881 out:
10882         return err;
10883 }
10884
10885 /*
10886  * Check if a dev extent item is referred correctly by its chunk
10887  */
10888 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10889                                  struct extent_buffer *eb, int slot)
10890 {
10891         struct btrfs_root *chunk_root = fs_info->chunk_root;
10892         struct btrfs_dev_extent *ptr;
10893         struct btrfs_path path;
10894         struct btrfs_key chunk_key;
10895         struct btrfs_key devext_key;
10896         struct btrfs_chunk *chunk;
10897         struct extent_buffer *l;
10898         int num_stripes;
10899         u64 length;
10900         int i;
10901         int found_chunk = 0;
10902         int ret;
10903
10904         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10905         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10906         length = btrfs_dev_extent_length(eb, ptr);
10907
10908         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10909         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10910         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10911
10912         btrfs_init_path(&path);
10913         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10914         if (ret)
10915                 goto out;
10916
10917         l = path.nodes[0];
10918         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10919         if (btrfs_chunk_length(l, chunk) != length)
10920                 goto out;
10921
10922         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10923         for (i = 0; i < num_stripes; i++) {
10924                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10925                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10926
10927                 if (devid == devext_key.objectid &&
10928                     offset == devext_key.offset) {
10929                         found_chunk = 1;
10930                         break;
10931                 }
10932         }
10933 out:
10934         btrfs_release_path(&path);
10935         if (!found_chunk) {
10936                 error(
10937                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10938                         devext_key.objectid, devext_key.offset, length);
10939                 return REFERENCER_MISSING;
10940         }
10941         return 0;
10942 }
10943
10944 /*
10945  * Check if the used space is correct with the dev item
10946  */
10947 static int check_dev_item(struct btrfs_fs_info *fs_info,
10948                           struct extent_buffer *eb, int slot)
10949 {
10950         struct btrfs_root *dev_root = fs_info->dev_root;
10951         struct btrfs_dev_item *dev_item;
10952         struct btrfs_path path;
10953         struct btrfs_key key;
10954         struct btrfs_dev_extent *ptr;
10955         u64 dev_id;
10956         u64 used;
10957         u64 total = 0;
10958         int ret;
10959
10960         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10961         dev_id = btrfs_device_id(eb, dev_item);
10962         used = btrfs_device_bytes_used(eb, dev_item);
10963
10964         key.objectid = dev_id;
10965         key.type = BTRFS_DEV_EXTENT_KEY;
10966         key.offset = 0;
10967
10968         btrfs_init_path(&path);
10969         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10970         if (ret < 0) {
10971                 btrfs_item_key_to_cpu(eb, &key, slot);
10972                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10973                         key.objectid, key.type, key.offset);
10974                 btrfs_release_path(&path);
10975                 return REFERENCER_MISSING;
10976         }
10977
10978         /* Iterate dev_extents to calculate the used space of a device */
10979         while (1) {
10980                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10981                         goto next;
10982
10983                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10984                 if (key.objectid > dev_id)
10985                         break;
10986                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10987                         goto next;
10988
10989                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10990                                      struct btrfs_dev_extent);
10991                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10992 next:
10993                 ret = btrfs_next_item(dev_root, &path);
10994                 if (ret)
10995                         break;
10996         }
10997         btrfs_release_path(&path);
10998
10999         if (used != total) {
11000                 btrfs_item_key_to_cpu(eb, &key, slot);
11001                 error(
11002 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11003                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11004                         BTRFS_DEV_EXTENT_KEY, dev_id);
11005                 return ACCOUNTING_MISMATCH;
11006         }
11007         return 0;
11008 }
11009
11010 /*
11011  * Check a block group item with its referener (chunk) and its used space
11012  * with extent/metadata item
11013  */
11014 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11015                                   struct extent_buffer *eb, int slot)
11016 {
11017         struct btrfs_root *extent_root = fs_info->extent_root;
11018         struct btrfs_root *chunk_root = fs_info->chunk_root;
11019         struct btrfs_block_group_item *bi;
11020         struct btrfs_block_group_item bg_item;
11021         struct btrfs_path path;
11022         struct btrfs_key bg_key;
11023         struct btrfs_key chunk_key;
11024         struct btrfs_key extent_key;
11025         struct btrfs_chunk *chunk;
11026         struct extent_buffer *leaf;
11027         struct btrfs_extent_item *ei;
11028         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11029         u64 flags;
11030         u64 bg_flags;
11031         u64 used;
11032         u64 total = 0;
11033         int ret;
11034         int err = 0;
11035
11036         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11037         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11038         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11039         used = btrfs_block_group_used(&bg_item);
11040         bg_flags = btrfs_block_group_flags(&bg_item);
11041
11042         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11043         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11044         chunk_key.offset = bg_key.objectid;
11045
11046         btrfs_init_path(&path);
11047         /* Search for the referencer chunk */
11048         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11049         if (ret) {
11050                 error(
11051                 "block group[%llu %llu] did not find the related chunk item",
11052                         bg_key.objectid, bg_key.offset);
11053                 err |= REFERENCER_MISSING;
11054         } else {
11055                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11056                                         struct btrfs_chunk);
11057                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11058                                                 bg_key.offset) {
11059                         error(
11060         "block group[%llu %llu] related chunk item length does not match",
11061                                 bg_key.objectid, bg_key.offset);
11062                         err |= REFERENCER_MISMATCH;
11063                 }
11064         }
11065         btrfs_release_path(&path);
11066
11067         /* Search from the block group bytenr */
11068         extent_key.objectid = bg_key.objectid;
11069         extent_key.type = 0;
11070         extent_key.offset = 0;
11071
11072         btrfs_init_path(&path);
11073         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11074         if (ret < 0)
11075                 goto out;
11076
11077         /* Iterate extent tree to account used space */
11078         while (1) {
11079                 leaf = path.nodes[0];
11080
11081                 /* Search slot can point to the last item beyond leaf nritems */
11082                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11083                         goto next;
11084
11085                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11086                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11087                         break;
11088
11089                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11090                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11091                         goto next;
11092                 if (extent_key.objectid < bg_key.objectid)
11093                         goto next;
11094
11095                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11096                         total += nodesize;
11097                 else
11098                         total += extent_key.offset;
11099
11100                 ei = btrfs_item_ptr(leaf, path.slots[0],
11101                                     struct btrfs_extent_item);
11102                 flags = btrfs_extent_flags(leaf, ei);
11103                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11104                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11105                                 error(
11106                         "bad extent[%llu, %llu) type mismatch with chunk",
11107                                         extent_key.objectid,
11108                                         extent_key.objectid + extent_key.offset);
11109                                 err |= CHUNK_TYPE_MISMATCH;
11110                         }
11111                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11112                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11113                                     BTRFS_BLOCK_GROUP_METADATA))) {
11114                                 error(
11115                         "bad extent[%llu, %llu) type mismatch with chunk",
11116                                         extent_key.objectid,
11117                                         extent_key.objectid + nodesize);
11118                                 err |= CHUNK_TYPE_MISMATCH;
11119                         }
11120                 }
11121 next:
11122                 ret = btrfs_next_item(extent_root, &path);
11123                 if (ret)
11124                         break;
11125         }
11126
11127 out:
11128         btrfs_release_path(&path);
11129
11130         if (total != used) {
11131                 error(
11132                 "block group[%llu %llu] used %llu but extent items used %llu",
11133                         bg_key.objectid, bg_key.offset, used, total);
11134                 err |= ACCOUNTING_MISMATCH;
11135         }
11136         return err;
11137 }
11138
11139 /*
11140  * Check a chunk item.
11141  * Including checking all referred dev_extents and block group
11142  */
11143 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11144                             struct extent_buffer *eb, int slot)
11145 {
11146         struct btrfs_root *extent_root = fs_info->extent_root;
11147         struct btrfs_root *dev_root = fs_info->dev_root;
11148         struct btrfs_path path;
11149         struct btrfs_key chunk_key;
11150         struct btrfs_key bg_key;
11151         struct btrfs_key devext_key;
11152         struct btrfs_chunk *chunk;
11153         struct extent_buffer *leaf;
11154         struct btrfs_block_group_item *bi;
11155         struct btrfs_block_group_item bg_item;
11156         struct btrfs_dev_extent *ptr;
11157         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11158         u64 length;
11159         u64 chunk_end;
11160         u64 type;
11161         u64 profile;
11162         int num_stripes;
11163         u64 offset;
11164         u64 objectid;
11165         int i;
11166         int ret;
11167         int err = 0;
11168
11169         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11170         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11171         length = btrfs_chunk_length(eb, chunk);
11172         chunk_end = chunk_key.offset + length;
11173         if (!IS_ALIGNED(length, sectorsize)) {
11174                 error("chunk[%llu %llu) not aligned to %u",
11175                         chunk_key.offset, chunk_end, sectorsize);
11176                 err |= BYTES_UNALIGNED;
11177                 goto out;
11178         }
11179
11180         type = btrfs_chunk_type(eb, chunk);
11181         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11182         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11183                 error("chunk[%llu %llu) has no chunk type",
11184                         chunk_key.offset, chunk_end);
11185                 err |= UNKNOWN_TYPE;
11186         }
11187         if (profile && (profile & (profile - 1))) {
11188                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11189                         chunk_key.offset, chunk_end, profile);
11190                 err |= UNKNOWN_TYPE;
11191         }
11192
11193         bg_key.objectid = chunk_key.offset;
11194         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11195         bg_key.offset = length;
11196
11197         btrfs_init_path(&path);
11198         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11199         if (ret) {
11200                 error(
11201                 "chunk[%llu %llu) did not find the related block group item",
11202                         chunk_key.offset, chunk_end);
11203                 err |= REFERENCER_MISSING;
11204         } else{
11205                 leaf = path.nodes[0];
11206                 bi = btrfs_item_ptr(leaf, path.slots[0],
11207                                     struct btrfs_block_group_item);
11208                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11209                                    sizeof(bg_item));
11210                 if (btrfs_block_group_flags(&bg_item) != type) {
11211                         error(
11212 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11213                                 chunk_key.offset, chunk_end, type,
11214                                 btrfs_block_group_flags(&bg_item));
11215                         err |= REFERENCER_MISSING;
11216                 }
11217         }
11218
11219         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11220         for (i = 0; i < num_stripes; i++) {
11221                 btrfs_release_path(&path);
11222                 btrfs_init_path(&path);
11223                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11224                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11225                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11226
11227                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11228                                         0, 0);
11229                 if (ret)
11230                         goto not_match_dev;
11231
11232                 leaf = path.nodes[0];
11233                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11234                                      struct btrfs_dev_extent);
11235                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11236                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11237                 if (objectid != chunk_key.objectid ||
11238                     offset != chunk_key.offset ||
11239                     btrfs_dev_extent_length(leaf, ptr) != length)
11240                         goto not_match_dev;
11241                 continue;
11242 not_match_dev:
11243                 err |= BACKREF_MISSING;
11244                 error(
11245                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11246                         chunk_key.objectid, chunk_end, i);
11247                 continue;
11248         }
11249         btrfs_release_path(&path);
11250 out:
11251         return err;
11252 }
11253
11254 /*
11255  * Main entry function to check known items and update related accounting info
11256  */
11257 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11258 {
11259         struct btrfs_fs_info *fs_info = root->fs_info;
11260         struct btrfs_key key;
11261         int slot = 0;
11262         int type;
11263         struct btrfs_extent_data_ref *dref;
11264         int ret;
11265         int err = 0;
11266
11267 next:
11268         btrfs_item_key_to_cpu(eb, &key, slot);
11269         type = key.type;
11270
11271         switch (type) {
11272         case BTRFS_EXTENT_DATA_KEY:
11273                 ret = check_extent_data_item(root, eb, slot);
11274                 err |= ret;
11275                 break;
11276         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11277                 ret = check_block_group_item(fs_info, eb, slot);
11278                 err |= ret;
11279                 break;
11280         case BTRFS_DEV_ITEM_KEY:
11281                 ret = check_dev_item(fs_info, eb, slot);
11282                 err |= ret;
11283                 break;
11284         case BTRFS_CHUNK_ITEM_KEY:
11285                 ret = check_chunk_item(fs_info, eb, slot);
11286                 err |= ret;
11287                 break;
11288         case BTRFS_DEV_EXTENT_KEY:
11289                 ret = check_dev_extent_item(fs_info, eb, slot);
11290                 err |= ret;
11291                 break;
11292         case BTRFS_EXTENT_ITEM_KEY:
11293         case BTRFS_METADATA_ITEM_KEY:
11294                 ret = check_extent_item(fs_info, eb, slot);
11295                 err |= ret;
11296                 break;
11297         case BTRFS_EXTENT_CSUM_KEY:
11298                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11299                 break;
11300         case BTRFS_TREE_BLOCK_REF_KEY:
11301                 ret = check_tree_block_backref(fs_info, key.offset,
11302                                                key.objectid, -1);
11303                 err |= ret;
11304                 break;
11305         case BTRFS_EXTENT_DATA_REF_KEY:
11306                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11307                 ret = check_extent_data_backref(fs_info,
11308                                 btrfs_extent_data_ref_root(eb, dref),
11309                                 btrfs_extent_data_ref_objectid(eb, dref),
11310                                 btrfs_extent_data_ref_offset(eb, dref),
11311                                 key.objectid, 0,
11312                                 btrfs_extent_data_ref_count(eb, dref));
11313                 err |= ret;
11314                 break;
11315         case BTRFS_SHARED_BLOCK_REF_KEY:
11316                 ret = check_shared_block_backref(fs_info, key.offset,
11317                                                  key.objectid, -1);
11318                 err |= ret;
11319                 break;
11320         case BTRFS_SHARED_DATA_REF_KEY:
11321                 ret = check_shared_data_backref(fs_info, key.offset,
11322                                                 key.objectid);
11323                 err |= ret;
11324                 break;
11325         default:
11326                 break;
11327         }
11328
11329         if (++slot < btrfs_header_nritems(eb))
11330                 goto next;
11331
11332         return err;
11333 }
11334
11335 /*
11336  * Helper function for later fs/subvol tree check.  To determine if a tree
11337  * block should be checked.
11338  * This function will ensure only the direct referencer with lowest rootid to
11339  * check a fs/subvolume tree block.
11340  *
11341  * Backref check at extent tree would detect errors like missing subvolume
11342  * tree, so we can do aggressive check to reduce duplicated checks.
11343  */
11344 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11345 {
11346         struct btrfs_root *extent_root = root->fs_info->extent_root;
11347         struct btrfs_key key;
11348         struct btrfs_path path;
11349         struct extent_buffer *leaf;
11350         int slot;
11351         struct btrfs_extent_item *ei;
11352         unsigned long ptr;
11353         unsigned long end;
11354         int type;
11355         u32 item_size;
11356         u64 offset;
11357         struct btrfs_extent_inline_ref *iref;
11358         int ret;
11359
11360         btrfs_init_path(&path);
11361         key.objectid = btrfs_header_bytenr(eb);
11362         key.type = BTRFS_METADATA_ITEM_KEY;
11363         key.offset = (u64)-1;
11364
11365         /*
11366          * Any failure in backref resolving means we can't determine
11367          * whom the tree block belongs to.
11368          * So in that case, we need to check that tree block
11369          */
11370         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11371         if (ret < 0)
11372                 goto need_check;
11373
11374         ret = btrfs_previous_extent_item(extent_root, &path,
11375                                          btrfs_header_bytenr(eb));
11376         if (ret)
11377                 goto need_check;
11378
11379         leaf = path.nodes[0];
11380         slot = path.slots[0];
11381         btrfs_item_key_to_cpu(leaf, &key, slot);
11382         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11383
11384         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11385                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11386         } else {
11387                 struct btrfs_tree_block_info *info;
11388
11389                 info = (struct btrfs_tree_block_info *)(ei + 1);
11390                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11391         }
11392
11393         item_size = btrfs_item_size_nr(leaf, slot);
11394         ptr = (unsigned long)iref;
11395         end = (unsigned long)ei + item_size;
11396         while (ptr < end) {
11397                 iref = (struct btrfs_extent_inline_ref *)ptr;
11398                 type = btrfs_extent_inline_ref_type(leaf, iref);
11399                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11400
11401                 /*
11402                  * We only check the tree block if current root is
11403                  * the lowest referencer of it.
11404                  */
11405                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11406                     offset < root->objectid) {
11407                         btrfs_release_path(&path);
11408                         return 0;
11409                 }
11410
11411                 ptr += btrfs_extent_inline_ref_size(type);
11412         }
11413         /*
11414          * Normally we should also check keyed tree block ref, but that may be
11415          * very time consuming.  Inlined ref should already make us skip a lot
11416          * of refs now.  So skip search keyed tree block ref.
11417          */
11418
11419 need_check:
11420         btrfs_release_path(&path);
11421         return 1;
11422 }
11423
11424 /*
11425  * Traversal function for tree block. We will do:
11426  * 1) Skip shared fs/subvolume tree blocks
11427  * 2) Update related bytes accounting
11428  * 3) Pre-order traversal
11429  */
11430 static int traverse_tree_block(struct btrfs_root *root,
11431                                 struct extent_buffer *node)
11432 {
11433         struct extent_buffer *eb;
11434         struct btrfs_key key;
11435         struct btrfs_key drop_key;
11436         int level;
11437         u64 nr;
11438         int i;
11439         int err = 0;
11440         int ret;
11441
11442         /*
11443          * Skip shared fs/subvolume tree block, in that case they will
11444          * be checked by referencer with lowest rootid
11445          */
11446         if (is_fstree(root->objectid) && !should_check(root, node))
11447                 return 0;
11448
11449         /* Update bytes accounting */
11450         total_btree_bytes += node->len;
11451         if (fs_root_objectid(btrfs_header_owner(node)))
11452                 total_fs_tree_bytes += node->len;
11453         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11454                 total_extent_tree_bytes += node->len;
11455         if (!found_old_backref &&
11456             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11457             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11458             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11459                 found_old_backref = 1;
11460
11461         /* pre-order tranversal, check itself first */
11462         level = btrfs_header_level(node);
11463         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11464                                    btrfs_header_level(node),
11465                                    btrfs_header_owner(node));
11466         err |= ret;
11467         if (err)
11468                 error(
11469         "check %s failed root %llu bytenr %llu level %d, force continue check",
11470                         level ? "node":"leaf", root->objectid,
11471                         btrfs_header_bytenr(node), btrfs_header_level(node));
11472
11473         if (!level) {
11474                 btree_space_waste += btrfs_leaf_free_space(root, node);
11475                 ret = check_leaf_items(root, node);
11476                 err |= ret;
11477                 return err;
11478         }
11479
11480         nr = btrfs_header_nritems(node);
11481         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11482         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11483                 sizeof(struct btrfs_key_ptr);
11484
11485         /* Then check all its children */
11486         for (i = 0; i < nr; i++) {
11487                 u64 blocknr = btrfs_node_blockptr(node, i);
11488
11489                 btrfs_node_key_to_cpu(node, &key, i);
11490                 if (level == root->root_item.drop_level &&
11491                     is_dropped_key(&key, &drop_key))
11492                         continue;
11493
11494                 /*
11495                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11496                  * to call the function itself.
11497                  */
11498                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11499                 if (extent_buffer_uptodate(eb)) {
11500                         ret = traverse_tree_block(root, eb);
11501                         err |= ret;
11502                 }
11503                 free_extent_buffer(eb);
11504         }
11505
11506         return err;
11507 }
11508
11509 /*
11510  * Low memory usage version check_chunks_and_extents.
11511  */
11512 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11513 {
11514         struct btrfs_path path;
11515         struct btrfs_key key;
11516         struct btrfs_root *root1;
11517         struct btrfs_root *cur_root;
11518         int err = 0;
11519         int ret;
11520
11521         root1 = root->fs_info->chunk_root;
11522         ret = traverse_tree_block(root1, root1->node);
11523         err |= ret;
11524
11525         root1 = root->fs_info->tree_root;
11526         ret = traverse_tree_block(root1, root1->node);
11527         err |= ret;
11528
11529         btrfs_init_path(&path);
11530         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11531         key.offset = 0;
11532         key.type = BTRFS_ROOT_ITEM_KEY;
11533
11534         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11535         if (ret) {
11536                 error("cannot find extent treet in tree_root");
11537                 goto out;
11538         }
11539
11540         while (1) {
11541                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11542                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11543                         goto next;
11544                 key.offset = (u64)-1;
11545
11546                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11547                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11548                                         &key);
11549                 else
11550                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11551                 if (IS_ERR(cur_root) || !cur_root) {
11552                         error("failed to read tree: %lld", key.objectid);
11553                         goto next;
11554                 }
11555
11556                 ret = traverse_tree_block(cur_root, cur_root->node);
11557                 err |= ret;
11558
11559                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11560                         btrfs_free_fs_root(cur_root);
11561 next:
11562                 ret = btrfs_next_item(root1, &path);
11563                 if (ret)
11564                         goto out;
11565         }
11566
11567 out:
11568         btrfs_release_path(&path);
11569         return err;
11570 }
11571
11572 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11573                            struct btrfs_root *root, int overwrite)
11574 {
11575         struct extent_buffer *c;
11576         struct extent_buffer *old = root->node;
11577         int level;
11578         int ret;
11579         struct btrfs_disk_key disk_key = {0,0,0};
11580
11581         level = 0;
11582
11583         if (overwrite) {
11584                 c = old;
11585                 extent_buffer_get(c);
11586                 goto init;
11587         }
11588         c = btrfs_alloc_free_block(trans, root,
11589                                    root->nodesize,
11590                                    root->root_key.objectid,
11591                                    &disk_key, level, 0, 0);
11592         if (IS_ERR(c)) {
11593                 c = old;
11594                 extent_buffer_get(c);
11595                 overwrite = 1;
11596         }
11597 init:
11598         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11599         btrfs_set_header_level(c, level);
11600         btrfs_set_header_bytenr(c, c->start);
11601         btrfs_set_header_generation(c, trans->transid);
11602         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11603         btrfs_set_header_owner(c, root->root_key.objectid);
11604
11605         write_extent_buffer(c, root->fs_info->fsid,
11606                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11607
11608         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11609                             btrfs_header_chunk_tree_uuid(c),
11610                             BTRFS_UUID_SIZE);
11611
11612         btrfs_mark_buffer_dirty(c);
11613         /*
11614          * this case can happen in the following case:
11615          *
11616          * 1.overwrite previous root.
11617          *
11618          * 2.reinit reloc data root, this is because we skip pin
11619          * down reloc data tree before which means we can allocate
11620          * same block bytenr here.
11621          */
11622         if (old->start == c->start) {
11623                 btrfs_set_root_generation(&root->root_item,
11624                                           trans->transid);
11625                 root->root_item.level = btrfs_header_level(root->node);
11626                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11627                                         &root->root_key, &root->root_item);
11628                 if (ret) {
11629                         free_extent_buffer(c);
11630                         return ret;
11631                 }
11632         }
11633         free_extent_buffer(old);
11634         root->node = c;
11635         add_root_to_dirty_list(root);
11636         return 0;
11637 }
11638
11639 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11640                                 struct extent_buffer *eb, int tree_root)
11641 {
11642         struct extent_buffer *tmp;
11643         struct btrfs_root_item *ri;
11644         struct btrfs_key key;
11645         u64 bytenr;
11646         u32 nodesize;
11647         int level = btrfs_header_level(eb);
11648         int nritems;
11649         int ret;
11650         int i;
11651
11652         /*
11653          * If we have pinned this block before, don't pin it again.
11654          * This can not only avoid forever loop with broken filesystem
11655          * but also give us some speedups.
11656          */
11657         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11658                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11659                 return 0;
11660
11661         btrfs_pin_extent(fs_info, eb->start, eb->len);
11662
11663         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11664         nritems = btrfs_header_nritems(eb);
11665         for (i = 0; i < nritems; i++) {
11666                 if (level == 0) {
11667                         btrfs_item_key_to_cpu(eb, &key, i);
11668                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11669                                 continue;
11670                         /* Skip the extent root and reloc roots */
11671                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11672                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11673                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11674                                 continue;
11675                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11676                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11677
11678                         /*
11679                          * If at any point we start needing the real root we
11680                          * will have to build a stump root for the root we are
11681                          * in, but for now this doesn't actually use the root so
11682                          * just pass in extent_root.
11683                          */
11684                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11685                                               nodesize, 0);
11686                         if (!extent_buffer_uptodate(tmp)) {
11687                                 fprintf(stderr, "Error reading root block\n");
11688                                 return -EIO;
11689                         }
11690                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11691                         free_extent_buffer(tmp);
11692                         if (ret)
11693                                 return ret;
11694                 } else {
11695                         bytenr = btrfs_node_blockptr(eb, i);
11696
11697                         /* If we aren't the tree root don't read the block */
11698                         if (level == 1 && !tree_root) {
11699                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11700                                 continue;
11701                         }
11702
11703                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11704                                               nodesize, 0);
11705                         if (!extent_buffer_uptodate(tmp)) {
11706                                 fprintf(stderr, "Error reading tree block\n");
11707                                 return -EIO;
11708                         }
11709                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11710                         free_extent_buffer(tmp);
11711                         if (ret)
11712                                 return ret;
11713                 }
11714         }
11715
11716         return 0;
11717 }
11718
11719 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11720 {
11721         int ret;
11722
11723         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11724         if (ret)
11725                 return ret;
11726
11727         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11728 }
11729
11730 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11731 {
11732         struct btrfs_block_group_cache *cache;
11733         struct btrfs_path path;
11734         struct extent_buffer *leaf;
11735         struct btrfs_chunk *chunk;
11736         struct btrfs_key key;
11737         int ret;
11738         u64 start;
11739
11740         btrfs_init_path(&path);
11741         key.objectid = 0;
11742         key.type = BTRFS_CHUNK_ITEM_KEY;
11743         key.offset = 0;
11744         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11745         if (ret < 0) {
11746                 btrfs_release_path(&path);
11747                 return ret;
11748         }
11749
11750         /*
11751          * We do this in case the block groups were screwed up and had alloc
11752          * bits that aren't actually set on the chunks.  This happens with
11753          * restored images every time and could happen in real life I guess.
11754          */
11755         fs_info->avail_data_alloc_bits = 0;
11756         fs_info->avail_metadata_alloc_bits = 0;
11757         fs_info->avail_system_alloc_bits = 0;
11758
11759         /* First we need to create the in-memory block groups */
11760         while (1) {
11761                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11762                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11763                         if (ret < 0) {
11764                                 btrfs_release_path(&path);
11765                                 return ret;
11766                         }
11767                         if (ret) {
11768                                 ret = 0;
11769                                 break;
11770                         }
11771                 }
11772                 leaf = path.nodes[0];
11773                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11774                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11775                         path.slots[0]++;
11776                         continue;
11777                 }
11778
11779                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11780                 btrfs_add_block_group(fs_info, 0,
11781                                       btrfs_chunk_type(leaf, chunk),
11782                                       key.objectid, key.offset,
11783                                       btrfs_chunk_length(leaf, chunk));
11784                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11785                                  key.offset + btrfs_chunk_length(leaf, chunk));
11786                 path.slots[0]++;
11787         }
11788         start = 0;
11789         while (1) {
11790                 cache = btrfs_lookup_first_block_group(fs_info, start);
11791                 if (!cache)
11792                         break;
11793                 cache->cached = 1;
11794                 start = cache->key.objectid + cache->key.offset;
11795         }
11796
11797         btrfs_release_path(&path);
11798         return 0;
11799 }
11800
11801 static int reset_balance(struct btrfs_trans_handle *trans,
11802                          struct btrfs_fs_info *fs_info)
11803 {
11804         struct btrfs_root *root = fs_info->tree_root;
11805         struct btrfs_path path;
11806         struct extent_buffer *leaf;
11807         struct btrfs_key key;
11808         int del_slot, del_nr = 0;
11809         int ret;
11810         int found = 0;
11811
11812         btrfs_init_path(&path);
11813         key.objectid = BTRFS_BALANCE_OBJECTID;
11814         key.type = BTRFS_BALANCE_ITEM_KEY;
11815         key.offset = 0;
11816         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11817         if (ret) {
11818                 if (ret > 0)
11819                         ret = 0;
11820                 if (!ret)
11821                         goto reinit_data_reloc;
11822                 else
11823                         goto out;
11824         }
11825
11826         ret = btrfs_del_item(trans, root, &path);
11827         if (ret)
11828                 goto out;
11829         btrfs_release_path(&path);
11830
11831         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11832         key.type = BTRFS_ROOT_ITEM_KEY;
11833         key.offset = 0;
11834         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11835         if (ret < 0)
11836                 goto out;
11837         while (1) {
11838                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11839                         if (!found)
11840                                 break;
11841
11842                         if (del_nr) {
11843                                 ret = btrfs_del_items(trans, root, &path,
11844                                                       del_slot, del_nr);
11845                                 del_nr = 0;
11846                                 if (ret)
11847                                         goto out;
11848                         }
11849                         key.offset++;
11850                         btrfs_release_path(&path);
11851
11852                         found = 0;
11853                         ret = btrfs_search_slot(trans, root, &key, &path,
11854                                                 -1, 1);
11855                         if (ret < 0)
11856                                 goto out;
11857                         continue;
11858                 }
11859                 found = 1;
11860                 leaf = path.nodes[0];
11861                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11862                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11863                         break;
11864                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11865                         path.slots[0]++;
11866                         continue;
11867                 }
11868                 if (!del_nr) {
11869                         del_slot = path.slots[0];
11870                         del_nr = 1;
11871                 } else {
11872                         del_nr++;
11873                 }
11874                 path.slots[0]++;
11875         }
11876
11877         if (del_nr) {
11878                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11879                 if (ret)
11880                         goto out;
11881         }
11882         btrfs_release_path(&path);
11883
11884 reinit_data_reloc:
11885         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11886         key.type = BTRFS_ROOT_ITEM_KEY;
11887         key.offset = (u64)-1;
11888         root = btrfs_read_fs_root(fs_info, &key);
11889         if (IS_ERR(root)) {
11890                 fprintf(stderr, "Error reading data reloc tree\n");
11891                 ret = PTR_ERR(root);
11892                 goto out;
11893         }
11894         record_root_in_trans(trans, root);
11895         ret = btrfs_fsck_reinit_root(trans, root, 0);
11896         if (ret)
11897                 goto out;
11898         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11899 out:
11900         btrfs_release_path(&path);
11901         return ret;
11902 }
11903
11904 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11905                               struct btrfs_fs_info *fs_info)
11906 {
11907         u64 start = 0;
11908         int ret;
11909
11910         /*
11911          * The only reason we don't do this is because right now we're just
11912          * walking the trees we find and pinning down their bytes, we don't look
11913          * at any of the leaves.  In order to do mixed groups we'd have to check
11914          * the leaves of any fs roots and pin down the bytes for any file
11915          * extents we find.  Not hard but why do it if we don't have to?
11916          */
11917         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11918                 fprintf(stderr, "We don't support re-initing the extent tree "
11919                         "for mixed block groups yet, please notify a btrfs "
11920                         "developer you want to do this so they can add this "
11921                         "functionality.\n");
11922                 return -EINVAL;
11923         }
11924
11925         /*
11926          * first we need to walk all of the trees except the extent tree and pin
11927          * down the bytes that are in use so we don't overwrite any existing
11928          * metadata.
11929          */
11930         ret = pin_metadata_blocks(fs_info);
11931         if (ret) {
11932                 fprintf(stderr, "error pinning down used bytes\n");
11933                 return ret;
11934         }
11935
11936         /*
11937          * Need to drop all the block groups since we're going to recreate all
11938          * of them again.
11939          */
11940         btrfs_free_block_groups(fs_info);
11941         ret = reset_block_groups(fs_info);
11942         if (ret) {
11943                 fprintf(stderr, "error resetting the block groups\n");
11944                 return ret;
11945         }
11946
11947         /* Ok we can allocate now, reinit the extent root */
11948         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11949         if (ret) {
11950                 fprintf(stderr, "extent root initialization failed\n");
11951                 /*
11952                  * When the transaction code is updated we should end the
11953                  * transaction, but for now progs only knows about commit so
11954                  * just return an error.
11955                  */
11956                 return ret;
11957         }
11958
11959         /*
11960          * Now we have all the in-memory block groups setup so we can make
11961          * allocations properly, and the metadata we care about is safe since we
11962          * pinned all of it above.
11963          */
11964         while (1) {
11965                 struct btrfs_block_group_cache *cache;
11966
11967                 cache = btrfs_lookup_first_block_group(fs_info, start);
11968                 if (!cache)
11969                         break;
11970                 start = cache->key.objectid + cache->key.offset;
11971                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11972                                         &cache->key, &cache->item,
11973                                         sizeof(cache->item));
11974                 if (ret) {
11975                         fprintf(stderr, "Error adding block group\n");
11976                         return ret;
11977                 }
11978                 btrfs_extent_post_op(trans, fs_info->extent_root);
11979         }
11980
11981         ret = reset_balance(trans, fs_info);
11982         if (ret)
11983                 fprintf(stderr, "error resetting the pending balance\n");
11984
11985         return ret;
11986 }
11987
11988 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11989 {
11990         struct btrfs_path path;
11991         struct btrfs_trans_handle *trans;
11992         struct btrfs_key key;
11993         int ret;
11994
11995         printf("Recowing metadata block %llu\n", eb->start);
11996         key.objectid = btrfs_header_owner(eb);
11997         key.type = BTRFS_ROOT_ITEM_KEY;
11998         key.offset = (u64)-1;
11999
12000         root = btrfs_read_fs_root(root->fs_info, &key);
12001         if (IS_ERR(root)) {
12002                 fprintf(stderr, "Couldn't find owner root %llu\n",
12003                         key.objectid);
12004                 return PTR_ERR(root);
12005         }
12006
12007         trans = btrfs_start_transaction(root, 1);
12008         if (IS_ERR(trans))
12009                 return PTR_ERR(trans);
12010
12011         btrfs_init_path(&path);
12012         path.lowest_level = btrfs_header_level(eb);
12013         if (path.lowest_level)
12014                 btrfs_node_key_to_cpu(eb, &key, 0);
12015         else
12016                 btrfs_item_key_to_cpu(eb, &key, 0);
12017
12018         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12019         btrfs_commit_transaction(trans, root);
12020         btrfs_release_path(&path);
12021         return ret;
12022 }
12023
12024 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12025 {
12026         struct btrfs_path path;
12027         struct btrfs_trans_handle *trans;
12028         struct btrfs_key key;
12029         int ret;
12030
12031         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12032                bad->key.type, bad->key.offset);
12033         key.objectid = bad->root_id;
12034         key.type = BTRFS_ROOT_ITEM_KEY;
12035         key.offset = (u64)-1;
12036
12037         root = btrfs_read_fs_root(root->fs_info, &key);
12038         if (IS_ERR(root)) {
12039                 fprintf(stderr, "Couldn't find owner root %llu\n",
12040                         key.objectid);
12041                 return PTR_ERR(root);
12042         }
12043
12044         trans = btrfs_start_transaction(root, 1);
12045         if (IS_ERR(trans))
12046                 return PTR_ERR(trans);
12047
12048         btrfs_init_path(&path);
12049         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12050         if (ret) {
12051                 if (ret > 0)
12052                         ret = 0;
12053                 goto out;
12054         }
12055         ret = btrfs_del_item(trans, root, &path);
12056 out:
12057         btrfs_commit_transaction(trans, root);
12058         btrfs_release_path(&path);
12059         return ret;
12060 }
12061
12062 static int zero_log_tree(struct btrfs_root *root)
12063 {
12064         struct btrfs_trans_handle *trans;
12065         int ret;
12066
12067         trans = btrfs_start_transaction(root, 1);
12068         if (IS_ERR(trans)) {
12069                 ret = PTR_ERR(trans);
12070                 return ret;
12071         }
12072         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12073         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12074         ret = btrfs_commit_transaction(trans, root);
12075         return ret;
12076 }
12077
12078 static int populate_csum(struct btrfs_trans_handle *trans,
12079                          struct btrfs_root *csum_root, char *buf, u64 start,
12080                          u64 len)
12081 {
12082         u64 offset = 0;
12083         u64 sectorsize;
12084         int ret = 0;
12085
12086         while (offset < len) {
12087                 sectorsize = csum_root->sectorsize;
12088                 ret = read_extent_data(csum_root, buf, start + offset,
12089                                        &sectorsize, 0);
12090                 if (ret)
12091                         break;
12092                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12093                                             start + offset, buf, sectorsize);
12094                 if (ret)
12095                         break;
12096                 offset += sectorsize;
12097         }
12098         return ret;
12099 }
12100
12101 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12102                                       struct btrfs_root *csum_root,
12103                                       struct btrfs_root *cur_root)
12104 {
12105         struct btrfs_path path;
12106         struct btrfs_key key;
12107         struct extent_buffer *node;
12108         struct btrfs_file_extent_item *fi;
12109         char *buf = NULL;
12110         u64 start = 0;
12111         u64 len = 0;
12112         int slot = 0;
12113         int ret = 0;
12114
12115         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12116         if (!buf)
12117                 return -ENOMEM;
12118
12119         btrfs_init_path(&path);
12120         key.objectid = 0;
12121         key.offset = 0;
12122         key.type = 0;
12123         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12124         if (ret < 0)
12125                 goto out;
12126         /* Iterate all regular file extents and fill its csum */
12127         while (1) {
12128                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12129
12130                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12131                         goto next;
12132                 node = path.nodes[0];
12133                 slot = path.slots[0];
12134                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12135                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12136                         goto next;
12137                 start = btrfs_file_extent_disk_bytenr(node, fi);
12138                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12139
12140                 ret = populate_csum(trans, csum_root, buf, start, len);
12141                 if (ret == -EEXIST)
12142                         ret = 0;
12143                 if (ret < 0)
12144                         goto out;
12145 next:
12146                 /*
12147                  * TODO: if next leaf is corrupted, jump to nearest next valid
12148                  * leaf.
12149                  */
12150                 ret = btrfs_next_item(cur_root, &path);
12151                 if (ret < 0)
12152                         goto out;
12153                 if (ret > 0) {
12154                         ret = 0;
12155                         goto out;
12156                 }
12157         }
12158
12159 out:
12160         btrfs_release_path(&path);
12161         free(buf);
12162         return ret;
12163 }
12164
12165 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12166                                   struct btrfs_root *csum_root)
12167 {
12168         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12169         struct btrfs_path path;
12170         struct btrfs_root *tree_root = fs_info->tree_root;
12171         struct btrfs_root *cur_root;
12172         struct extent_buffer *node;
12173         struct btrfs_key key;
12174         int slot = 0;
12175         int ret = 0;
12176
12177         btrfs_init_path(&path);
12178         key.objectid = BTRFS_FS_TREE_OBJECTID;
12179         key.offset = 0;
12180         key.type = BTRFS_ROOT_ITEM_KEY;
12181         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12182         if (ret < 0)
12183                 goto out;
12184         if (ret > 0) {
12185                 ret = -ENOENT;
12186                 goto out;
12187         }
12188
12189         while (1) {
12190                 node = path.nodes[0];
12191                 slot = path.slots[0];
12192                 btrfs_item_key_to_cpu(node, &key, slot);
12193                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12194                         goto out;
12195                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12196                         goto next;
12197                 if (!is_fstree(key.objectid))
12198                         goto next;
12199                 key.offset = (u64)-1;
12200
12201                 cur_root = btrfs_read_fs_root(fs_info, &key);
12202                 if (IS_ERR(cur_root) || !cur_root) {
12203                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12204                                 key.objectid);
12205                         goto out;
12206                 }
12207                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12208                                 cur_root);
12209                 if (ret < 0)
12210                         goto out;
12211 next:
12212                 ret = btrfs_next_item(tree_root, &path);
12213                 if (ret > 0) {
12214                         ret = 0;
12215                         goto out;
12216                 }
12217                 if (ret < 0)
12218                         goto out;
12219         }
12220
12221 out:
12222         btrfs_release_path(&path);
12223         return ret;
12224 }
12225
12226 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12227                                       struct btrfs_root *csum_root)
12228 {
12229         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12230         struct btrfs_path path;
12231         struct btrfs_extent_item *ei;
12232         struct extent_buffer *leaf;
12233         char *buf;
12234         struct btrfs_key key;
12235         int ret;
12236
12237         btrfs_init_path(&path);
12238         key.objectid = 0;
12239         key.type = BTRFS_EXTENT_ITEM_KEY;
12240         key.offset = 0;
12241         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12242         if (ret < 0) {
12243                 btrfs_release_path(&path);
12244                 return ret;
12245         }
12246
12247         buf = malloc(csum_root->sectorsize);
12248         if (!buf) {
12249                 btrfs_release_path(&path);
12250                 return -ENOMEM;
12251         }
12252
12253         while (1) {
12254                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12255                         ret = btrfs_next_leaf(extent_root, &path);
12256                         if (ret < 0)
12257                                 break;
12258                         if (ret) {
12259                                 ret = 0;
12260                                 break;
12261                         }
12262                 }
12263                 leaf = path.nodes[0];
12264
12265                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12266                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12267                         path.slots[0]++;
12268                         continue;
12269                 }
12270
12271                 ei = btrfs_item_ptr(leaf, path.slots[0],
12272                                     struct btrfs_extent_item);
12273                 if (!(btrfs_extent_flags(leaf, ei) &
12274                       BTRFS_EXTENT_FLAG_DATA)) {
12275                         path.slots[0]++;
12276                         continue;
12277                 }
12278
12279                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12280                                     key.offset);
12281                 if (ret)
12282                         break;
12283                 path.slots[0]++;
12284         }
12285
12286         btrfs_release_path(&path);
12287         free(buf);
12288         return ret;
12289 }
12290
/*
 * Recompute all data checksums and insert them into the csum tree.
 *
 * Two sources can drive the scan.  After an extent tree init the extent tree
 * has been wiped and cannot be relied on, so the fs/subvolume trees have to
 * be used instead; the caller requests that by setting @search_fs_tree.
 * Otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
12307
12308 static void free_roots_info_cache(void)
12309 {
12310         if (!roots_info_cache)
12311                 return;
12312
12313         while (!cache_tree_empty(roots_info_cache)) {
12314                 struct cache_extent *entry;
12315                 struct root_item_info *rii;
12316
12317                 entry = first_cache_extent(roots_info_cache);
12318                 if (!entry)
12319                         break;
12320                 remove_cache_extent(roots_info_cache, entry);
12321                 rii = container_of(entry, struct root_item_info, cache_extent);
12322                 free(rii);
12323         }
12324
12325         free(roots_info_cache);
12326         roots_info_cache = NULL;
12327 }
12328
12329 static int build_roots_info_cache(struct btrfs_fs_info *info)
12330 {
12331         int ret = 0;
12332         struct btrfs_key key;
12333         struct extent_buffer *leaf;
12334         struct btrfs_path path;
12335
12336         if (!roots_info_cache) {
12337                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12338                 if (!roots_info_cache)
12339                         return -ENOMEM;
12340                 cache_tree_init(roots_info_cache);
12341         }
12342
12343         btrfs_init_path(&path);
12344         key.objectid = 0;
12345         key.type = BTRFS_EXTENT_ITEM_KEY;
12346         key.offset = 0;
12347         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12348         if (ret < 0)
12349                 goto out;
12350         leaf = path.nodes[0];
12351
12352         while (1) {
12353                 struct btrfs_key found_key;
12354                 struct btrfs_extent_item *ei;
12355                 struct btrfs_extent_inline_ref *iref;
12356                 int slot = path.slots[0];
12357                 int type;
12358                 u64 flags;
12359                 u64 root_id;
12360                 u8 level;
12361                 struct cache_extent *entry;
12362                 struct root_item_info *rii;
12363
12364                 if (slot >= btrfs_header_nritems(leaf)) {
12365                         ret = btrfs_next_leaf(info->extent_root, &path);
12366                         if (ret < 0) {
12367                                 break;
12368                         } else if (ret) {
12369                                 ret = 0;
12370                                 break;
12371                         }
12372                         leaf = path.nodes[0];
12373                         slot = path.slots[0];
12374                 }
12375
12376                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12377
12378                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12379                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12380                         goto next;
12381
12382                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12383                 flags = btrfs_extent_flags(leaf, ei);
12384
12385                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12386                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12387                         goto next;
12388
12389                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12390                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12391                         level = found_key.offset;
12392                 } else {
12393                         struct btrfs_tree_block_info *binfo;
12394
12395                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12396                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12397                         level = btrfs_tree_block_level(leaf, binfo);
12398                 }
12399
12400                 /*
12401                  * For a root extent, it must be of the following type and the
12402                  * first (and only one) iref in the item.
12403                  */
12404                 type = btrfs_extent_inline_ref_type(leaf, iref);
12405                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12406                         goto next;
12407
12408                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12409                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12410                 if (!entry) {
12411                         rii = malloc(sizeof(struct root_item_info));
12412                         if (!rii) {
12413                                 ret = -ENOMEM;
12414                                 goto out;
12415                         }
12416                         rii->cache_extent.start = root_id;
12417                         rii->cache_extent.size = 1;
12418                         rii->level = (u8)-1;
12419                         entry = &rii->cache_extent;
12420                         ret = insert_cache_extent(roots_info_cache, entry);
12421                         ASSERT(ret == 0);
12422                 } else {
12423                         rii = container_of(entry, struct root_item_info,
12424                                            cache_extent);
12425                 }
12426
12427                 ASSERT(rii->cache_extent.start == root_id);
12428                 ASSERT(rii->cache_extent.size == 1);
12429
12430                 if (level > rii->level || rii->level == (u8)-1) {
12431                         rii->level = level;
12432                         rii->bytenr = found_key.objectid;
12433                         rii->gen = btrfs_extent_generation(leaf, ei);
12434                         rii->node_count = 1;
12435                 } else if (level == rii->level) {
12436                         rii->node_count++;
12437                 }
12438 next:
12439                 path.slots[0]++;
12440         }
12441
12442 out:
12443         btrfs_release_path(&path);
12444
12445         return ret;
12446 }
12447
12448 static int maybe_repair_root_item(struct btrfs_path *path,
12449                                   const struct btrfs_key *root_key,
12450                                   const int read_only_mode)
12451 {
12452         const u64 root_id = root_key->objectid;
12453         struct cache_extent *entry;
12454         struct root_item_info *rii;
12455         struct btrfs_root_item ri;
12456         unsigned long offset;
12457
12458         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12459         if (!entry) {
12460                 fprintf(stderr,
12461                         "Error: could not find extent items for root %llu\n",
12462                         root_key->objectid);
12463                 return -ENOENT;
12464         }
12465
12466         rii = container_of(entry, struct root_item_info, cache_extent);
12467         ASSERT(rii->cache_extent.start == root_id);
12468         ASSERT(rii->cache_extent.size == 1);
12469
12470         if (rii->node_count != 1) {
12471                 fprintf(stderr,
12472                         "Error: could not find btree root extent for root %llu\n",
12473                         root_id);
12474                 return -ENOENT;
12475         }
12476
12477         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12478         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12479
12480         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12481             btrfs_root_level(&ri) != rii->level ||
12482             btrfs_root_generation(&ri) != rii->gen) {
12483
12484                 /*
12485                  * If we're in repair mode but our caller told us to not update
12486                  * the root item, i.e. just check if it needs to be updated, don't
12487                  * print this message, since the caller will call us again shortly
12488                  * for the same root item without read only mode (the caller will
12489                  * open a transaction first).
12490                  */
12491                 if (!(read_only_mode && repair))
12492                         fprintf(stderr,
12493                                 "%sroot item for root %llu,"
12494                                 " current bytenr %llu, current gen %llu, current level %u,"
12495                                 " new bytenr %llu, new gen %llu, new level %u\n",
12496                                 (read_only_mode ? "" : "fixing "),
12497                                 root_id,
12498                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12499                                 btrfs_root_level(&ri),
12500                                 rii->bytenr, rii->gen, rii->level);
12501
12502                 if (btrfs_root_generation(&ri) > rii->gen) {
12503                         fprintf(stderr,
12504                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12505                                 root_id, btrfs_root_generation(&ri), rii->gen);
12506                         return -EINVAL;
12507                 }
12508
12509                 if (!read_only_mode) {
12510                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12511                         btrfs_set_root_level(&ri, rii->level);
12512                         btrfs_set_root_generation(&ri, rii->gen);
12513                         write_extent_buffer(path->nodes[0], &ri,
12514                                             offset, sizeof(ri));
12515                 }
12516
12517                 return 1;
12518         }
12519
12520         return 0;
12521 }
12522
12523 /*
12524  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12525  * caused read-only snapshots to be corrupted if they were created at a moment
12526  * when the source subvolume/snapshot had orphan items. The issue was that the
12527  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12528  * node instead of the post orphan cleanup root node.
12529  * So this function, and its callees, just detects and fixes those cases. Even
12530  * though the regression was for read-only snapshots, this function applies to
12531  * any snapshot/subvolume root.
12532  * This must be run before any other repair code - not doing it so, makes other
12533  * repair code delete or modify backrefs in the extent tree for example, which
12534  * will result in an inconsistent fs after repairing the root items.
12535  */
12536 static int repair_root_items(struct btrfs_fs_info *info)
12537 {
12538         struct btrfs_path path;
12539         struct btrfs_key key;
12540         struct extent_buffer *leaf;
12541         struct btrfs_trans_handle *trans = NULL;
12542         int ret = 0;
12543         int bad_roots = 0;
12544         int need_trans = 0;
12545
12546         btrfs_init_path(&path);
12547
12548         ret = build_roots_info_cache(info);
12549         if (ret)
12550                 goto out;
12551
12552         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12553         key.type = BTRFS_ROOT_ITEM_KEY;
12554         key.offset = 0;
12555
12556 again:
12557         /*
12558          * Avoid opening and committing transactions if a leaf doesn't have
12559          * any root items that need to be fixed, so that we avoid rotating
12560          * backup roots unnecessarily.
12561          */
12562         if (need_trans) {
12563                 trans = btrfs_start_transaction(info->tree_root, 1);
12564                 if (IS_ERR(trans)) {
12565                         ret = PTR_ERR(trans);
12566                         goto out;
12567                 }
12568         }
12569
12570         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12571                                 0, trans ? 1 : 0);
12572         if (ret < 0)
12573                 goto out;
12574         leaf = path.nodes[0];
12575
12576         while (1) {
12577                 struct btrfs_key found_key;
12578
12579                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12580                         int no_more_keys = find_next_key(&path, &key);
12581
12582                         btrfs_release_path(&path);
12583                         if (trans) {
12584                                 ret = btrfs_commit_transaction(trans,
12585                                                                info->tree_root);
12586                                 trans = NULL;
12587                                 if (ret < 0)
12588                                         goto out;
12589                         }
12590                         need_trans = 0;
12591                         if (no_more_keys)
12592                                 break;
12593                         goto again;
12594                 }
12595
12596                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12597
12598                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12599                         goto next;
12600                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12601                         goto next;
12602
12603                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12604                 if (ret < 0)
12605                         goto out;
12606                 if (ret) {
12607                         if (!trans && repair) {
12608                                 need_trans = 1;
12609                                 key = found_key;
12610                                 btrfs_release_path(&path);
12611                                 goto again;
12612                         }
12613                         bad_roots++;
12614                 }
12615 next:
12616                 path.slots[0]++;
12617         }
12618         ret = 0;
12619 out:
12620         free_roots_info_cache();
12621         btrfs_release_path(&path);
12622         if (trans)
12623                 btrfs_commit_transaction(trans, info->tree_root);
12624         if (ret < 0)
12625                 return ret;
12626
12627         return bad_roots;
12628 }
12629
12630 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12631 {
12632         struct btrfs_trans_handle *trans;
12633         struct btrfs_block_group_cache *bg_cache;
12634         u64 current = 0;
12635         int ret = 0;
12636
12637         /* Clear all free space cache inodes and its extent data */
12638         while (1) {
12639                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12640                 if (!bg_cache)
12641                         break;
12642                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12643                 if (ret < 0)
12644                         return ret;
12645                 current = bg_cache->key.objectid + bg_cache->key.offset;
12646         }
12647
12648         /* Don't forget to set cache_generation to -1 */
12649         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12650         if (IS_ERR(trans)) {
12651                 error("failed to update super block cache generation");
12652                 return PTR_ERR(trans);
12653         }
12654         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12655         btrfs_commit_transaction(trans, fs_info->tree_root);
12656
12657         return ret;
12658 }
12659
/*
 * Help text for "btrfs check", as a NULL-terminated array of lines.
 *
 * NOTE(review): the layout looks like synopsis, one-line summary, long
 * description, an empty separator string, then one or more lines per option;
 * confirm against the help printing code before reordering entries.
 */
const char * const cmd_check_usage[] = {
	/* synopsis and summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
12690
12691 int cmd_check(int argc, char **argv)
12692 {
12693         struct cache_tree root_cache;
12694         struct btrfs_root *root;
12695         struct btrfs_fs_info *info;
12696         u64 bytenr = 0;
12697         u64 subvolid = 0;
12698         u64 tree_root_bytenr = 0;
12699         u64 chunk_root_bytenr = 0;
12700         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12701         int ret;
12702         int err = 0;
12703         u64 num;
12704         int init_csum_tree = 0;
12705         int readonly = 0;
12706         int clear_space_cache = 0;
12707         int qgroup_report = 0;
12708         int qgroups_repaired = 0;
12709         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12710
12711         while(1) {
12712                 int c;
12713                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12714                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12715                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12716                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12717                 static const struct option long_options[] = {
12718                         { "super", required_argument, NULL, 's' },
12719                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12720                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12721                         { "init-csum-tree", no_argument, NULL,
12722                                 GETOPT_VAL_INIT_CSUM },
12723                         { "init-extent-tree", no_argument, NULL,
12724                                 GETOPT_VAL_INIT_EXTENT },
12725                         { "check-data-csum", no_argument, NULL,
12726                                 GETOPT_VAL_CHECK_CSUM },
12727                         { "backup", no_argument, NULL, 'b' },
12728                         { "subvol-extents", required_argument, NULL, 'E' },
12729                         { "qgroup-report", no_argument, NULL, 'Q' },
12730                         { "tree-root", required_argument, NULL, 'r' },
12731                         { "chunk-root", required_argument, NULL,
12732                                 GETOPT_VAL_CHUNK_TREE },
12733                         { "progress", no_argument, NULL, 'p' },
12734                         { "mode", required_argument, NULL,
12735                                 GETOPT_VAL_MODE },
12736                         { "clear-space-cache", required_argument, NULL,
12737                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12738                         { NULL, 0, NULL, 0}
12739                 };
12740
12741                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12742                 if (c < 0)
12743                         break;
12744                 switch(c) {
12745                         case 'a': /* ignored */ break;
12746                         case 'b':
12747                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12748                                 break;
12749                         case 's':
12750                                 num = arg_strtou64(optarg);
12751                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12752                                         error(
12753                                         "super mirror should be less than %d",
12754                                                 BTRFS_SUPER_MIRROR_MAX);
12755                                         exit(1);
12756                                 }
12757                                 bytenr = btrfs_sb_offset(((int)num));
12758                                 printf("using SB copy %llu, bytenr %llu\n", num,
12759                                        (unsigned long long)bytenr);
12760                                 break;
12761                         case 'Q':
12762                                 qgroup_report = 1;
12763                                 break;
12764                         case 'E':
12765                                 subvolid = arg_strtou64(optarg);
12766                                 break;
12767                         case 'r':
12768                                 tree_root_bytenr = arg_strtou64(optarg);
12769                                 break;
12770                         case GETOPT_VAL_CHUNK_TREE:
12771                                 chunk_root_bytenr = arg_strtou64(optarg);
12772                                 break;
12773                         case 'p':
12774                                 ctx.progress_enabled = true;
12775                                 break;
12776                         case '?':
12777                         case 'h':
12778                                 usage(cmd_check_usage);
12779                         case GETOPT_VAL_REPAIR:
12780                                 printf("enabling repair mode\n");
12781                                 repair = 1;
12782                                 ctree_flags |= OPEN_CTREE_WRITES;
12783                                 break;
12784                         case GETOPT_VAL_READONLY:
12785                                 readonly = 1;
12786                                 break;
12787                         case GETOPT_VAL_INIT_CSUM:
12788                                 printf("Creating a new CRC tree\n");
12789                                 init_csum_tree = 1;
12790                                 repair = 1;
12791                                 ctree_flags |= OPEN_CTREE_WRITES;
12792                                 break;
12793                         case GETOPT_VAL_INIT_EXTENT:
12794                                 init_extent_tree = 1;
12795                                 ctree_flags |= (OPEN_CTREE_WRITES |
12796                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12797                                 repair = 1;
12798                                 break;
12799                         case GETOPT_VAL_CHECK_CSUM:
12800                                 check_data_csum = 1;
12801                                 break;
12802                         case GETOPT_VAL_MODE:
12803                                 check_mode = parse_check_mode(optarg);
12804                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12805                                         error("unknown mode: %s", optarg);
12806                                         exit(1);
12807                                 }
12808                                 break;
12809                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12810                                 if (strcmp(optarg, "v1") == 0) {
12811                                         clear_space_cache = 1;
12812                                 } else if (strcmp(optarg, "v2") == 0) {
12813                                         clear_space_cache = 2;
12814                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12815                                 } else {
12816                                         error(
12817                 "invalid argument to --clear-space-cache, must be v1 or v2");
12818                                         exit(1);
12819                                 }
12820                                 ctree_flags |= OPEN_CTREE_WRITES;
12821                                 break;
12822                 }
12823         }
12824
12825         if (check_argc_exact(argc - optind, 1))
12826                 usage(cmd_check_usage);
12827
12828         if (ctx.progress_enabled) {
12829                 ctx.tp = TASK_NOTHING;
12830                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12831         }
12832
12833         /* This check is the only reason for --readonly to exist */
12834         if (readonly && repair) {
12835                 error("repair options are not compatible with --readonly");
12836                 exit(1);
12837         }
12838
12839         /*
12840          * Not supported yet
12841          */
12842         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12843                 error("low memory mode doesn't support repair yet");
12844                 exit(1);
12845         }
12846
12847         radix_tree_init();
12848         cache_tree_init(&root_cache);
12849
12850         if((ret = check_mounted(argv[optind])) < 0) {
12851                 error("could not check mount status: %s", strerror(-ret));
12852                 err |= !!ret;
12853                 goto err_out;
12854         } else if(ret) {
12855                 error("%s is currently mounted, aborting", argv[optind]);
12856                 ret = -EBUSY;
12857                 err |= !!ret;
12858                 goto err_out;
12859         }
12860
12861         /* only allow partial opening under repair mode */
12862         if (repair)
12863                 ctree_flags |= OPEN_CTREE_PARTIAL;
12864
12865         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12866                                   chunk_root_bytenr, ctree_flags);
12867         if (!info) {
12868                 error("cannot open file system");
12869                 ret = -EIO;
12870                 err |= !!ret;
12871                 goto err_out;
12872         }
12873
12874         global_info = info;
12875         root = info->fs_root;
12876         if (clear_space_cache == 1) {
12877                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12878                         error(
12879                 "free space cache v2 detected, use --clear-space-cache v2");
12880                         ret = 1;
12881                         goto close_out;
12882                 }
12883                 printf("Clearing free space cache\n");
12884                 ret = clear_free_space_cache(info);
12885                 if (ret) {
12886                         error("failed to clear free space cache");
12887                         ret = 1;
12888                 } else {
12889                         printf("Free space cache cleared\n");
12890                 }
12891                 goto close_out;
12892         } else if (clear_space_cache == 2) {
12893                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12894                         printf("no free space cache v2 to clear\n");
12895                         ret = 0;
12896                         goto close_out;
12897                 }
12898                 printf("Clear free space cache v2\n");
12899                 ret = btrfs_clear_free_space_tree(info);
12900                 if (ret) {
12901                         error("failed to clear free space cache v2: %d", ret);
12902                         ret = 1;
12903                 } else {
12904                         printf("free space cache v2 cleared\n");
12905                 }
12906                 goto close_out;
12907         }
12908
12909         /*
12910          * repair mode will force us to commit transaction which
12911          * will make us fail to load log tree when mounting.
12912          */
12913         if (repair && btrfs_super_log_root(info->super_copy)) {
12914                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12915                 if (!ret) {
12916                         ret = 1;
12917                         err |= !!ret;
12918                         goto close_out;
12919                 }
12920                 ret = zero_log_tree(root);
12921                 err |= !!ret;
12922                 if (ret) {
12923                         error("failed to zero log tree: %d", ret);
12924                         goto close_out;
12925                 }
12926         }
12927
12928         uuid_unparse(info->super_copy->fsid, uuidbuf);
12929         if (qgroup_report) {
12930                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12931                        uuidbuf);
12932                 ret = qgroup_verify_all(info);
12933                 err |= !!ret;
12934                 if (ret == 0)
12935                         report_qgroups(1);
12936                 goto close_out;
12937         }
12938         if (subvolid) {
12939                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12940                        subvolid, argv[optind], uuidbuf);
12941                 ret = print_extent_state(info, subvolid);
12942                 err |= !!ret;
12943                 goto close_out;
12944         }
12945         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12946
12947         if (!extent_buffer_uptodate(info->tree_root->node) ||
12948             !extent_buffer_uptodate(info->dev_root->node) ||
12949             !extent_buffer_uptodate(info->chunk_root->node)) {
12950                 error("critical roots corrupted, unable to check the filesystem");
12951                 err |= !!ret;
12952                 ret = -EIO;
12953                 goto close_out;
12954         }
12955
12956         if (init_extent_tree || init_csum_tree) {
12957                 struct btrfs_trans_handle *trans;
12958
12959                 trans = btrfs_start_transaction(info->extent_root, 0);
12960                 if (IS_ERR(trans)) {
12961                         error("error starting transaction");
12962                         ret = PTR_ERR(trans);
12963                         err |= !!ret;
12964                         goto close_out;
12965                 }
12966
12967                 if (init_extent_tree) {
12968                         printf("Creating a new extent tree\n");
12969                         ret = reinit_extent_tree(trans, info);
12970                         err |= !!ret;
12971                         if (ret)
12972                                 goto close_out;
12973                 }
12974
12975                 if (init_csum_tree) {
12976                         printf("Reinitialize checksum tree\n");
12977                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12978                         if (ret) {
12979                                 error("checksum tree initialization failed: %d",
12980                                                 ret);
12981                                 ret = -EIO;
12982                                 err |= !!ret;
12983                                 goto close_out;
12984                         }
12985
12986                         ret = fill_csum_tree(trans, info->csum_root,
12987                                              init_extent_tree);
12988                         err |= !!ret;
12989                         if (ret) {
12990                                 error("checksum tree refilling failed: %d", ret);
12991                                 return -EIO;
12992                         }
12993                 }
12994                 /*
12995                  * Ok now we commit and run the normal fsck, which will add
12996                  * extent entries for all of the items it finds.
12997                  */
12998                 ret = btrfs_commit_transaction(trans, info->extent_root);
12999                 err |= !!ret;
13000                 if (ret)
13001                         goto close_out;
13002         }
13003         if (!extent_buffer_uptodate(info->extent_root->node)) {
13004                 error("critical: extent_root, unable to check the filesystem");
13005                 ret = -EIO;
13006                 err |= !!ret;
13007                 goto close_out;
13008         }
13009         if (!extent_buffer_uptodate(info->csum_root->node)) {
13010                 error("critical: csum_root, unable to check the filesystem");
13011                 ret = -EIO;
13012                 err |= !!ret;
13013                 goto close_out;
13014         }
13015
13016         if (!ctx.progress_enabled)
13017                 fprintf(stderr, "checking extents\n");
13018         if (check_mode == CHECK_MODE_LOWMEM)
13019                 ret = check_chunks_and_extents_v2(root);
13020         else
13021                 ret = check_chunks_and_extents(root);
13022         err |= !!ret;
13023         if (ret)
13024                 error(
13025                 "errors found in extent allocation tree or chunk allocation");
13026
13027         ret = repair_root_items(info);
13028         err |= !!ret;
13029         if (ret < 0) {
13030                 error("failed to repair root items: %s", strerror(-ret));
13031                 goto close_out;
13032         }
13033         if (repair) {
13034                 fprintf(stderr, "Fixed %d roots.\n", ret);
13035                 ret = 0;
13036         } else if (ret > 0) {
13037                 fprintf(stderr,
13038                        "Found %d roots with an outdated root item.\n",
13039                        ret);
13040                 fprintf(stderr,
13041                         "Please run a filesystem check with the option --repair to fix them.\n");
13042                 ret = 1;
13043                 err |= !!ret;
13044                 goto close_out;
13045         }
13046
13047         if (!ctx.progress_enabled) {
13048                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13049                         fprintf(stderr, "checking free space tree\n");
13050                 else
13051                         fprintf(stderr, "checking free space cache\n");
13052         }
13053         ret = check_space_cache(root);
13054         err |= !!ret;
13055         if (ret) {
13056                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13057                         error("errors found in free space tree");
13058                 else
13059                         error("errors found in free space cache");
13060                 goto out;
13061         }
13062
13063         /*
13064          * We used to have to have these hole extents in between our real
13065          * extents so if we don't have this flag set we need to make sure there
13066          * are no gaps in the file extents for inodes, otherwise we can just
13067          * ignore it when this happens.
13068          */
13069         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13070         if (!ctx.progress_enabled)
13071                 fprintf(stderr, "checking fs roots\n");
13072         if (check_mode == CHECK_MODE_LOWMEM)
13073                 ret = check_fs_roots_v2(root->fs_info);
13074         else
13075                 ret = check_fs_roots(root, &root_cache);
13076         err |= !!ret;
13077         if (ret) {
13078                 error("errors found in fs roots");
13079                 goto out;
13080         }
13081
13082         fprintf(stderr, "checking csums\n");
13083         ret = check_csums(root);
13084         err |= !!ret;
13085         if (ret) {
13086                 error("errors found in csum tree");
13087                 goto out;
13088         }
13089
13090         fprintf(stderr, "checking root refs\n");
13091         /* For low memory mode, check_fs_roots_v2 handles root refs */
13092         if (check_mode != CHECK_MODE_LOWMEM) {
13093                 ret = check_root_refs(root, &root_cache);
13094                 err |= !!ret;
13095                 if (ret) {
13096                         error("errors found in root refs");
13097                         goto out;
13098                 }
13099         }
13100
13101         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13102                 struct extent_buffer *eb;
13103
13104                 eb = list_first_entry(&root->fs_info->recow_ebs,
13105                                       struct extent_buffer, recow);
13106                 list_del_init(&eb->recow);
13107                 ret = recow_extent_buffer(root, eb);
13108                 err |= !!ret;
13109                 if (ret) {
13110                         error("fails to fix transid errors");
13111                         break;
13112                 }
13113         }
13114
13115         while (!list_empty(&delete_items)) {
13116                 struct bad_item *bad;
13117
13118                 bad = list_first_entry(&delete_items, struct bad_item, list);
13119                 list_del_init(&bad->list);
13120                 if (repair) {
13121                         ret = delete_bad_item(root, bad);
13122                         err |= !!ret;
13123                 }
13124                 free(bad);
13125         }
13126
13127         if (info->quota_enabled) {
13128                 fprintf(stderr, "checking quota groups\n");
13129                 ret = qgroup_verify_all(info);
13130                 err |= !!ret;
13131                 if (ret) {
13132                         error("failed to check quota groups");
13133                         goto out;
13134                 }
13135                 report_qgroups(0);
13136                 ret = repair_qgroups(info, &qgroups_repaired);
13137                 err |= !!ret;
13138                 if (err) {
13139                         error("failed to repair quota groups");
13140                         goto out;
13141                 }
13142                 ret = 0;
13143         }
13144
13145         if (!list_empty(&root->fs_info->recow_ebs)) {
13146                 error("transid errors in file system");
13147                 ret = 1;
13148                 err |= !!ret;
13149         }
13150 out:
13151         if (found_old_backref) { /*
13152                  * there was a disk format change when mixed
13153                  * backref was in testing tree. The old format
13154                  * existed about one week.
13155                  */
13156                 printf("\n * Found old mixed backref format. "
13157                        "The old format is not supported! *"
13158                        "\n * Please mount the FS in readonly mode, "
13159                        "backup data and re-format the FS. *\n\n");
13160                 err |= 1;
13161         }
13162         printf("found %llu bytes used, ",
13163                (unsigned long long)bytes_used);
13164         if (err)
13165                 printf("error(s) found\n");
13166         else
13167                 printf("no error found\n");
13168         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13169         printf("total tree bytes: %llu\n",
13170                (unsigned long long)total_btree_bytes);
13171         printf("total fs tree bytes: %llu\n",
13172                (unsigned long long)total_fs_tree_bytes);
13173         printf("total extent tree bytes: %llu\n",
13174                (unsigned long long)total_extent_tree_bytes);
13175         printf("btree space waste bytes: %llu\n",
13176                (unsigned long long)btree_space_waste);
13177         printf("file data blocks allocated: %llu\n referenced %llu\n",
13178                 (unsigned long long)data_bytes_allocated,
13179                 (unsigned long long)data_bytes_referenced);
13180
13181         free_qgroup_counts();
13182         free_root_recs_tree(&root_cache);
13183 close_out:
13184         close_ctree(root);
13185 err_out:
13186         if (ctx.progress_enabled)
13187                 task_deinit(ctx.info);
13188
13189         return err;
13190 }