btrfs-progs: Refactor btrfs_find_tree_block to use btrfs_fs_info
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase reported by the progress task.
 *
 * The first three values are used as indexes into task_position_string[] in
 * print_status_check(); TASK_NOTHING has no label there and makes that
 * function return without printing anything.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
53
/* Context handed to the progress callbacks (see print_status_check()). */
struct task_ctx {
        /* NOTE(review): presumably set from a command line option -- confirm */
        int progress_enabled;
        /* phase whose label print_status_check() prints */
        enum task_position tp;

        /* handle passed to task_period_start() / task_period_wait() */
        struct task_info *info;
};
60
/*
 * File-scope state of the checker.
 *
 * NOTE(review): the code reading and writing these variables is outside this
 * chunk; the grouping below is inferred from the names only -- confirm.
 */

/* Presumably accounting totals gathered while scanning. */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
/* List heads; element types are not visible in this chunk. */
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
/* Presumably feature / option flags. */
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Progress reporting context, zero-initialized. */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Checker implementation to run.
 *
 * parse_check_mode() maps "lowmem" to CHECK_MODE_LOWMEM, "orig"/"original"
 * to CHECK_MODE_ORIGINAL and anything else to CHECK_MODE_UNKNOWN.
 * CHECK_MODE_DEFAULT is an alias of CHECK_MODE_ORIGINAL.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Currently selected mode; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Common header of a back reference record.
 *
 * It is embedded as the member "node" in struct data_backref and
 * struct tree_backref.
 */
struct extent_backref {
        /* list linkage; to_extent_backref() maps it back to this struct */
        struct list_head list;
        /*
         * NOTE(review): presumably tells data_backref from tree_backref;
         * the flags are set outside this chunk -- confirm.
         */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/*
 * Back reference record for a data extent.
 *
 * Starts with the common header; to_data_backref() converts a pointer to
 * that header back to this struct.
 */
struct data_backref {
        struct extent_backref node;
        /*
         * parent and root share the same storage.
         * NOTE(review): presumably selected by node.full_backref -- confirm.
         */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
116
/*
 * Error bits that can be OR-ed into one mask.  Bit 0 is not assigned here.
 * NOTE(review): the users of these bits are outside this chunk.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to inode_rec->orphan_data_extents.
 *
 * print_orphan_data_extents() prints objectid (labelled "inode"), offset,
 * disk_bytenr and disk_len of every entry.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
154
/*
 * Back reference record for a tree block.
 *
 * Starts with the common header; to_tree_backref() converts a pointer to
 * that header back to this struct.
 */
struct tree_backref {
        struct extent_backref node;
        /*
         * parent and root share the same storage.
         * NOTE(review): presumably selected by node.full_backref -- confirm.
         */
        union {
                u64 parent;
                u64 root;
        };
};
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * The value 2 does not fit a single bit, which is why that bit-field is two
 * bits wide.
 */
enum { FLAG_UNSET = 2 };
170
/*
 * Record of one extent.
 * NOTE(review): the code filling this record is outside this chunk; field
 * meanings not commented below cannot be confirmed from here.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        /* list linkage; to_extent_record() maps it back to this struct */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can also hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
/*
 * One name referring to an inode, linked into inode_record::backrefs.
 *
 * The name is stored inline after the fixed part: clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
struct inode_backref {
        /* list linkage; to_inode_backref() maps it back to this struct */
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        u8 filetype;
        u8 ref_type;
        /* NOTE(review): presumably a REF_ERR_* mask -- confirm */
        int errors;
        u64 dir;
        u64 index;
        u16 namelen;
        /* trailing storage for the name, must stay the last member */
        char name[0];
};
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/*
 * Snapshot of selected root item values.
 * NOTE(review): filled in outside this chunk; the field names suggest they
 * mirror the corresponding root item fields -- confirm.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
232
/*
 * Error bits for a backref; print_ref_error() turns a mask of them into
 * text.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
/*
 * One hole [start, start + len) of a file, kept in an rb tree
 * (inode_record::holes) that compare_hole() orders by start.
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
252
/*
 * Everything collected about one inode.
 *
 * Records are reference counted (refs): get_inode_rec() clones a record
 * that has more than one reference before handing it out for modification.
 */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* mask of I_ERR_* bits, printed by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* get_inode_rec() presets this to 1 for BTRFS_FREE_INO_OBJECTID */
        u32 found_link;
        u64 found_size;
        /* get_inode_rec() initializes this to (u64)-1 */
        u64 extent_start;
        u64 extent_end;
        /* rb tree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* reference count, 1 for a freshly created or cloned record */
        u32 refs;
};
280
/*
 * Error bits stored in inode_record::errors; print_inode_error() turns a
 * mask of them into text.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
/*
 * One reference to a root, linked into root_record::backrefs.
 * NOTE(review): filled in outside this chunk; the name is presumably stored
 * inline after the fixed part, like in struct inode_backref -- confirm.
 */
struct root_backref {
        /* list linkage; to_root_backref() maps it back to this struct */
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        /* NOTE(review): presumably a REF_ERR_* mask -- confirm */
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        /* trailing storage for the name, must stay the last member */
        char name[0];
};
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
/*
 * Everything collected about one root.
 * NOTE(review): created and looked up outside this chunk.
 */
struct root_record {
        /* list of struct root_backref */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
324
/*
 * Cache tree entry that carries an opaque pointer.
 *
 * get_inode_rec() uses it to store a struct inode_record in data, keyed by
 * cache.start = inode number with cache.size = 1.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
329
/*
 * Cache tree entry holding its own root and inode caches.
 * NOTE(review): used outside this chunk; presumably describes a tree node
 * shared between several roots -- confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
337
/*
 * Start and size of one block.
 * NOTE(review): users are outside this chunk.
 */
struct block_info {
        u64 start;
        u32 size;
};
342
/*
 * State of a tree walk, with one shared_node slot per tree level.
 * NOTE(review): users are outside this chunk; active_node and root_level are
 * presumably indexes into nodes[] -- confirm.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
349
/*
 * Key of an item together with the id of the root it belongs to.
 * NOTE(review): users are outside this chunk; presumably queued on the
 * delete_items list -- confirm.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
355
/*
 * One (bytenr, bytes) candidate with counters, linked into a list.
 * NOTE(review): users are outside this chunk.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
363
/*
 * Information about the root node of one tree, stored as a cache tree entry.
 * NOTE(review): users are outside this chunk; presumably kept in
 * roots_info_cache -- confirm.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet.  They are only used internally
 * for error classification.
 *
 * Every flag owns its own bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable once they were OR-ed into a mask; it now uses the first
 * free bit instead.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Progress task epilogue: terminate the status line written by
 * print_status_check() with a newline and flush stdout.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        putchar('\n');
        fflush(stdout);

        return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
/* Forward declaration; the definition is not part of this chunk. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize,
833                                          root->fs_info->sectorsize));
834         }
835 }
836
837 static void print_ref_error(int errors)
838 {
839         if (errors & REF_ERR_NO_DIR_ITEM)
840                 fprintf(stderr, ", no dir item");
841         if (errors & REF_ERR_NO_DIR_INDEX)
842                 fprintf(stderr, ", no dir index");
843         if (errors & REF_ERR_NO_INODE_REF)
844                 fprintf(stderr, ", no inode ref");
845         if (errors & REF_ERR_DUP_DIR_ITEM)
846                 fprintf(stderr, ", dup dir item");
847         if (errors & REF_ERR_DUP_DIR_INDEX)
848                 fprintf(stderr, ", dup dir index");
849         if (errors & REF_ERR_DUP_INODE_REF)
850                 fprintf(stderr, ", dup inode ref");
851         if (errors & REF_ERR_INDEX_UNMATCH)
852                 fprintf(stderr, ", index mismatch");
853         if (errors & REF_ERR_FILETYPE_UNMATCH)
854                 fprintf(stderr, ", filetype mismatch");
855         if (errors & REF_ERR_NAME_TOO_LONG)
856                 fprintf(stderr, ", name too long");
857         if (errors & REF_ERR_NO_ROOT_REF)
858                 fprintf(stderr, ", no root ref");
859         if (errors & REF_ERR_NO_ROOT_BACKREF)
860                 fprintf(stderr, ", no root backref");
861         if (errors & REF_ERR_DUP_ROOT_REF)
862                 fprintf(stderr, ", dup root ref");
863         if (errors & REF_ERR_DUP_ROOT_BACKREF)
864                 fprintf(stderr, ", dup root backref");
865         fprintf(stderr, "\n");
866 }
867
868 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
869                                           u64 ino, int mod)
870 {
871         struct ptr_node *node;
872         struct cache_extent *cache;
873         struct inode_record *rec = NULL;
874         int ret;
875
876         cache = lookup_cache_extent(inode_cache, ino, 1);
877         if (cache) {
878                 node = container_of(cache, struct ptr_node, cache);
879                 rec = node->data;
880                 if (mod && rec->refs > 1) {
881                         node->data = clone_inode_rec(rec);
882                         if (IS_ERR(node->data))
883                                 return node->data;
884                         rec->refs--;
885                         rec = node->data;
886                 }
887         } else if (mod) {
888                 rec = calloc(1, sizeof(*rec));
889                 if (!rec)
890                         return ERR_PTR(-ENOMEM);
891                 rec->ino = ino;
892                 rec->extent_start = (u64)-1;
893                 rec->refs = 1;
894                 INIT_LIST_HEAD(&rec->backrefs);
895                 INIT_LIST_HEAD(&rec->orphan_extents);
896                 rec->holes = RB_ROOT;
897
898                 node = malloc(sizeof(*node));
899                 if (!node) {
900                         free(rec);
901                         return ERR_PTR(-ENOMEM);
902                 }
903                 node->cache.start = ino;
904                 node->cache.size = 1;
905                 node->data = rec;
906
907                 if (ino == BTRFS_FREE_INO_OBJECTID)
908                         rec->found_link = 1;
909
910                 ret = insert_cache_extent(inode_cache, &node->cache);
911                 if (ret)
912                         return ERR_PTR(-EEXIST);
913         }
914         return rec;
915 }
916
917 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 {
919         struct orphan_data_extent *orphan;
920
921         while (!list_empty(orphan_extents)) {
922                 orphan = list_entry(orphan_extents->next,
923                                     struct orphan_data_extent, list);
924                 list_del(&orphan->list);
925                 free(orphan);
926         }
927 }
928
929 static void free_inode_rec(struct inode_record *rec)
930 {
931         struct inode_backref *backref;
932
933         if (--rec->refs > 0)
934                 return;
935
936         while (!list_empty(&rec->backrefs)) {
937                 backref = to_inode_backref(rec->backrefs.next);
938                 list_del(&backref->list);
939                 free(backref);
940         }
941         free_orphan_data_extents(&rec->orphan_extents);
942         free_file_extent_holes(&rec->holes);
943         free(rec);
944 }
945
946 static int can_free_inode_rec(struct inode_record *rec)
947 {
948         if (!rec->errors && rec->checked && rec->found_inode_item &&
949             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
950                 return 1;
951         return 0;
952 }
953
954 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
955                                  struct inode_record *rec)
956 {
957         struct cache_extent *cache;
958         struct inode_backref *tmp, *backref;
959         struct ptr_node *node;
960         u8 filetype;
961
962         if (!rec->found_inode_item)
963                 return;
964
965         filetype = imode_to_type(rec->imode);
966         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
967                 if (backref->found_dir_item && backref->found_dir_index) {
968                         if (backref->filetype != filetype)
969                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
970                         if (!backref->errors && backref->found_inode_ref &&
971                             rec->nlink == rec->found_link) {
972                                 list_del(&backref->list);
973                                 free(backref);
974                         }
975                 }
976         }
977
978         if (!rec->checked || rec->merging)
979                 return;
980
981         if (S_ISDIR(rec->imode)) {
982                 if (rec->found_size != rec->isize)
983                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
984                 if (rec->found_file_extent)
985                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
986         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
987                 if (rec->found_dir_item)
988                         rec->errors |= I_ERR_ODD_DIR_ITEM;
989                 if (rec->found_size != rec->nbytes)
990                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
991                 if (rec->nlink > 0 && !no_holes &&
992                     (rec->extent_end < rec->isize ||
993                      first_extent_gap(&rec->holes) < rec->isize))
994                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
995         }
996
997         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
998                 if (rec->found_csum_item && rec->nodatasum)
999                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1000                 if (rec->some_csum_missing && !rec->nodatasum)
1001                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1002         }
1003
1004         BUG_ON(rec->refs != 1);
1005         if (can_free_inode_rec(rec)) {
1006                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1007                 node = container_of(cache, struct ptr_node, cache);
1008                 BUG_ON(node->data != rec);
1009                 remove_cache_extent(inode_cache, &node->cache);
1010                 free(node);
1011                 free_inode_rec(rec);
1012         }
1013 }
1014
1015 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 {
1017         struct btrfs_path path;
1018         struct btrfs_key key;
1019         int ret;
1020
1021         key.objectid = BTRFS_ORPHAN_OBJECTID;
1022         key.type = BTRFS_ORPHAN_ITEM_KEY;
1023         key.offset = ino;
1024
1025         btrfs_init_path(&path);
1026         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1027         btrfs_release_path(&path);
1028         if (ret > 0)
1029                 ret = -ENOENT;
1030         return ret;
1031 }
1032
1033 static int process_inode_item(struct extent_buffer *eb,
1034                               int slot, struct btrfs_key *key,
1035                               struct shared_node *active_node)
1036 {
1037         struct inode_record *rec;
1038         struct btrfs_inode_item *item;
1039
1040         rec = active_node->current;
1041         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1042         if (rec->found_inode_item) {
1043                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1044                 return 1;
1045         }
1046         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1047         rec->nlink = btrfs_inode_nlink(eb, item);
1048         rec->isize = btrfs_inode_size(eb, item);
1049         rec->nbytes = btrfs_inode_nbytes(eb, item);
1050         rec->imode = btrfs_inode_mode(eb, item);
1051         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052                 rec->nodatasum = 1;
1053         rec->found_inode_item = 1;
1054         if (rec->nlink == 0)
1055                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1056         maybe_free_inode_rec(&active_node->inode_cache, rec);
1057         return 0;
1058 }
1059
1060 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061                                                 const char *name,
1062                                                 int namelen, u64 dir)
1063 {
1064         struct inode_backref *backref;
1065
1066         list_for_each_entry(backref, &rec->backrefs, list) {
1067                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068                         break;
1069                 if (backref->dir != dir || backref->namelen != namelen)
1070                         continue;
1071                 if (memcmp(name, backref->name, namelen))
1072                         continue;
1073                 return backref;
1074         }
1075
1076         backref = malloc(sizeof(*backref) + namelen + 1);
1077         if (!backref)
1078                 return NULL;
1079         memset(backref, 0, sizeof(*backref));
1080         backref->dir = dir;
1081         backref->namelen = namelen;
1082         memcpy(backref->name, name, namelen);
1083         backref->name[namelen] = '\0';
1084         list_add_tail(&backref->list, &rec->backrefs);
1085         return backref;
1086 }
1087
1088 static int add_inode_backref(struct cache_tree *inode_cache,
1089                              u64 ino, u64 dir, u64 index,
1090                              const char *name, int namelen,
1091                              u8 filetype, u8 itemtype, int errors)
1092 {
1093         struct inode_record *rec;
1094         struct inode_backref *backref;
1095
1096         rec = get_inode_rec(inode_cache, ino, 1);
1097         BUG_ON(IS_ERR(rec));
1098         backref = get_inode_backref(rec, name, namelen, dir);
1099         BUG_ON(!backref);
1100         if (errors)
1101                 backref->errors |= errors;
1102         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1103                 if (backref->found_dir_index)
1104                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1105                 if (backref->found_inode_ref && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 if (backref->found_dir_item && backref->filetype != filetype)
1108                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109
1110                 backref->index = index;
1111                 backref->filetype = filetype;
1112                 backref->found_dir_index = 1;
1113         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114                 rec->found_link++;
1115                 if (backref->found_dir_item)
1116                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1117                 if (backref->found_dir_index && backref->filetype != filetype)
1118                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119
1120                 backref->filetype = filetype;
1121                 backref->found_dir_item = 1;
1122         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1123                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1124                 if (backref->found_inode_ref)
1125                         backref->errors |= REF_ERR_DUP_INODE_REF;
1126                 if (backref->found_dir_index && backref->index != index)
1127                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1128                 else
1129                         backref->index = index;
1130
1131                 backref->ref_type = itemtype;
1132                 backref->found_inode_ref = 1;
1133         } else {
1134                 BUG_ON(1);
1135         }
1136
1137         maybe_free_inode_rec(inode_cache, rec);
1138         return 0;
1139 }
1140
1141 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1142                             struct cache_tree *dst_cache)
1143 {
1144         struct inode_backref *backref;
1145         u32 dir_count = 0;
1146         int ret = 0;
1147
1148         dst->merging = 1;
1149         list_for_each_entry(backref, &src->backrefs, list) {
1150                 if (backref->found_dir_index) {
1151                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1152                                         backref->index, backref->name,
1153                                         backref->namelen, backref->filetype,
1154                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1155                 }
1156                 if (backref->found_dir_item) {
1157                         dir_count++;
1158                         add_inode_backref(dst_cache, dst->ino,
1159                                         backref->dir, 0, backref->name,
1160                                         backref->namelen, backref->filetype,
1161                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1162                 }
1163                 if (backref->found_inode_ref) {
1164                         add_inode_backref(dst_cache, dst->ino,
1165                                         backref->dir, backref->index,
1166                                         backref->name, backref->namelen, 0,
1167                                         backref->ref_type, backref->errors);
1168                 }
1169         }
1170
1171         if (src->found_dir_item)
1172                 dst->found_dir_item = 1;
1173         if (src->found_file_extent)
1174                 dst->found_file_extent = 1;
1175         if (src->found_csum_item)
1176                 dst->found_csum_item = 1;
1177         if (src->some_csum_missing)
1178                 dst->some_csum_missing = 1;
1179         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1180                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1181                 if (ret < 0)
1182                         return ret;
1183         }
1184
1185         BUG_ON(src->found_link < dir_count);
1186         dst->found_link += src->found_link - dir_count;
1187         dst->found_size += src->found_size;
1188         if (src->extent_start != (u64)-1) {
1189                 if (dst->extent_start == (u64)-1) {
1190                         dst->extent_start = src->extent_start;
1191                         dst->extent_end = src->extent_end;
1192                 } else {
1193                         if (dst->extent_end > src->extent_start)
1194                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1195                         else if (dst->extent_end < src->extent_start) {
1196                                 ret = add_file_extent_hole(&dst->holes,
1197                                         dst->extent_end,
1198                                         src->extent_start - dst->extent_end);
1199                         }
1200                         if (dst->extent_end < src->extent_end)
1201                                 dst->extent_end = src->extent_end;
1202                 }
1203         }
1204
1205         dst->errors |= src->errors;
1206         if (src->found_inode_item) {
1207                 if (!dst->found_inode_item) {
1208                         dst->nlink = src->nlink;
1209                         dst->isize = src->isize;
1210                         dst->nbytes = src->nbytes;
1211                         dst->imode = src->imode;
1212                         dst->nodatasum = src->nodatasum;
1213                         dst->found_inode_item = 1;
1214                 } else {
1215                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1216                 }
1217         }
1218         dst->merging = 0;
1219
1220         return 0;
1221 }
1222
1223 static int splice_shared_node(struct shared_node *src_node,
1224                               struct shared_node *dst_node)
1225 {
1226         struct cache_extent *cache;
1227         struct ptr_node *node, *ins;
1228         struct cache_tree *src, *dst;
1229         struct inode_record *rec, *conflict;
1230         u64 current_ino = 0;
1231         int splice = 0;
1232         int ret;
1233
1234         if (--src_node->refs == 0)
1235                 splice = 1;
1236         if (src_node->current)
1237                 current_ino = src_node->current->ino;
1238
1239         src = &src_node->root_cache;
1240         dst = &dst_node->root_cache;
1241 again:
1242         cache = search_cache_extent(src, 0);
1243         while (cache) {
1244                 node = container_of(cache, struct ptr_node, cache);
1245                 rec = node->data;
1246                 cache = next_cache_extent(cache);
1247
1248                 if (splice) {
1249                         remove_cache_extent(src, &node->cache);
1250                         ins = node;
1251                 } else {
1252                         ins = malloc(sizeof(*ins));
1253                         BUG_ON(!ins);
1254                         ins->cache.start = node->cache.start;
1255                         ins->cache.size = node->cache.size;
1256                         ins->data = rec;
1257                         rec->refs++;
1258                 }
1259                 ret = insert_cache_extent(dst, &ins->cache);
1260                 if (ret == -EEXIST) {
1261                         conflict = get_inode_rec(dst, rec->ino, 1);
1262                         BUG_ON(IS_ERR(conflict));
1263                         merge_inode_recs(rec, conflict, dst);
1264                         if (rec->checked) {
1265                                 conflict->checked = 1;
1266                                 if (dst_node->current == conflict)
1267                                         dst_node->current = NULL;
1268                         }
1269                         maybe_free_inode_rec(dst, conflict);
1270                         free_inode_rec(rec);
1271                         free(ins);
1272                 } else {
1273                         BUG_ON(ret);
1274                 }
1275         }
1276
1277         if (src == &src_node->root_cache) {
1278                 src = &src_node->inode_cache;
1279                 dst = &dst_node->inode_cache;
1280                 goto again;
1281         }
1282
1283         if (current_ino > 0 && (!dst_node->current ||
1284             current_ino > dst_node->current->ino)) {
1285                 if (dst_node->current) {
1286                         dst_node->current->checked = 1;
1287                         maybe_free_inode_rec(dst, dst_node->current);
1288                 }
1289                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1290                 BUG_ON(IS_ERR(dst_node->current));
1291         }
1292         return 0;
1293 }
1294
1295 static void free_inode_ptr(struct cache_extent *cache)
1296 {
1297         struct ptr_node *node;
1298         struct inode_record *rec;
1299
1300         node = container_of(cache, struct ptr_node, cache);
1301         rec = node->data;
1302         free_inode_rec(rec);
1303         free(node);
1304 }
1305
1306 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307
1308 static struct shared_node *find_shared_node(struct cache_tree *shared,
1309                                             u64 bytenr)
1310 {
1311         struct cache_extent *cache;
1312         struct shared_node *node;
1313
1314         cache = lookup_cache_extent(shared, bytenr, 1);
1315         if (cache) {
1316                 node = container_of(cache, struct shared_node, cache);
1317                 return node;
1318         }
1319         return NULL;
1320 }
1321
1322 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 {
1324         int ret;
1325         struct shared_node *node;
1326
1327         node = calloc(1, sizeof(*node));
1328         if (!node)
1329                 return -ENOMEM;
1330         node->cache.start = bytenr;
1331         node->cache.size = 1;
1332         cache_tree_init(&node->root_cache);
1333         cache_tree_init(&node->inode_cache);
1334         node->refs = refs;
1335
1336         ret = insert_cache_extent(shared, &node->cache);
1337
1338         return ret;
1339 }
1340
1341 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1342                              struct walk_control *wc, int level)
1343 {
1344         struct shared_node *node;
1345         struct shared_node *dest;
1346         int ret;
1347
1348         if (level == wc->active_node)
1349                 return 0;
1350
1351         BUG_ON(wc->active_node <= level);
1352         node = find_shared_node(&wc->shared, bytenr);
1353         if (!node) {
1354                 ret = add_shared_node(&wc->shared, bytenr, refs);
1355                 BUG_ON(ret);
1356                 node = find_shared_node(&wc->shared, bytenr);
1357                 wc->nodes[level] = node;
1358                 wc->active_node = level;
1359                 return 0;
1360         }
1361
1362         if (wc->root_level == wc->active_node &&
1363             btrfs_root_refs(&root->root_item) == 0) {
1364                 if (--node->refs == 0) {
1365                         free_inode_recs_tree(&node->root_cache);
1366                         free_inode_recs_tree(&node->inode_cache);
1367                         remove_cache_extent(&wc->shared, &node->cache);
1368                         free(node);
1369                 }
1370                 return 1;
1371         }
1372
1373         dest = wc->nodes[wc->active_node];
1374         splice_shared_node(node, dest);
1375         if (node->refs == 0) {
1376                 remove_cache_extent(&wc->shared, &node->cache);
1377                 free(node);
1378         }
1379         return 1;
1380 }
1381
1382 static int leave_shared_node(struct btrfs_root *root,
1383                              struct walk_control *wc, int level)
1384 {
1385         struct shared_node *node;
1386         struct shared_node *dest;
1387         int i;
1388
1389         if (level == wc->root_level)
1390                 return 0;
1391
1392         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1393                 if (wc->nodes[i])
1394                         break;
1395         }
1396         BUG_ON(i >= BTRFS_MAX_LEVEL);
1397
1398         node = wc->nodes[wc->active_node];
1399         wc->nodes[wc->active_node] = NULL;
1400         wc->active_node = i;
1401
1402         dest = wc->nodes[wc->active_node];
1403         if (wc->active_node < wc->root_level ||
1404             btrfs_root_refs(&root->root_item) > 0) {
1405                 BUG_ON(node->refs <= 1);
1406                 splice_shared_node(node, dest);
1407         } else {
1408                 BUG_ON(node->refs < 2);
1409                 node->refs--;
1410         }
1411         return 0;
1412 }
1413
1414 /*
1415  * Returns:
1416  * < 0 - on error
1417  * 1   - if the root with id child_root_id is a child of root parent_root_id
1418  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1419  *       has other root(s) as parent(s)
1420  * 2   - if the root child_root_id doesn't have any parent roots
1421  */
1422 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1423                          u64 child_root_id)
1424 {
1425         struct btrfs_path path;
1426         struct btrfs_key key;
1427         struct extent_buffer *leaf;
1428         int has_parent = 0;
1429         int ret;
1430
1431         btrfs_init_path(&path);
1432
1433         key.objectid = parent_root_id;
1434         key.type = BTRFS_ROOT_REF_KEY;
1435         key.offset = child_root_id;
1436         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1437                                 0, 0);
1438         if (ret < 0)
1439                 return ret;
1440         btrfs_release_path(&path);
1441         if (!ret)
1442                 return 1;
1443
1444         key.objectid = child_root_id;
1445         key.type = BTRFS_ROOT_BACKREF_KEY;
1446         key.offset = 0;
1447         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1448                                 0, 0);
1449         if (ret < 0)
1450                 goto out;
1451
1452         while (1) {
1453                 leaf = path.nodes[0];
1454                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1455                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1456                         if (ret)
1457                                 break;
1458                         leaf = path.nodes[0];
1459                 }
1460
1461                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1462                 if (key.objectid != child_root_id ||
1463                     key.type != BTRFS_ROOT_BACKREF_KEY)
1464                         break;
1465
1466                 has_parent = 1;
1467
1468                 if (key.offset == parent_root_id) {
1469                         btrfs_release_path(&path);
1470                         return 1;
1471                 }
1472
1473                 path.slots[0]++;
1474         }
1475 out:
1476         btrfs_release_path(&path);
1477         if (ret < 0)
1478                 return ret;
1479         return has_parent ? 0 : 2;
1480 }
1481
1482 static int process_dir_item(struct extent_buffer *eb,
1483                             int slot, struct btrfs_key *key,
1484                             struct shared_node *active_node)
1485 {
1486         u32 total;
1487         u32 cur = 0;
1488         u32 len;
1489         u32 name_len;
1490         u32 data_len;
1491         int error;
1492         int nritems = 0;
1493         u8 filetype;
1494         struct btrfs_dir_item *di;
1495         struct inode_record *rec;
1496         struct cache_tree *root_cache;
1497         struct cache_tree *inode_cache;
1498         struct btrfs_key location;
1499         char namebuf[BTRFS_NAME_LEN];
1500
1501         root_cache = &active_node->root_cache;
1502         inode_cache = &active_node->inode_cache;
1503         rec = active_node->current;
1504         rec->found_dir_item = 1;
1505
1506         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507         total = btrfs_item_size_nr(eb, slot);
1508         while (cur < total) {
1509                 nritems++;
1510                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511                 name_len = btrfs_dir_name_len(eb, di);
1512                 data_len = btrfs_dir_data_len(eb, di);
1513                 filetype = btrfs_dir_type(eb, di);
1514
1515                 rec->found_size += name_len;
1516                 if (cur + sizeof(*di) + name_len > total ||
1517                     name_len > BTRFS_NAME_LEN) {
1518                         error = REF_ERR_NAME_TOO_LONG;
1519
1520                         if (cur + sizeof(*di) > total)
1521                                 break;
1522                         len = min_t(u32, total - cur - sizeof(*di),
1523                                     BTRFS_NAME_LEN);
1524                 } else {
1525                         len = name_len;
1526                         error = 0;
1527                 }
1528
1529                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1530
1531                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1532                         add_inode_backref(inode_cache, location.objectid,
1533                                           key->objectid, key->offset, namebuf,
1534                                           len, filetype, key->type, error);
1535                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1536                         add_inode_backref(root_cache, location.objectid,
1537                                           key->objectid, key->offset,
1538                                           namebuf, len, filetype,
1539                                           key->type, error);
1540                 } else {
1541                         fprintf(stderr, "invalid location in dir item %u\n",
1542                                 location.type);
1543                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1544                                           key->objectid, key->offset, namebuf,
1545                                           len, filetype, key->type, error);
1546                 }
1547
1548                 len = sizeof(*di) + name_len + data_len;
1549                 di = (struct btrfs_dir_item *)((char *)di + len);
1550                 cur += len;
1551         }
1552         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1553                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1554
1555         return 0;
1556 }
1557
1558 static int process_inode_ref(struct extent_buffer *eb,
1559                              int slot, struct btrfs_key *key,
1560                              struct shared_node *active_node)
1561 {
1562         u32 total;
1563         u32 cur = 0;
1564         u32 len;
1565         u32 name_len;
1566         u64 index;
1567         int error;
1568         struct cache_tree *inode_cache;
1569         struct btrfs_inode_ref *ref;
1570         char namebuf[BTRFS_NAME_LEN];
1571
1572         inode_cache = &active_node->inode_cache;
1573
1574         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1575         total = btrfs_item_size_nr(eb, slot);
1576         while (cur < total) {
1577                 name_len = btrfs_inode_ref_name_len(eb, ref);
1578                 index = btrfs_inode_ref_index(eb, ref);
1579
1580                 /* inode_ref + namelen should not cross item boundary */
1581                 if (cur + sizeof(*ref) + name_len > total ||
1582                     name_len > BTRFS_NAME_LEN) {
1583                         if (total < cur + sizeof(*ref))
1584                                 break;
1585
1586                         /* Still try to read out the remaining part */
1587                         len = min_t(u32, total - cur - sizeof(*ref),
1588                                     BTRFS_NAME_LEN);
1589                         error = REF_ERR_NAME_TOO_LONG;
1590                 } else {
1591                         len = name_len;
1592                         error = 0;
1593                 }
1594
1595                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1596                 add_inode_backref(inode_cache, key->objectid, key->offset,
1597                                   index, namebuf, len, 0, key->type, error);
1598
1599                 len = sizeof(*ref) + name_len;
1600                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1601                 cur += len;
1602         }
1603         return 0;
1604 }
1605
1606 static int process_inode_extref(struct extent_buffer *eb,
1607                                 int slot, struct btrfs_key *key,
1608                                 struct shared_node *active_node)
1609 {
1610         u32 total;
1611         u32 cur = 0;
1612         u32 len;
1613         u32 name_len;
1614         u64 index;
1615         u64 parent;
1616         int error;
1617         struct cache_tree *inode_cache;
1618         struct btrfs_inode_extref *extref;
1619         char namebuf[BTRFS_NAME_LEN];
1620
1621         inode_cache = &active_node->inode_cache;
1622
1623         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1624         total = btrfs_item_size_nr(eb, slot);
1625         while (cur < total) {
1626                 name_len = btrfs_inode_extref_name_len(eb, extref);
1627                 index = btrfs_inode_extref_index(eb, extref);
1628                 parent = btrfs_inode_extref_parent(eb, extref);
1629                 if (name_len <= BTRFS_NAME_LEN) {
1630                         len = name_len;
1631                         error = 0;
1632                 } else {
1633                         len = BTRFS_NAME_LEN;
1634                         error = REF_ERR_NAME_TOO_LONG;
1635                 }
1636                 read_extent_buffer(eb, namebuf,
1637                                    (unsigned long)(extref + 1), len);
1638                 add_inode_backref(inode_cache, key->objectid, parent,
1639                                   index, namebuf, len, 0, key->type, error);
1640
1641                 len = sizeof(*extref) + name_len;
1642                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1643                 cur += len;
1644         }
1645         return 0;
1646
1647 }
1648
1649 static int count_csum_range(struct btrfs_root *root, u64 start,
1650                             u64 len, u64 *found)
1651 {
1652         struct btrfs_key key;
1653         struct btrfs_path path;
1654         struct extent_buffer *leaf;
1655         int ret;
1656         size_t size;
1657         *found = 0;
1658         u64 csum_end;
1659         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1660
1661         btrfs_init_path(&path);
1662
1663         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1664         key.offset = start;
1665         key.type = BTRFS_EXTENT_CSUM_KEY;
1666
1667         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1668                                 &key, &path, 0, 0);
1669         if (ret < 0)
1670                 goto out;
1671         if (ret > 0 && path.slots[0] > 0) {
1672                 leaf = path.nodes[0];
1673                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1674                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1675                     key.type == BTRFS_EXTENT_CSUM_KEY)
1676                         path.slots[0]--;
1677         }
1678
1679         while (len > 0) {
1680                 leaf = path.nodes[0];
1681                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1682                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1683                         if (ret > 0)
1684                                 break;
1685                         else if (ret < 0)
1686                                 goto out;
1687                         leaf = path.nodes[0];
1688                 }
1689
1690                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1691                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1692                     key.type != BTRFS_EXTENT_CSUM_KEY)
1693                         break;
1694
1695                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1696                 if (key.offset >= start + len)
1697                         break;
1698
1699                 if (key.offset > start)
1700                         start = key.offset;
1701
1702                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1703                 csum_end = key.offset + (size / csum_size) *
1704                            root->fs_info->sectorsize;
1705                 if (csum_end > start) {
1706                         size = min(csum_end - start, len);
1707                         len -= size;
1708                         start += size;
1709                         *found += size;
1710                 }
1711
1712                 path.slots[0]++;
1713         }
1714 out:
1715         btrfs_release_path(&path);
1716         if (ret < 0)
1717                 return ret;
1718         return 0;
1719 }
1720
1721 static int process_file_extent(struct btrfs_root *root,
1722                                 struct extent_buffer *eb,
1723                                 int slot, struct btrfs_key *key,
1724                                 struct shared_node *active_node)
1725 {
1726         struct inode_record *rec;
1727         struct btrfs_file_extent_item *fi;
1728         u64 num_bytes = 0;
1729         u64 disk_bytenr = 0;
1730         u64 extent_offset = 0;
1731         u64 mask = root->fs_info->sectorsize - 1;
1732         int extent_type;
1733         int ret;
1734
1735         rec = active_node->current;
1736         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1737         rec->found_file_extent = 1;
1738
1739         if (rec->extent_start == (u64)-1) {
1740                 rec->extent_start = key->offset;
1741                 rec->extent_end = key->offset;
1742         }
1743
1744         if (rec->extent_end > key->offset)
1745                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1746         else if (rec->extent_end < key->offset) {
1747                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1748                                            key->offset - rec->extent_end);
1749                 if (ret < 0)
1750                         return ret;
1751         }
1752
1753         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1754         extent_type = btrfs_file_extent_type(eb, fi);
1755
1756         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1757                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1758                 if (num_bytes == 0)
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 rec->found_size += num_bytes;
1761                 num_bytes = (num_bytes + mask) & ~mask;
1762         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1763                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1764                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1765                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1766                 extent_offset = btrfs_file_extent_offset(eb, fi);
1767                 if (num_bytes == 0 || (num_bytes & mask))
1768                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1769                 if (num_bytes + extent_offset >
1770                     btrfs_file_extent_ram_bytes(eb, fi))
1771                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1772                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1773                     (btrfs_file_extent_compression(eb, fi) ||
1774                      btrfs_file_extent_encryption(eb, fi) ||
1775                      btrfs_file_extent_other_encoding(eb, fi)))
1776                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1777                 if (disk_bytenr > 0)
1778                         rec->found_size += num_bytes;
1779         } else {
1780                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1781         }
1782         rec->extent_end = key->offset + num_bytes;
1783
1784         /*
1785          * The data reloc tree will copy full extents into its inode and then
1786          * copy the corresponding csums.  Because the extent it copied could be
1787          * a preallocated extent that hasn't been written to yet there may be no
1788          * csums to copy, ergo we won't have csums for our file extent.  This is
1789          * ok so just don't bother checking csums if the inode belongs to the
1790          * data reloc tree.
1791          */
1792         if (disk_bytenr > 0 &&
1793             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1794                 u64 found;
1795                 if (btrfs_file_extent_compression(eb, fi))
1796                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1797                 else
1798                         disk_bytenr += extent_offset;
1799
1800                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1801                 if (ret < 0)
1802                         return ret;
1803                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1804                         if (found > 0)
1805                                 rec->found_csum_item = 1;
1806                         if (found < num_bytes)
1807                                 rec->some_csum_missing = 1;
1808                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1809                         if (found > 0)
1810                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1811                 }
1812         }
1813         return 0;
1814 }
1815
1816 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1817                             struct walk_control *wc)
1818 {
1819         struct btrfs_key key;
1820         u32 nritems;
1821         int i;
1822         int ret = 0;
1823         struct cache_tree *inode_cache;
1824         struct shared_node *active_node;
1825
1826         if (wc->root_level == wc->active_node &&
1827             btrfs_root_refs(&root->root_item) == 0)
1828                 return 0;
1829
1830         active_node = wc->nodes[wc->active_node];
1831         inode_cache = &active_node->inode_cache;
1832         nritems = btrfs_header_nritems(eb);
1833         for (i = 0; i < nritems; i++) {
1834                 btrfs_item_key_to_cpu(eb, &key, i);
1835
1836                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1837                         continue;
1838                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1839                         continue;
1840
1841                 if (active_node->current == NULL ||
1842                     active_node->current->ino < key.objectid) {
1843                         if (active_node->current) {
1844                                 active_node->current->checked = 1;
1845                                 maybe_free_inode_rec(inode_cache,
1846                                                      active_node->current);
1847                         }
1848                         active_node->current = get_inode_rec(inode_cache,
1849                                                              key.objectid, 1);
1850                         BUG_ON(IS_ERR(active_node->current));
1851                 }
1852                 switch (key.type) {
1853                 case BTRFS_DIR_ITEM_KEY:
1854                 case BTRFS_DIR_INDEX_KEY:
1855                         ret = process_dir_item(eb, i, &key, active_node);
1856                         break;
1857                 case BTRFS_INODE_REF_KEY:
1858                         ret = process_inode_ref(eb, i, &key, active_node);
1859                         break;
1860                 case BTRFS_INODE_EXTREF_KEY:
1861                         ret = process_inode_extref(eb, i, &key, active_node);
1862                         break;
1863                 case BTRFS_INODE_ITEM_KEY:
1864                         ret = process_inode_item(eb, i, &key, active_node);
1865                         break;
1866                 case BTRFS_EXTENT_DATA_KEY:
1867                         ret = process_file_extent(root, eb, i, &key,
1868                                                   active_node);
1869                         break;
1870                 default:
1871                         break;
1872                 };
1873         }
1874         return ret;
1875 }
1876
/*
 * Per-level cache of the most recent extent lookup made while walking a
 * tree, indexed by tree level.
 */
struct node_refs {
	/* start of the block last looked up at each level */
	u64 bytenr[BTRFS_MAX_LEVEL];
	/* reference count reported by btrfs_lookup_extent_info() for it */
	u64 refs[BTRFS_MAX_LEVEL];
	/* non-zero if that block should be checked in the current root */
	int need_check[BTRFS_MAX_LEVEL];
};

/* Forward declarations for helpers defined further down in this file. */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
			     struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
			    unsigned int ext_ref);
1887
1888 /*
1889  * Returns >0  Found error, not fatal, should continue
1890  * Returns <0  Fatal error, must exit the whole check
1891  * Returns 0   No errors found
1892  */
1893 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1894                                struct node_refs *nrefs, int *level, int ext_ref)
1895 {
1896         struct extent_buffer *cur = path->nodes[0];
1897         struct btrfs_key key;
1898         u64 cur_bytenr;
1899         u32 nritems;
1900         u64 first_ino = 0;
1901         int root_level = btrfs_header_level(root->node);
1902         int i;
1903         int ret = 0; /* Final return value */
1904         int err = 0; /* Positive error bitmap */
1905
1906         cur_bytenr = cur->start;
1907
1908         /* skip to first inode item or the first inode number change */
1909         nritems = btrfs_header_nritems(cur);
1910         for (i = 0; i < nritems; i++) {
1911                 btrfs_item_key_to_cpu(cur, &key, i);
1912                 if (i == 0)
1913                         first_ino = key.objectid;
1914                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1915                     (first_ino && first_ino != key.objectid))
1916                         break;
1917         }
1918         if (i == nritems) {
1919                 path->slots[0] = nritems;
1920                 return 0;
1921         }
1922         path->slots[0] = i;
1923
1924 again:
1925         err |= check_inode_item(root, path, ext_ref);
1926
1927         if (err & LAST_ITEM)
1928                 goto out;
1929
1930         /* still have inode items in thie leaf */
1931         if (cur->start == cur_bytenr)
1932                 goto again;
1933
1934         /*
1935          * we have switched to another leaf, above nodes may
1936          * have changed, here walk down the path, if a node
1937          * or leaf is shared, check whether we can skip this
1938          * node or leaf.
1939          */
1940         for (i = root_level; i >= 0; i--) {
1941                 if (path->nodes[i]->start == nrefs->bytenr[i])
1942                         continue;
1943
1944                 ret = update_nodes_refs(root,
1945                                 path->nodes[i]->start,
1946                                 nrefs, i);
1947                 if (ret)
1948                         goto out;
1949
1950                 if (!nrefs->need_check[i]) {
1951                         *level += 1;
1952                         break;
1953                 }
1954         }
1955
1956         for (i = 0; i < *level; i++) {
1957                 free_extent_buffer(path->nodes[i]);
1958                 path->nodes[i] = NULL;
1959         }
1960 out:
1961         err &= ~LAST_ITEM;
1962         if (err && !ret)
1963                 ret = err;
1964         return ret;
1965 }
1966
1967 static void reada_walk_down(struct btrfs_root *root,
1968                             struct extent_buffer *node, int slot)
1969 {
1970         u64 bytenr;
1971         u64 ptr_gen;
1972         u32 nritems;
1973         u32 blocksize;
1974         int i;
1975         int level;
1976
1977         level = btrfs_header_level(node);
1978         if (level != 1)
1979                 return;
1980
1981         nritems = btrfs_header_nritems(node);
1982         blocksize = root->fs_info->nodesize;
1983         for (i = slot; i < nritems; i++) {
1984                 bytenr = btrfs_node_blockptr(node, i);
1985                 ptr_gen = btrfs_node_ptr_generation(node, i);
1986                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1987         }
1988 }
1989
1990 /*
1991  * Check the child node/leaf by the following condition:
1992  * 1. the first item key of the node/leaf should be the same with the one
1993  *    in parent.
1994  * 2. block in parent node should match the child node/leaf.
1995  * 3. generation of parent node and child's header should be consistent.
1996  *
1997  * Or the child node/leaf pointed by the key in parent is not valid.
1998  *
1999  * We hope to check leaf owner too, but since subvol may share leaves,
2000  * which makes leaf owner check not so strong, key check should be
2001  * sufficient enough for that case.
2002  */
2003 static int check_child_node(struct extent_buffer *parent, int slot,
2004                             struct extent_buffer *child)
2005 {
2006         struct btrfs_key parent_key;
2007         struct btrfs_key child_key;
2008         int ret = 0;
2009
2010         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2011         if (btrfs_header_level(child) == 0)
2012                 btrfs_item_key_to_cpu(child, &child_key, 0);
2013         else
2014                 btrfs_node_key_to_cpu(child, &child_key, 0);
2015
2016         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2017                 ret = -EINVAL;
2018                 fprintf(stderr,
2019                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2020                         parent_key.objectid, parent_key.type, parent_key.offset,
2021                         child_key.objectid, child_key.type, child_key.offset);
2022         }
2023         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2024                 ret = -EINVAL;
2025                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2026                         btrfs_node_blockptr(parent, slot),
2027                         btrfs_header_bytenr(child));
2028         }
2029         if (btrfs_node_ptr_generation(parent, slot) !=
2030             btrfs_header_generation(child)) {
2031                 ret = -EINVAL;
2032                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2033                         btrfs_header_generation(child),
2034                         btrfs_node_ptr_generation(parent, slot));
2035         }
2036         return ret;
2037 }
2038
2039 /*
2040  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2041  * in every fs or file tree check. Here we find its all root ids, and only check
2042  * it in the fs or file tree which has the smallest root id.
2043  */
2044 static int need_check(struct btrfs_root *root, struct ulist *roots)
2045 {
2046         struct rb_node *node;
2047         struct ulist_node *u;
2048
2049         if (roots->nnodes == 1)
2050                 return 1;
2051
2052         node = rb_first(&roots->root);
2053         u = rb_entry(node, struct ulist_node, rb_node);
2054         /*
2055          * current root id is not smallest, we skip it and let it be checked
2056          * in the fs or file tree who hash the smallest root id.
2057          */
2058         if (root->objectid != u->val)
2059                 return 0;
2060
2061         return 1;
2062 }
2063
2064 /*
2065  * for a tree node or leaf, we record its reference count, so later if we still
2066  * process this node or leaf, don't need to compute its reference count again.
2067  */
2068 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2069                              struct node_refs *nrefs, u64 level)
2070 {
2071         int check, ret;
2072         u64 refs;
2073         struct ulist *roots;
2074
2075         if (nrefs->bytenr[level] != bytenr) {
2076                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2077                                        level, 1, &refs, NULL);
2078                 if (ret < 0)
2079                         return ret;
2080
2081                 nrefs->bytenr[level] = bytenr;
2082                 nrefs->refs[level] = refs;
2083                 if (refs > 1) {
2084                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2085                                                    0, &roots);
2086                         if (ret)
2087                                 return -EIO;
2088
2089                         check = need_check(root, roots);
2090                         ulist_free(roots);
2091                         nrefs->need_check[level] = check;
2092                 } else {
2093                         nrefs->need_check[level] = 1;
2094                 }
2095         }
2096
2097         return 0;
2098 }
2099
2100 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2101                           struct walk_control *wc, int *level,
2102                           struct node_refs *nrefs)
2103 {
2104         enum btrfs_tree_block_status status;
2105         u64 bytenr;
2106         u64 ptr_gen;
2107         struct btrfs_fs_info *fs_info = root->fs_info;
2108         struct extent_buffer *next;
2109         struct extent_buffer *cur;
2110         u32 blocksize;
2111         int ret, err = 0;
2112         u64 refs;
2113
2114         WARN_ON(*level < 0);
2115         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2116
2117         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2118                 refs = nrefs->refs[*level];
2119                 ret = 0;
2120         } else {
2121                 ret = btrfs_lookup_extent_info(NULL, root,
2122                                        path->nodes[*level]->start,
2123                                        *level, 1, &refs, NULL);
2124                 if (ret < 0) {
2125                         err = ret;
2126                         goto out;
2127                 }
2128                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2129                 nrefs->refs[*level] = refs;
2130         }
2131
2132         if (refs > 1) {
2133                 ret = enter_shared_node(root, path->nodes[*level]->start,
2134                                         refs, wc, *level);
2135                 if (ret > 0) {
2136                         err = ret;
2137                         goto out;
2138                 }
2139         }
2140
2141         while (*level >= 0) {
2142                 WARN_ON(*level < 0);
2143                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2144                 cur = path->nodes[*level];
2145
2146                 if (btrfs_header_level(cur) != *level)
2147                         WARN_ON(1);
2148
2149                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2150                         break;
2151                 if (*level == 0) {
2152                         ret = process_one_leaf(root, cur, wc);
2153                         if (ret < 0)
2154                                 err = ret;
2155                         break;
2156                 }
2157                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2158                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2159                 blocksize = fs_info->nodesize;
2160
2161                 if (bytenr == nrefs->bytenr[*level - 1]) {
2162                         refs = nrefs->refs[*level - 1];
2163                 } else {
2164                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2165                                         *level - 1, 1, &refs, NULL);
2166                         if (ret < 0) {
2167                                 refs = 0;
2168                         } else {
2169                                 nrefs->bytenr[*level - 1] = bytenr;
2170                                 nrefs->refs[*level - 1] = refs;
2171                         }
2172                 }
2173
2174                 if (refs > 1) {
2175                         ret = enter_shared_node(root, bytenr, refs,
2176                                                 wc, *level - 1);
2177                         if (ret > 0) {
2178                                 path->slots[*level]++;
2179                                 continue;
2180                         }
2181                 }
2182
2183                 next = btrfs_find_tree_block(fs_info, bytenr, blocksize);
2184                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2185                         free_extent_buffer(next);
2186                         reada_walk_down(root, cur, path->slots[*level]);
2187                         next = read_tree_block(root->fs_info, bytenr, blocksize,
2188                                                ptr_gen);
2189                         if (!extent_buffer_uptodate(next)) {
2190                                 struct btrfs_key node_key;
2191
2192                                 btrfs_node_key_to_cpu(path->nodes[*level],
2193                                                       &node_key,
2194                                                       path->slots[*level]);
2195                                 btrfs_add_corrupt_extent_record(root->fs_info,
2196                                                 &node_key,
2197                                                 path->nodes[*level]->start,
2198                                                 root->fs_info->nodesize,
2199                                                 *level);
2200                                 err = -EIO;
2201                                 goto out;
2202                         }
2203                 }
2204
2205                 ret = check_child_node(cur, path->slots[*level], next);
2206                 if (ret) {
2207                         free_extent_buffer(next);
2208                         err = ret;
2209                         goto out;
2210                 }
2211
2212                 if (btrfs_is_leaf(next))
2213                         status = btrfs_check_leaf(root, NULL, next);
2214                 else
2215                         status = btrfs_check_node(root, NULL, next);
2216                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2217                         free_extent_buffer(next);
2218                         err = -EIO;
2219                         goto out;
2220                 }
2221
2222                 *level = *level - 1;
2223                 free_extent_buffer(path->nodes[*level]);
2224                 path->nodes[*level] = next;
2225                 path->slots[*level] = 0;
2226         }
2227 out:
2228         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2229         return err;
2230 }
2231
2232 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2233                             unsigned int ext_ref);
2234
2235 /*
2236  * Returns >0  Found error, should continue
2237  * Returns <0  Fatal error, must exit the whole check
2238  * Returns 0   No errors found
2239  */
2240 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2241                              int *level, struct node_refs *nrefs, int ext_ref)
2242 {
2243         enum btrfs_tree_block_status status;
2244         u64 bytenr;
2245         u64 ptr_gen;
2246         struct btrfs_fs_info *fs_info = root->fs_info;
2247         struct extent_buffer *next;
2248         struct extent_buffer *cur;
2249         u32 blocksize;
2250         int ret;
2251
2252         WARN_ON(*level < 0);
2253         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2254
2255         ret = update_nodes_refs(root, path->nodes[*level]->start,
2256                                 nrefs, *level);
2257         if (ret < 0)
2258                 return ret;
2259
2260         while (*level >= 0) {
2261                 WARN_ON(*level < 0);
2262                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2263                 cur = path->nodes[*level];
2264
2265                 if (btrfs_header_level(cur) != *level)
2266                         WARN_ON(1);
2267
2268                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2269                         break;
2270                 /* Don't forgot to check leaf/node validation */
2271                 if (*level == 0) {
2272                         ret = btrfs_check_leaf(root, NULL, cur);
2273                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2274                                 ret = -EIO;
2275                                 break;
2276                         }
2277                         ret = process_one_leaf_v2(root, path, nrefs,
2278                                                   level, ext_ref);
2279                         break;
2280                 } else {
2281                         ret = btrfs_check_node(root, NULL, cur);
2282                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2283                                 ret = -EIO;
2284                                 break;
2285                         }
2286                 }
2287                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2288                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2289                 blocksize = fs_info->nodesize;
2290
2291                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2292                 if (ret)
2293                         break;
2294                 if (!nrefs->need_check[*level - 1]) {
2295                         path->slots[*level]++;
2296                         continue;
2297                 }
2298
2299                 next = btrfs_find_tree_block(fs_info, bytenr, blocksize);
2300                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2301                         free_extent_buffer(next);
2302                         reada_walk_down(root, cur, path->slots[*level]);
2303                         next = read_tree_block(fs_info, bytenr, blocksize,
2304                                                ptr_gen);
2305                         if (!extent_buffer_uptodate(next)) {
2306                                 struct btrfs_key node_key;
2307
2308                                 btrfs_node_key_to_cpu(path->nodes[*level],
2309                                                       &node_key,
2310                                                       path->slots[*level]);
2311                                 btrfs_add_corrupt_extent_record(fs_info,
2312                                                 &node_key,
2313                                                 path->nodes[*level]->start,
2314                                                 fs_info->nodesize,
2315                                                 *level);
2316                                 ret = -EIO;
2317                                 break;
2318                         }
2319                 }
2320
2321                 ret = check_child_node(cur, path->slots[*level], next);
2322                 if (ret < 0) 
2323                         break;
2324
2325                 if (btrfs_is_leaf(next))
2326                         status = btrfs_check_leaf(root, NULL, next);
2327                 else
2328                         status = btrfs_check_node(root, NULL, next);
2329                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2330                         free_extent_buffer(next);
2331                         ret = -EIO;
2332                         break;
2333                 }
2334
2335                 *level = *level - 1;
2336                 free_extent_buffer(path->nodes[*level]);
2337                 path->nodes[*level] = next;
2338                 path->slots[*level] = 0;
2339         }
2340         return ret;
2341 }
2342
2343 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2344                         struct walk_control *wc, int *level)
2345 {
2346         int i;
2347         struct extent_buffer *leaf;
2348
2349         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2350                 leaf = path->nodes[i];
2351                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2352                         path->slots[i]++;
2353                         *level = i;
2354                         return 0;
2355                 } else {
2356                         free_extent_buffer(path->nodes[*level]);
2357                         path->nodes[*level] = NULL;
2358                         BUG_ON(*level > wc->active_node);
2359                         if (*level == wc->active_node)
2360                                 leave_shared_node(root, wc, *level);
2361                         *level = i + 1;
2362                 }
2363         }
2364         return 1;
2365 }
2366
2367 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2368                            int *level)
2369 {
2370         int i;
2371         struct extent_buffer *leaf;
2372
2373         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2374                 leaf = path->nodes[i];
2375                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2376                         path->slots[i]++;
2377                         *level = i;
2378                         return 0;
2379                 } else {
2380                         free_extent_buffer(path->nodes[*level]);
2381                         path->nodes[*level] = NULL;
2382                         *level = i + 1;
2383                 }
2384         }
2385         return 1;
2386 }
2387
2388 static int check_root_dir(struct inode_record *rec)
2389 {
2390         struct inode_backref *backref;
2391         int ret = -1;
2392
2393         if (!rec->found_inode_item || rec->errors)
2394                 goto out;
2395         if (rec->nlink != 1 || rec->found_link != 0)
2396                 goto out;
2397         if (list_empty(&rec->backrefs))
2398                 goto out;
2399         backref = to_inode_backref(rec->backrefs.next);
2400         if (!backref->found_inode_ref)
2401                 goto out;
2402         if (backref->index != 0 || backref->namelen != 2 ||
2403             memcmp(backref->name, "..", 2))
2404                 goto out;
2405         if (backref->found_dir_index || backref->found_dir_item)
2406                 goto out;
2407         ret = 0;
2408 out:
2409         return ret;
2410 }
2411
2412 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2413                               struct btrfs_root *root, struct btrfs_path *path,
2414                               struct inode_record *rec)
2415 {
2416         struct btrfs_inode_item *ei;
2417         struct btrfs_key key;
2418         int ret;
2419
2420         key.objectid = rec->ino;
2421         key.type = BTRFS_INODE_ITEM_KEY;
2422         key.offset = (u64)-1;
2423
2424         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2425         if (ret < 0)
2426                 goto out;
2427         if (ret) {
2428                 if (!path->slots[0]) {
2429                         ret = -ENOENT;
2430                         goto out;
2431                 }
2432                 path->slots[0]--;
2433                 ret = 0;
2434         }
2435         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2436         if (key.objectid != rec->ino) {
2437                 ret = -ENOENT;
2438                 goto out;
2439         }
2440
2441         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2442                             struct btrfs_inode_item);
2443         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2444         btrfs_mark_buffer_dirty(path->nodes[0]);
2445         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2446         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2447                root->root_key.objectid);
2448 out:
2449         btrfs_release_path(path);
2450         return ret;
2451 }
2452
2453 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2454                                     struct btrfs_root *root,
2455                                     struct btrfs_path *path,
2456                                     struct inode_record *rec)
2457 {
2458         int ret;
2459
2460         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2461         btrfs_release_path(path);
2462         if (!ret)
2463                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2464         return ret;
2465 }
2466
2467 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2468                                struct btrfs_root *root,
2469                                struct btrfs_path *path,
2470                                struct inode_record *rec)
2471 {
2472         struct btrfs_inode_item *ei;
2473         struct btrfs_key key;
2474         int ret = 0;
2475
2476         key.objectid = rec->ino;
2477         key.type = BTRFS_INODE_ITEM_KEY;
2478         key.offset = 0;
2479
2480         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2481         if (ret) {
2482                 if (ret > 0)
2483                         ret = -ENOENT;
2484                 goto out;
2485         }
2486
2487         /* Since ret == 0, no need to check anything */
2488         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2489                             struct btrfs_inode_item);
2490         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2491         btrfs_mark_buffer_dirty(path->nodes[0]);
2492         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2493         printf("reset nbytes for ino %llu root %llu\n",
2494                rec->ino, root->root_key.objectid);
2495 out:
2496         btrfs_release_path(path);
2497         return ret;
2498 }
2499
2500 static int add_missing_dir_index(struct btrfs_root *root,
2501                                  struct cache_tree *inode_cache,
2502                                  struct inode_record *rec,
2503                                  struct inode_backref *backref)
2504 {
2505         struct btrfs_path path;
2506         struct btrfs_trans_handle *trans;
2507         struct btrfs_dir_item *dir_item;
2508         struct extent_buffer *leaf;
2509         struct btrfs_key key;
2510         struct btrfs_disk_key disk_key;
2511         struct inode_record *dir_rec;
2512         unsigned long name_ptr;
2513         u32 data_size = sizeof(*dir_item) + backref->namelen;
2514         int ret;
2515
2516         trans = btrfs_start_transaction(root, 1);
2517         if (IS_ERR(trans))
2518                 return PTR_ERR(trans);
2519
2520         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2521                 (unsigned long long)rec->ino);
2522
2523         btrfs_init_path(&path);
2524         key.objectid = backref->dir;
2525         key.type = BTRFS_DIR_INDEX_KEY;
2526         key.offset = backref->index;
2527         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2528         BUG_ON(ret);
2529
2530         leaf = path.nodes[0];
2531         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2532
2533         disk_key.objectid = cpu_to_le64(rec->ino);
2534         disk_key.type = BTRFS_INODE_ITEM_KEY;
2535         disk_key.offset = 0;
2536
2537         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2538         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2539         btrfs_set_dir_data_len(leaf, dir_item, 0);
2540         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2541         name_ptr = (unsigned long)(dir_item + 1);
2542         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2543         btrfs_mark_buffer_dirty(leaf);
2544         btrfs_release_path(&path);
2545         btrfs_commit_transaction(trans, root);
2546
2547         backref->found_dir_index = 1;
2548         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2549         BUG_ON(IS_ERR(dir_rec));
2550         if (!dir_rec)
2551                 return 0;
2552         dir_rec->found_size += backref->namelen;
2553         if (dir_rec->found_size == dir_rec->isize &&
2554             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2555                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2556         if (dir_rec->found_size != dir_rec->isize)
2557                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2558
2559         return 0;
2560 }
2561
2562 static int delete_dir_index(struct btrfs_root *root,
2563                             struct inode_backref *backref)
2564 {
2565         struct btrfs_trans_handle *trans;
2566         struct btrfs_dir_item *di;
2567         struct btrfs_path path;
2568         int ret = 0;
2569
2570         trans = btrfs_start_transaction(root, 1);
2571         if (IS_ERR(trans))
2572                 return PTR_ERR(trans);
2573
2574         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2575                 (unsigned long long)backref->dir,
2576                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2577                 (unsigned long long)root->objectid);
2578
2579         btrfs_init_path(&path);
2580         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2581                                     backref->name, backref->namelen,
2582                                     backref->index, -1);
2583         if (IS_ERR(di)) {
2584                 ret = PTR_ERR(di);
2585                 btrfs_release_path(&path);
2586                 btrfs_commit_transaction(trans, root);
2587                 if (ret == -ENOENT)
2588                         return 0;
2589                 return ret;
2590         }
2591
2592         if (!di)
2593                 ret = btrfs_del_item(trans, root, &path);
2594         else
2595                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2596         BUG_ON(ret);
2597         btrfs_release_path(&path);
2598         btrfs_commit_transaction(trans, root);
2599         return ret;
2600 }
2601
2602 static int create_inode_item(struct btrfs_root *root,
2603                              struct inode_record *rec,
2604                              int root_dir)
2605 {
2606         struct btrfs_trans_handle *trans;
2607         struct btrfs_inode_item inode_item;
2608         time_t now = time(NULL);
2609         int ret;
2610
2611         trans = btrfs_start_transaction(root, 1);
2612         if (IS_ERR(trans)) {
2613                 ret = PTR_ERR(trans);
2614                 return ret;
2615         }
2616
2617         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2618                 "be incomplete, please check permissions and content after "
2619                 "the fsck completes.\n", (unsigned long long)root->objectid,
2620                 (unsigned long long)rec->ino);
2621
2622         memset(&inode_item, 0, sizeof(inode_item));
2623         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2624         if (root_dir)
2625                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2626         else
2627                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2628         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2629         if (rec->found_dir_item) {
2630                 if (rec->found_file_extent)
2631                         fprintf(stderr, "root %llu inode %llu has both a dir "
2632                                 "item and extents, unsure if it is a dir or a "
2633                                 "regular file so setting it as a directory\n",
2634                                 (unsigned long long)root->objectid,
2635                                 (unsigned long long)rec->ino);
2636                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2637                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2638         } else if (!rec->found_dir_item) {
2639                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2640                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2641         }
2642         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2643         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2644         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2645         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2646         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2647         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2648         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2649         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2650
2651         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2652         BUG_ON(ret);
2653         btrfs_commit_transaction(trans, root);
2654         return 0;
2655 }
2656
2657 static int repair_inode_backrefs(struct btrfs_root *root,
2658                                  struct inode_record *rec,
2659                                  struct cache_tree *inode_cache,
2660                                  int delete)
2661 {
2662         struct inode_backref *tmp, *backref;
2663         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2664         int ret = 0;
2665         int repaired = 0;
2666
2667         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2668                 if (!delete && rec->ino == root_dirid) {
2669                         if (!rec->found_inode_item) {
2670                                 ret = create_inode_item(root, rec, 1);
2671                                 if (ret)
2672                                         break;
2673                                 repaired++;
2674                         }
2675                 }
2676
2677                 /* Index 0 for root dir's are special, don't mess with it */
2678                 if (rec->ino == root_dirid && backref->index == 0)
2679                         continue;
2680
2681                 if (delete &&
2682                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2683                      (backref->found_dir_index && backref->found_inode_ref &&
2684                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2685                         ret = delete_dir_index(root, backref);
2686                         if (ret)
2687                                 break;
2688                         repaired++;
2689                         list_del(&backref->list);
2690                         free(backref);
2691                         continue;
2692                 }
2693
2694                 if (!delete && !backref->found_dir_index &&
2695                     backref->found_dir_item && backref->found_inode_ref) {
2696                         ret = add_missing_dir_index(root, inode_cache, rec,
2697                                                     backref);
2698                         if (ret)
2699                                 break;
2700                         repaired++;
2701                         if (backref->found_dir_item &&
2702                             backref->found_dir_index) {
2703                                 if (!backref->errors &&
2704                                     backref->found_inode_ref) {
2705                                         list_del(&backref->list);
2706                                         free(backref);
2707                                         continue;
2708                                 }
2709                         }
2710                 }
2711
2712                 if (!delete && (!backref->found_dir_index &&
2713                                 !backref->found_dir_item &&
2714                                 backref->found_inode_ref)) {
2715                         struct btrfs_trans_handle *trans;
2716                         struct btrfs_key location;
2717
2718                         ret = check_dir_conflict(root, backref->name,
2719                                                  backref->namelen,
2720                                                  backref->dir,
2721                                                  backref->index);
2722                         if (ret) {
2723                                 /*
2724                                  * let nlink fixing routine to handle it,
2725                                  * which can do it better.
2726                                  */
2727                                 ret = 0;
2728                                 break;
2729                         }
2730                         location.objectid = rec->ino;
2731                         location.type = BTRFS_INODE_ITEM_KEY;
2732                         location.offset = 0;
2733
2734                         trans = btrfs_start_transaction(root, 1);
2735                         if (IS_ERR(trans)) {
2736                                 ret = PTR_ERR(trans);
2737                                 break;
2738                         }
2739                         fprintf(stderr, "adding missing dir index/item pair "
2740                                 "for inode %llu\n",
2741                                 (unsigned long long)rec->ino);
2742                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2743                                                     backref->namelen,
2744                                                     backref->dir, &location,
2745                                                     imode_to_type(rec->imode),
2746                                                     backref->index);
2747                         BUG_ON(ret);
2748                         btrfs_commit_transaction(trans, root);
2749                         repaired++;
2750                 }
2751
2752                 if (!delete && (backref->found_inode_ref &&
2753                                 backref->found_dir_index &&
2754                                 backref->found_dir_item &&
2755                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2756                                 !rec->found_inode_item)) {
2757                         ret = create_inode_item(root, rec, 0);
2758                         if (ret)
2759                                 break;
2760                         repaired++;
2761                 }
2762
2763         }
2764         return ret ? ret : repaired;
2765 }
2766
2767 /*
2768  * To determine the file type for nlink/inode_item repair
2769  *
2770  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2771  * Return -ENOENT if file type is not found.
2772  */
2773 static int find_file_type(struct inode_record *rec, u8 *type)
2774 {
2775         struct inode_backref *backref;
2776
2777         /* For inode item recovered case */
2778         if (rec->found_inode_item) {
2779                 *type = imode_to_type(rec->imode);
2780                 return 0;
2781         }
2782
2783         list_for_each_entry(backref, &rec->backrefs, list) {
2784                 if (backref->found_dir_index || backref->found_dir_item) {
2785                         *type = backref->filetype;
2786                         return 0;
2787                 }
2788         }
2789         return -ENOENT;
2790 }
2791
2792 /*
2793  * To determine the file name for nlink repair
2794  *
2795  * Return 0 if file name is found, set name and namelen.
2796  * Return -ENOENT if file name is not found.
2797  */
2798 static int find_file_name(struct inode_record *rec,
2799                           char *name, int *namelen)
2800 {
2801         struct inode_backref *backref;
2802
2803         list_for_each_entry(backref, &rec->backrefs, list) {
2804                 if (backref->found_dir_index || backref->found_dir_item ||
2805                     backref->found_inode_ref) {
2806                         memcpy(name, backref->name, backref->namelen);
2807                         *namelen = backref->namelen;
2808                         return 0;
2809                 }
2810         }
2811         return -ENOENT;
2812 }
2813
2814 /* Reset the nlink of the inode to the correct one */
2815 static int reset_nlink(struct btrfs_trans_handle *trans,
2816                        struct btrfs_root *root,
2817                        struct btrfs_path *path,
2818                        struct inode_record *rec)
2819 {
2820         struct inode_backref *backref;
2821         struct inode_backref *tmp;
2822         struct btrfs_key key;
2823         struct btrfs_inode_item *inode_item;
2824         int ret = 0;
2825
2826         /* We don't believe this either, reset it and iterate backref */
2827         rec->found_link = 0;
2828
2829         /* Remove all backref including the valid ones */
2830         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2831                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2832                                    backref->index, backref->name,
2833                                    backref->namelen, 0);
2834                 if (ret < 0)
2835                         goto out;
2836
2837                 /* remove invalid backref, so it won't be added back */
2838                 if (!(backref->found_dir_index &&
2839                       backref->found_dir_item &&
2840                       backref->found_inode_ref)) {
2841                         list_del(&backref->list);
2842                         free(backref);
2843                 } else {
2844                         rec->found_link++;
2845                 }
2846         }
2847
2848         /* Set nlink to 0 */
2849         key.objectid = rec->ino;
2850         key.type = BTRFS_INODE_ITEM_KEY;
2851         key.offset = 0;
2852         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2853         if (ret < 0)
2854                 goto out;
2855         if (ret > 0) {
2856                 ret = -ENOENT;
2857                 goto out;
2858         }
2859         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2860                                     struct btrfs_inode_item);
2861         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2862         btrfs_mark_buffer_dirty(path->nodes[0]);
2863         btrfs_release_path(path);
2864
2865         /*
2866          * Add back valid inode_ref/dir_item/dir_index,
2867          * add_link() will handle the nlink inc, so new nlink must be correct
2868          */
2869         list_for_each_entry(backref, &rec->backrefs, list) {
2870                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2871                                      backref->name, backref->namelen,
2872                                      backref->filetype, &backref->index, 1);
2873                 if (ret < 0)
2874                         goto out;
2875         }
2876 out:
2877         btrfs_release_path(path);
2878         return ret;
2879 }
2880
2881 static int get_highest_inode(struct btrfs_trans_handle *trans,
2882                                 struct btrfs_root *root,
2883                                 struct btrfs_path *path,
2884                                 u64 *highest_ino)
2885 {
2886         struct btrfs_key key, found_key;
2887         int ret;
2888
2889         btrfs_init_path(path);
2890         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2891         key.offset = -1;
2892         key.type = BTRFS_INODE_ITEM_KEY;
2893         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2894         if (ret == 1) {
2895                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2896                                 path->slots[0] - 1);
2897                 *highest_ino = found_key.objectid;
2898                 ret = 0;
2899         }
2900         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2901                 ret = -EOVERFLOW;
2902         btrfs_release_path(path);
2903         return ret;
2904 }
2905
2906 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2907                                struct btrfs_root *root,
2908                                struct btrfs_path *path,
2909                                struct inode_record *rec)
2910 {
2911         char *dir_name = "lost+found";
2912         char namebuf[BTRFS_NAME_LEN] = {0};
2913         u64 lost_found_ino;
2914         u32 mode = 0700;
2915         u8 type = 0;
2916         int namelen = 0;
2917         int name_recovered = 0;
2918         int type_recovered = 0;
2919         int ret = 0;
2920
2921         /*
2922          * Get file name and type first before these invalid inode ref
2923          * are deleted by remove_all_invalid_backref()
2924          */
2925         name_recovered = !find_file_name(rec, namebuf, &namelen);
2926         type_recovered = !find_file_type(rec, &type);
2927
2928         if (!name_recovered) {
2929                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2930                        rec->ino, rec->ino);
2931                 namelen = count_digits(rec->ino);
2932                 sprintf(namebuf, "%llu", rec->ino);
2933                 name_recovered = 1;
2934         }
2935         if (!type_recovered) {
2936                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2937                        rec->ino);
2938                 type = BTRFS_FT_REG_FILE;
2939                 type_recovered = 1;
2940         }
2941
2942         ret = reset_nlink(trans, root, path, rec);
2943         if (ret < 0) {
2944                 fprintf(stderr,
2945                         "Failed to reset nlink for inode %llu: %s\n",
2946                         rec->ino, strerror(-ret));
2947                 goto out;
2948         }
2949
2950         if (rec->found_link == 0) {
2951                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2952                 if (ret < 0)
2953                         goto out;
2954                 lost_found_ino++;
2955                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2956                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2957                                   mode);
2958                 if (ret < 0) {
2959                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2960                                 dir_name, strerror(-ret));
2961                         goto out;
2962                 }
2963                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2964                                      namebuf, namelen, type, NULL, 1);
2965                 /*
2966                  * Add ".INO" suffix several times to handle case where
2967                  * "FILENAME.INO" is already taken by another file.
2968                  */
2969                 while (ret == -EEXIST) {
2970                         /*
2971                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2972                          */
2973                         if (namelen + count_digits(rec->ino) + 1 >
2974                             BTRFS_NAME_LEN) {
2975                                 ret = -EFBIG;
2976                                 goto out;
2977                         }
2978                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2979                                  ".%llu", rec->ino);
2980                         namelen += count_digits(rec->ino) + 1;
2981                         ret = btrfs_add_link(trans, root, rec->ino,
2982                                              lost_found_ino, namebuf,
2983                                              namelen, type, NULL, 1);
2984                 }
2985                 if (ret < 0) {
2986                         fprintf(stderr,
2987                                 "Failed to link the inode %llu to %s dir: %s\n",
2988                                 rec->ino, dir_name, strerror(-ret));
2989                         goto out;
2990                 }
2991                 /*
2992                  * Just increase the found_link, don't actually add the
2993                  * backref. This will make things easier and this inode
2994                  * record will be freed after the repair is done.
2995                  * So fsck will not report problem about this inode.
2996                  */
2997                 rec->found_link++;
2998                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2999                        namelen, namebuf, dir_name);
3000         }
3001         printf("Fixed the nlink of inode %llu\n", rec->ino);
3002 out:
3003         /*
3004          * Clear the flag anyway, or we will loop forever for the same inode
3005          * as it will not be removed from the bad inode list and the dead loop
3006          * happens.
3007          */
3008         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3009         btrfs_release_path(path);
3010         return ret;
3011 }
3012
3013 /*
3014  * Check if there is any normal(reg or prealloc) file extent for given
3015  * ino.
3016  * This is used to determine the file type when neither its dir_index/item or
3017  * inode_item exists.
3018  *
3019  * This will *NOT* report error, if any error happens, just consider it does
3020  * not have any normal file extent.
3021  */
3022 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3023 {
3024         struct btrfs_path path;
3025         struct btrfs_key key;
3026         struct btrfs_key found_key;
3027         struct btrfs_file_extent_item *fi;
3028         u8 type;
3029         int ret = 0;
3030
3031         btrfs_init_path(&path);
3032         key.objectid = ino;
3033         key.type = BTRFS_EXTENT_DATA_KEY;
3034         key.offset = 0;
3035
3036         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3037         if (ret < 0) {
3038                 ret = 0;
3039                 goto out;
3040         }
3041         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3042                 ret = btrfs_next_leaf(root, &path);
3043                 if (ret) {
3044                         ret = 0;
3045                         goto out;
3046                 }
3047         }
3048         while (1) {
3049                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3050                                       path.slots[0]);
3051                 if (found_key.objectid != ino ||
3052                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3053                         break;
3054                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3055                                     struct btrfs_file_extent_item);
3056                 type = btrfs_file_extent_type(path.nodes[0], fi);
3057                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3058                         ret = 1;
3059                         goto out;
3060                 }
3061         }
3062 out:
3063         btrfs_release_path(&path);
3064         return ret;
3065 }
3066
3067 static u32 btrfs_type_to_imode(u8 type)
3068 {
3069         static u32 imode_by_btrfs_type[] = {
3070                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3071                 [BTRFS_FT_DIR]          = S_IFDIR,
3072                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3073                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3074                 [BTRFS_FT_FIFO]         = S_IFIFO,
3075                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3076                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3077         };
3078
3079         return imode_by_btrfs_type[(type)];
3080 }
3081
3082 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3083                                 struct btrfs_root *root,
3084                                 struct btrfs_path *path,
3085                                 struct inode_record *rec)
3086 {
3087         u8 filetype;
3088         u32 mode = 0700;
3089         int type_recovered = 0;
3090         int ret = 0;
3091
3092         printf("Trying to rebuild inode:%llu\n", rec->ino);
3093
3094         type_recovered = !find_file_type(rec, &filetype);
3095
3096         /*
3097          * Try to determine inode type if type not found.
3098          *
3099          * For found regular file extent, it must be FILE.
3100          * For found dir_item/index, it must be DIR.
3101          *
3102          * For undetermined one, use FILE as fallback.
3103          *
3104          * TODO:
3105          * 1. If found backref(inode_index/item is already handled) to it,
3106          *    it must be DIR.
3107          *    Need new inode-inode ref structure to allow search for that.
3108          */
3109         if (!type_recovered) {
3110                 if (rec->found_file_extent &&
3111                     find_normal_file_extent(root, rec->ino)) {
3112                         type_recovered = 1;
3113                         filetype = BTRFS_FT_REG_FILE;
3114                 } else if (rec->found_dir_item) {
3115                         type_recovered = 1;
3116                         filetype = BTRFS_FT_DIR;
3117                 } else if (!list_empty(&rec->orphan_extents)) {
3118                         type_recovered = 1;
3119                         filetype = BTRFS_FT_REG_FILE;
3120                 } else{
3121                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3122                                rec->ino);
3123                         type_recovered = 1;
3124                         filetype = BTRFS_FT_REG_FILE;
3125                 }
3126         }
3127
3128         ret = btrfs_new_inode(trans, root, rec->ino,
3129                               mode | btrfs_type_to_imode(filetype));
3130         if (ret < 0)
3131                 goto out;
3132
3133         /*
3134          * Here inode rebuild is done, we only rebuild the inode item,
3135          * don't repair the nlink(like move to lost+found).
3136          * That is the job of nlink repair.
3137          *
3138          * We just fill the record and return
3139          */
3140         rec->found_dir_item = 1;
3141         rec->imode = mode | btrfs_type_to_imode(filetype);
3142         rec->nlink = 0;
3143         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3144         /* Ensure the inode_nlinks repair function will be called */
3145         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3146 out:
3147         return ret;
3148 }
3149
3150 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3151                                       struct btrfs_root *root,
3152                                       struct btrfs_path *path,
3153                                       struct inode_record *rec)
3154 {
3155         struct orphan_data_extent *orphan;
3156         struct orphan_data_extent *tmp;
3157         int ret = 0;
3158
3159         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3160                 /*
3161                  * Check for conflicting file extents
3162                  *
3163                  * Here we don't know whether the extents is compressed or not,
3164                  * so we can only assume it not compressed nor data offset,
3165                  * and use its disk_len as extent length.
3166                  */
3167                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3168                                        orphan->offset, orphan->disk_len, 0);
3169                 btrfs_release_path(path);
3170                 if (ret < 0)
3171                         goto out;
3172                 if (!ret) {
3173                         fprintf(stderr,
3174                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3175                                 orphan->disk_bytenr, orphan->disk_len);
3176                         ret = btrfs_free_extent(trans,
3177                                         root->fs_info->extent_root,
3178                                         orphan->disk_bytenr, orphan->disk_len,
3179                                         0, root->objectid, orphan->objectid,
3180                                         orphan->offset);
3181                         if (ret < 0)
3182                                 goto out;
3183                 }
3184                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3185                                 orphan->offset, orphan->disk_bytenr,
3186                                 orphan->disk_len, orphan->disk_len);
3187                 if (ret < 0)
3188                         goto out;
3189
3190                 /* Update file size info */
3191                 rec->found_size += orphan->disk_len;
3192                 if (rec->found_size == rec->nbytes)
3193                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3194
3195                 /* Update the file extent hole info too */
3196                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3197                                            orphan->disk_len);
3198                 if (ret < 0)
3199                         goto out;
3200                 if (RB_EMPTY_ROOT(&rec->holes))
3201                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3202
3203                 list_del(&orphan->list);
3204                 free(orphan);
3205         }
3206         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3207 out:
3208         return ret;
3209 }
3210
3211 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3212                                         struct btrfs_root *root,
3213                                         struct btrfs_path *path,
3214                                         struct inode_record *rec)
3215 {
3216         struct rb_node *node;
3217         struct file_extent_hole *hole;
3218         int found = 0;
3219         int ret = 0;
3220
3221         node = rb_first(&rec->holes);
3222
3223         while (node) {
3224                 found = 1;
3225                 hole = rb_entry(node, struct file_extent_hole, node);
3226                 ret = btrfs_punch_hole(trans, root, rec->ino,
3227                                        hole->start, hole->len);
3228                 if (ret < 0)
3229                         goto out;
3230                 ret = del_file_extent_hole(&rec->holes, hole->start,
3231                                            hole->len);
3232                 if (ret < 0)
3233                         goto out;
3234                 if (RB_EMPTY_ROOT(&rec->holes))
3235                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3236                 node = rb_first(&rec->holes);
3237         }
3238         /* special case for a file losing all its file extent */
3239         if (!found) {
3240                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3241                                        round_up(rec->isize,
3242                                                 root->fs_info->sectorsize));
3243                 if (ret < 0)
3244                         goto out;
3245         }
3246         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3247                rec->ino, root->objectid);
3248 out:
3249         return ret;
3250 }
3251
3252 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3253 {
3254         struct btrfs_trans_handle *trans;
3255         struct btrfs_path path;
3256         int ret = 0;
3257
3258         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3259                              I_ERR_NO_ORPHAN_ITEM |
3260                              I_ERR_LINK_COUNT_WRONG |
3261                              I_ERR_NO_INODE_ITEM |
3262                              I_ERR_FILE_EXTENT_ORPHAN |
3263                              I_ERR_FILE_EXTENT_DISCOUNT|
3264                              I_ERR_FILE_NBYTES_WRONG)))
3265                 return rec->errors;
3266
3267         /*
3268          * For nlink repair, it may create a dir and add link, so
3269          * 2 for parent(256)'s dir_index and dir_item
3270          * 2 for lost+found dir's inode_item and inode_ref
3271          * 1 for the new inode_ref of the file
3272          * 2 for lost+found dir's dir_index and dir_item for the file
3273          */
3274         trans = btrfs_start_transaction(root, 7);
3275         if (IS_ERR(trans))
3276                 return PTR_ERR(trans);
3277
3278         btrfs_init_path(&path);
3279         if (rec->errors & I_ERR_NO_INODE_ITEM)
3280                 ret = repair_inode_no_item(trans, root, &path, rec);
3281         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3282                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3283         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3284                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3285         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3286                 ret = repair_inode_isize(trans, root, &path, rec);
3287         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3288                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3289         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3290                 ret = repair_inode_nlinks(trans, root, &path, rec);
3291         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3292                 ret = repair_inode_nbytes(trans, root, &path, rec);
3293         btrfs_commit_transaction(trans, root);
3294         btrfs_release_path(&path);
3295         return ret;
3296 }
3297
3298 static int check_inode_recs(struct btrfs_root *root,
3299                             struct cache_tree *inode_cache)
3300 {
3301         struct cache_extent *cache;
3302         struct ptr_node *node;
3303         struct inode_record *rec;
3304         struct inode_backref *backref;
3305         int stage = 0;
3306         int ret = 0;
3307         int err = 0;
3308         u64 error = 0;
3309         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3310
3311         if (btrfs_root_refs(&root->root_item) == 0) {
3312                 if (!cache_tree_empty(inode_cache))
3313                         fprintf(stderr, "warning line %d\n", __LINE__);
3314                 return 0;
3315         }
3316
3317         /*
3318          * We need to repair backrefs first because we could change some of the
3319          * errors in the inode recs.
3320          *
3321          * We also need to go through and delete invalid backrefs first and then
3322          * add the correct ones second.  We do this because we may get EEXIST
3323          * when adding back the correct index because we hadn't yet deleted the
3324          * invalid index.
3325          *
3326          * For example, if we were missing a dir index then the directories
3327          * isize would be wrong, so if we fixed the isize to what we thought it
3328          * would be and then fixed the backref we'd still have a invalid fs, so
3329          * we need to add back the dir index and then check to see if the isize
3330          * is still wrong.
3331          */
3332         while (stage < 3) {
3333                 stage++;
3334                 if (stage == 3 && !err)
3335                         break;
3336
3337                 cache = search_cache_extent(inode_cache, 0);
3338                 while (repair && cache) {
3339                         node = container_of(cache, struct ptr_node, cache);
3340                         rec = node->data;
3341                         cache = next_cache_extent(cache);
3342
3343                         /* Need to free everything up and rescan */
3344                         if (stage == 3) {
3345                                 remove_cache_extent(inode_cache, &node->cache);
3346                                 free(node);
3347                                 free_inode_rec(rec);
3348                                 continue;
3349                         }
3350
3351                         if (list_empty(&rec->backrefs))
3352                                 continue;
3353
3354                         ret = repair_inode_backrefs(root, rec, inode_cache,
3355                                                     stage == 1);
3356                         if (ret < 0) {
3357                                 err = ret;
3358                                 stage = 2;
3359                                 break;
3360                         } if (ret > 0) {
3361                                 err = -EAGAIN;
3362                         }
3363                 }
3364         }
3365         if (err)
3366                 return err;
3367
3368         rec = get_inode_rec(inode_cache, root_dirid, 0);
3369         BUG_ON(IS_ERR(rec));
3370         if (rec) {
3371                 ret = check_root_dir(rec);
3372                 if (ret) {
3373                         fprintf(stderr, "root %llu root dir %llu error\n",
3374                                 (unsigned long long)root->root_key.objectid,
3375                                 (unsigned long long)root_dirid);
3376                         print_inode_error(root, rec);
3377                         error++;
3378                 }
3379         } else {
3380                 if (repair) {
3381                         struct btrfs_trans_handle *trans;
3382
3383                         trans = btrfs_start_transaction(root, 1);
3384                         if (IS_ERR(trans)) {
3385                                 err = PTR_ERR(trans);
3386                                 return err;
3387                         }
3388
3389                         fprintf(stderr,
3390                                 "root %llu missing its root dir, recreating\n",
3391                                 (unsigned long long)root->objectid);
3392
3393                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3394                         BUG_ON(ret);
3395
3396                         btrfs_commit_transaction(trans, root);
3397                         return -EAGAIN;
3398                 }
3399
3400                 fprintf(stderr, "root %llu root dir %llu not found\n",
3401                         (unsigned long long)root->root_key.objectid,
3402                         (unsigned long long)root_dirid);
3403         }
3404
3405         while (1) {
3406                 cache = search_cache_extent(inode_cache, 0);
3407                 if (!cache)
3408                         break;
3409                 node = container_of(cache, struct ptr_node, cache);
3410                 rec = node->data;
3411                 remove_cache_extent(inode_cache, &node->cache);
3412                 free(node);
3413                 if (rec->ino == root_dirid ||
3414                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3415                         free_inode_rec(rec);
3416                         continue;
3417                 }
3418
3419                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3420                         ret = check_orphan_item(root, rec->ino);
3421                         if (ret == 0)
3422                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3423                         if (can_free_inode_rec(rec)) {
3424                                 free_inode_rec(rec);
3425                                 continue;
3426                         }
3427                 }
3428
3429                 if (!rec->found_inode_item)
3430                         rec->errors |= I_ERR_NO_INODE_ITEM;
3431                 if (rec->found_link != rec->nlink)
3432                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3433                 if (repair) {
3434                         ret = try_repair_inode(root, rec);
3435                         if (ret == 0 && can_free_inode_rec(rec)) {
3436                                 free_inode_rec(rec);
3437                                 continue;
3438                         }
3439                         ret = 0;
3440                 }
3441
3442                 if (!(repair && ret == 0))
3443                         error++;
3444                 print_inode_error(root, rec);
3445                 list_for_each_entry(backref, &rec->backrefs, list) {
3446                         if (!backref->found_dir_item)
3447                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3448                         if (!backref->found_dir_index)
3449                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3450                         if (!backref->found_inode_ref)
3451                                 backref->errors |= REF_ERR_NO_INODE_REF;
3452                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3453                                 " namelen %u name %s filetype %d errors %x",
3454                                 (unsigned long long)backref->dir,
3455                                 (unsigned long long)backref->index,
3456                                 backref->namelen, backref->name,
3457                                 backref->filetype, backref->errors);
3458                         print_ref_error(backref->errors);
3459                 }
3460                 free_inode_rec(rec);
3461         }
3462         return (error > 0) ? -1 : 0;
3463 }
3464
3465 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3466                                         u64 objectid)
3467 {
3468         struct cache_extent *cache;
3469         struct root_record *rec = NULL;
3470         int ret;
3471
3472         cache = lookup_cache_extent(root_cache, objectid, 1);
3473         if (cache) {
3474                 rec = container_of(cache, struct root_record, cache);
3475         } else {
3476                 rec = calloc(1, sizeof(*rec));
3477                 if (!rec)
3478                         return ERR_PTR(-ENOMEM);
3479                 rec->objectid = objectid;
3480                 INIT_LIST_HEAD(&rec->backrefs);
3481                 rec->cache.start = objectid;
3482                 rec->cache.size = 1;
3483
3484                 ret = insert_cache_extent(root_cache, &rec->cache);
3485                 if (ret)
3486                         return ERR_PTR(-EEXIST);
3487         }
3488         return rec;
3489 }
3490
3491 static struct root_backref *get_root_backref(struct root_record *rec,
3492                                              u64 ref_root, u64 dir, u64 index,
3493                                              const char *name, int namelen)
3494 {
3495         struct root_backref *backref;
3496
3497         list_for_each_entry(backref, &rec->backrefs, list) {
3498                 if (backref->ref_root != ref_root || backref->dir != dir ||
3499                     backref->namelen != namelen)
3500                         continue;
3501                 if (memcmp(name, backref->name, namelen))
3502                         continue;
3503                 return backref;
3504         }
3505
3506         backref = calloc(1, sizeof(*backref) + namelen + 1);
3507         if (!backref)
3508                 return NULL;
3509         backref->ref_root = ref_root;
3510         backref->dir = dir;
3511         backref->index = index;
3512         backref->namelen = namelen;
3513         memcpy(backref->name, name, namelen);
3514         backref->name[namelen] = '\0';
3515         list_add_tail(&backref->list, &rec->backrefs);
3516         return backref;
3517 }
3518
3519 static void free_root_record(struct cache_extent *cache)
3520 {
3521         struct root_record *rec;
3522         struct root_backref *backref;
3523
3524         rec = container_of(cache, struct root_record, cache);
3525         while (!list_empty(&rec->backrefs)) {
3526                 backref = to_root_backref(rec->backrefs.next);
3527                 list_del(&backref->list);
3528                 free(backref);
3529         }
3530
3531         free(rec);
3532 }
3533
3534 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3535
3536 static int add_root_backref(struct cache_tree *root_cache,
3537                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3538                             const char *name, int namelen,
3539                             int item_type, int errors)
3540 {
3541         struct root_record *rec;
3542         struct root_backref *backref;
3543
3544         rec = get_root_rec(root_cache, root_id);
3545         BUG_ON(IS_ERR(rec));
3546         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3547         BUG_ON(!backref);
3548
3549         backref->errors |= errors;
3550
3551         if (item_type != BTRFS_DIR_ITEM_KEY) {
3552                 if (backref->found_dir_index || backref->found_back_ref ||
3553                     backref->found_forward_ref) {
3554                         if (backref->index != index)
3555                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3556                 } else {
3557                         backref->index = index;
3558                 }
3559         }
3560
3561         if (item_type == BTRFS_DIR_ITEM_KEY) {
3562                 if (backref->found_forward_ref)
3563                         rec->found_ref++;
3564                 backref->found_dir_item = 1;
3565         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3566                 backref->found_dir_index = 1;
3567         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3568                 if (backref->found_forward_ref)
3569                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3570                 else if (backref->found_dir_item)
3571                         rec->found_ref++;
3572                 backref->found_forward_ref = 1;
3573         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3574                 if (backref->found_back_ref)
3575                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3576                 backref->found_back_ref = 1;
3577         } else {
3578                 BUG_ON(1);
3579         }
3580
3581         if (backref->found_forward_ref && backref->found_dir_item)
3582                 backref->reachable = 1;
3583         return 0;
3584 }
3585
3586 static int merge_root_recs(struct btrfs_root *root,
3587                            struct cache_tree *src_cache,
3588                            struct cache_tree *dst_cache)
3589 {
3590         struct cache_extent *cache;
3591         struct ptr_node *node;
3592         struct inode_record *rec;
3593         struct inode_backref *backref;
3594         int ret = 0;
3595
3596         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3597                 free_inode_recs_tree(src_cache);
3598                 return 0;
3599         }
3600
3601         while (1) {
3602                 cache = search_cache_extent(src_cache, 0);
3603                 if (!cache)
3604                         break;
3605                 node = container_of(cache, struct ptr_node, cache);
3606                 rec = node->data;
3607                 remove_cache_extent(src_cache, &node->cache);
3608                 free(node);
3609
3610                 ret = is_child_root(root, root->objectid, rec->ino);
3611                 if (ret < 0)
3612                         break;
3613                 else if (ret == 0)
3614                         goto skip;
3615
3616                 list_for_each_entry(backref, &rec->backrefs, list) {
3617                         BUG_ON(backref->found_inode_ref);
3618                         if (backref->found_dir_item)
3619                                 add_root_backref(dst_cache, rec->ino,
3620                                         root->root_key.objectid, backref->dir,
3621                                         backref->index, backref->name,
3622                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3623                                         backref->errors);
3624                         if (backref->found_dir_index)
3625                                 add_root_backref(dst_cache, rec->ino,
3626                                         root->root_key.objectid, backref->dir,
3627                                         backref->index, backref->name,
3628                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3629                                         backref->errors);
3630                 }
3631 skip:
3632                 free_inode_rec(rec);
3633         }
3634         if (ret < 0)
3635                 return ret;
3636         return 0;
3637 }
3638
3639 static int check_root_refs(struct btrfs_root *root,
3640                            struct cache_tree *root_cache)
3641 {
3642         struct root_record *rec;
3643         struct root_record *ref_root;
3644         struct root_backref *backref;
3645         struct cache_extent *cache;
3646         int loop = 1;
3647         int ret;
3648         int error;
3649         int errors = 0;
3650
3651         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3652         BUG_ON(IS_ERR(rec));
3653         rec->found_ref = 1;
3654
3655         /* fixme: this can not detect circular references */
3656         while (loop) {
3657                 loop = 0;
3658                 cache = search_cache_extent(root_cache, 0);
3659                 while (1) {
3660                         if (!cache)
3661                                 break;
3662                         rec = container_of(cache, struct root_record, cache);
3663                         cache = next_cache_extent(cache);
3664
3665                         if (rec->found_ref == 0)
3666                                 continue;
3667
3668                         list_for_each_entry(backref, &rec->backrefs, list) {
3669                                 if (!backref->reachable)
3670                                         continue;
3671
3672                                 ref_root = get_root_rec(root_cache,
3673                                                         backref->ref_root);
3674                                 BUG_ON(IS_ERR(ref_root));
3675                                 if (ref_root->found_ref > 0)
3676                                         continue;
3677
3678                                 backref->reachable = 0;
3679                                 rec->found_ref--;
3680                                 if (rec->found_ref == 0)
3681                                         loop = 1;
3682                         }
3683                 }
3684         }
3685
3686         cache = search_cache_extent(root_cache, 0);
3687         while (1) {
3688                 if (!cache)
3689                         break;
3690                 rec = container_of(cache, struct root_record, cache);
3691                 cache = next_cache_extent(cache);
3692
3693                 if (rec->found_ref == 0 &&
3694                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3695                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3696                         ret = check_orphan_item(root->fs_info->tree_root,
3697                                                 rec->objectid);
3698                         if (ret == 0)
3699                                 continue;
3700
3701                         /*
3702                          * If we don't have a root item then we likely just have
3703                          * a dir item in a snapshot for this root but no actual
3704                          * ref key or anything so it's meaningless.
3705                          */
3706                         if (!rec->found_root_item)
3707                                 continue;
3708                         errors++;
3709                         fprintf(stderr, "fs tree %llu not referenced\n",
3710                                 (unsigned long long)rec->objectid);
3711                 }
3712
3713                 error = 0;
3714                 if (rec->found_ref > 0 && !rec->found_root_item)
3715                         error = 1;
3716                 list_for_each_entry(backref, &rec->backrefs, list) {
3717                         if (!backref->found_dir_item)
3718                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3719                         if (!backref->found_dir_index)
3720                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3721                         if (!backref->found_back_ref)
3722                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3723                         if (!backref->found_forward_ref)
3724                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3725                         if (backref->reachable && backref->errors)
3726                                 error = 1;
3727                 }
3728                 if (!error)
3729                         continue;
3730
3731                 errors++;
3732                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3733                         (unsigned long long)rec->objectid, rec->found_ref,
3734                          rec->found_root_item ? "" : "not found");
3735
3736                 list_for_each_entry(backref, &rec->backrefs, list) {
3737                         if (!backref->reachable)
3738                                 continue;
3739                         if (!backref->errors && rec->found_root_item)
3740                                 continue;
3741                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3742                                 " index %llu namelen %u name %s errors %x\n",
3743                                 (unsigned long long)backref->ref_root,
3744                                 (unsigned long long)backref->dir,
3745                                 (unsigned long long)backref->index,
3746                                 backref->namelen, backref->name,
3747                                 backref->errors);
3748                         print_ref_error(backref->errors);
3749                 }
3750         }
3751         return errors > 0 ? 1 : 0;
3752 }
3753
3754 static int process_root_ref(struct extent_buffer *eb, int slot,
3755                             struct btrfs_key *key,
3756                             struct cache_tree *root_cache)
3757 {
3758         u64 dirid;
3759         u64 index;
3760         u32 len;
3761         u32 name_len;
3762         struct btrfs_root_ref *ref;
3763         char namebuf[BTRFS_NAME_LEN];
3764         int error;
3765
3766         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3767
3768         dirid = btrfs_root_ref_dirid(eb, ref);
3769         index = btrfs_root_ref_sequence(eb, ref);
3770         name_len = btrfs_root_ref_name_len(eb, ref);
3771
3772         if (name_len <= BTRFS_NAME_LEN) {
3773                 len = name_len;
3774                 error = 0;
3775         } else {
3776                 len = BTRFS_NAME_LEN;
3777                 error = REF_ERR_NAME_TOO_LONG;
3778         }
3779         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3780
3781         if (key->type == BTRFS_ROOT_REF_KEY) {
3782                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3783                                  index, namebuf, len, key->type, error);
3784         } else {
3785                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3786                                  index, namebuf, len, key->type, error);
3787         }
3788         return 0;
3789 }
3790
3791 static void free_corrupt_block(struct cache_extent *cache)
3792 {
3793         struct btrfs_corrupt_block *corrupt;
3794
3795         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3796         free(corrupt);
3797 }
3798
3799 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3800
3801 /*
3802  * Repair the btree of the given root.
3803  *
3804  * The fix is to remove the node key in corrupt_blocks cache_tree.
3805  * and rebalance the tree.
3806  * After the fix, the btree should be writeable.
3807  */
3808 static int repair_btree(struct btrfs_root *root,
3809                         struct cache_tree *corrupt_blocks)
3810 {
3811         struct btrfs_trans_handle *trans;
3812         struct btrfs_path path;
3813         struct btrfs_corrupt_block *corrupt;
3814         struct cache_extent *cache;
3815         struct btrfs_key key;
3816         u64 offset;
3817         int level;
3818         int ret = 0;
3819
3820         if (cache_tree_empty(corrupt_blocks))
3821                 return 0;
3822
3823         trans = btrfs_start_transaction(root, 1);
3824         if (IS_ERR(trans)) {
3825                 ret = PTR_ERR(trans);
3826                 fprintf(stderr, "Error starting transaction: %s\n",
3827                         strerror(-ret));
3828                 return ret;
3829         }
3830         btrfs_init_path(&path);
3831         cache = first_cache_extent(corrupt_blocks);
3832         while (cache) {
3833                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3834                                        cache);
3835                 level = corrupt->level;
3836                 path.lowest_level = level;
3837                 key.objectid = corrupt->key.objectid;
3838                 key.type = corrupt->key.type;
3839                 key.offset = corrupt->key.offset;
3840
3841                 /*
3842                  * Here we don't want to do any tree balance, since it may
3843                  * cause a balance with corrupted brother leaf/node,
3844                  * so ins_len set to 0 here.
3845                  * Balance will be done after all corrupt node/leaf is deleted.
3846                  */
3847                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3848                 if (ret < 0)
3849                         goto out;
3850                 offset = btrfs_node_blockptr(path.nodes[level],
3851                                              path.slots[level]);
3852
3853                 /* Remove the ptr */
3854                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3855                 if (ret < 0)
3856                         goto out;
3857                 /*
3858                  * Remove the corresponding extent
3859                  * return value is not concerned.
3860                  */
3861                 btrfs_release_path(&path);
3862                 ret = btrfs_free_extent(trans, root, offset,
3863                                 root->fs_info->nodesize, 0,
3864                                 root->root_key.objectid, level - 1, 0);
3865                 cache = next_cache_extent(cache);
3866         }
3867
3868         /* Balance the btree using btrfs_search_slot() */
3869         cache = first_cache_extent(corrupt_blocks);
3870         while (cache) {
3871                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3872                                        cache);
3873                 memcpy(&key, &corrupt->key, sizeof(key));
3874                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3875                 if (ret < 0)
3876                         goto out;
3877                 /* return will always >0 since it won't find the item */
3878                 ret = 0;
3879                 btrfs_release_path(&path);
3880                 cache = next_cache_extent(cache);
3881         }
3882 out:
3883         btrfs_commit_transaction(trans, root);
3884         btrfs_release_path(&path);
3885         return ret;
3886 }
3887
3888 static int check_fs_root(struct btrfs_root *root,
3889                          struct cache_tree *root_cache,
3890                          struct walk_control *wc)
3891 {
3892         int ret = 0;
3893         int err = 0;
3894         int wret;
3895         int level;
3896         struct btrfs_path path;
3897         struct shared_node root_node;
3898         struct root_record *rec;
3899         struct btrfs_root_item *root_item = &root->root_item;
3900         struct cache_tree corrupt_blocks;
3901         struct orphan_data_extent *orphan;
3902         struct orphan_data_extent *tmp;
3903         enum btrfs_tree_block_status status;
3904         struct node_refs nrefs;
3905
3906         /*
3907          * Reuse the corrupt_block cache tree to record corrupted tree block
3908          *
3909          * Unlike the usage in extent tree check, here we do it in a per
3910          * fs/subvol tree base.
3911          */
3912         cache_tree_init(&corrupt_blocks);
3913         root->fs_info->corrupt_blocks = &corrupt_blocks;
3914
3915         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3916                 rec = get_root_rec(root_cache, root->root_key.objectid);
3917                 BUG_ON(IS_ERR(rec));
3918                 if (btrfs_root_refs(root_item) > 0)
3919                         rec->found_root_item = 1;
3920         }
3921
3922         btrfs_init_path(&path);
3923         memset(&root_node, 0, sizeof(root_node));
3924         cache_tree_init(&root_node.root_cache);
3925         cache_tree_init(&root_node.inode_cache);
3926         memset(&nrefs, 0, sizeof(nrefs));
3927
3928         /* Move the orphan extent record to corresponding inode_record */
3929         list_for_each_entry_safe(orphan, tmp,
3930                                  &root->orphan_data_extents, list) {
3931                 struct inode_record *inode;
3932
3933                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3934                                       1);
3935                 BUG_ON(IS_ERR(inode));
3936                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3937                 list_move(&orphan->list, &inode->orphan_extents);
3938         }
3939
3940         level = btrfs_header_level(root->node);
3941         memset(wc->nodes, 0, sizeof(wc->nodes));
3942         wc->nodes[level] = &root_node;
3943         wc->active_node = level;
3944         wc->root_level = level;
3945
3946         /* We may not have checked the root block, lets do that now */
3947         if (btrfs_is_leaf(root->node))
3948                 status = btrfs_check_leaf(root, NULL, root->node);
3949         else
3950                 status = btrfs_check_node(root, NULL, root->node);
3951         if (status != BTRFS_TREE_BLOCK_CLEAN)
3952                 return -EIO;
3953
3954         if (btrfs_root_refs(root_item) > 0 ||
3955             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3956                 path.nodes[level] = root->node;
3957                 extent_buffer_get(root->node);
3958                 path.slots[level] = 0;
3959         } else {
3960                 struct btrfs_key key;
3961                 struct btrfs_disk_key found_key;
3962
3963                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3964                 level = root_item->drop_level;
3965                 path.lowest_level = level;
3966                 if (level > btrfs_header_level(root->node) ||
3967                     level >= BTRFS_MAX_LEVEL) {
3968                         error("ignoring invalid drop level: %u", level);
3969                         goto skip_walking;
3970                 }
3971                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3972                 if (wret < 0)
3973                         goto skip_walking;
3974                 btrfs_node_key(path.nodes[level], &found_key,
3975                                 path.slots[level]);
3976                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3977                                         sizeof(found_key)));
3978         }
3979
3980         while (1) {
3981                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3982                 if (wret < 0)
3983                         ret = wret;
3984                 if (wret != 0)
3985                         break;
3986
3987                 wret = walk_up_tree(root, &path, wc, &level);
3988                 if (wret < 0)
3989                         ret = wret;
3990                 if (wret != 0)
3991                         break;
3992         }
3993 skip_walking:
3994         btrfs_release_path(&path);
3995
3996         if (!cache_tree_empty(&corrupt_blocks)) {
3997                 struct cache_extent *cache;
3998                 struct btrfs_corrupt_block *corrupt;
3999
4000                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4001                        root->root_key.objectid);
4002                 cache = first_cache_extent(&corrupt_blocks);
4003                 while (cache) {
4004                         corrupt = container_of(cache,
4005                                                struct btrfs_corrupt_block,
4006                                                cache);
4007                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4008                                cache->start, corrupt->level,
4009                                corrupt->key.objectid, corrupt->key.type,
4010                                corrupt->key.offset);
4011                         cache = next_cache_extent(cache);
4012                 }
4013                 if (repair) {
4014                         printf("Try to repair the btree for root %llu\n",
4015                                root->root_key.objectid);
4016                         ret = repair_btree(root, &corrupt_blocks);
4017                         if (ret < 0)
4018                                 fprintf(stderr, "Failed to repair btree: %s\n",
4019                                         strerror(-ret));
4020                         if (!ret)
4021                                 printf("Btree for root %llu is fixed\n",
4022                                        root->root_key.objectid);
4023                 }
4024         }
4025
4026         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4027         if (err < 0)
4028                 ret = err;
4029
4030         if (root_node.current) {
4031                 root_node.current->checked = 1;
4032                 maybe_free_inode_rec(&root_node.inode_cache,
4033                                 root_node.current);
4034         }
4035
4036         err = check_inode_recs(root, &root_node.inode_cache);
4037         if (!ret)
4038                 ret = err;
4039
4040         free_corrupt_blocks_tree(&corrupt_blocks);
4041         root->fs_info->corrupt_blocks = NULL;
4042         free_orphan_data_extents(&root->orphan_data_extents);
4043         return ret;
4044 }
4045
4046 static int fs_root_objectid(u64 objectid)
4047 {
4048         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4049             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4050                 return 1;
4051         return is_fstree(objectid);
4052 }
4053
4054 static int check_fs_roots(struct btrfs_root *root,
4055                           struct cache_tree *root_cache)
4056 {
4057         struct btrfs_path path;
4058         struct btrfs_key key;
4059         struct walk_control wc;
4060         struct extent_buffer *leaf, *tree_node;
4061         struct btrfs_root *tmp_root;
4062         struct btrfs_root *tree_root = root->fs_info->tree_root;
4063         int ret;
4064         int err = 0;
4065
4066         if (ctx.progress_enabled) {
4067                 ctx.tp = TASK_FS_ROOTS;
4068                 task_start(ctx.info);
4069         }
4070
4071         /*
4072          * Just in case we made any changes to the extent tree that weren't
4073          * reflected into the free space cache yet.
4074          */
4075         if (repair)
4076                 reset_cached_block_groups(root->fs_info);
4077         memset(&wc, 0, sizeof(wc));
4078         cache_tree_init(&wc.shared);
4079         btrfs_init_path(&path);
4080
4081 again:
4082         key.offset = 0;
4083         key.objectid = 0;
4084         key.type = BTRFS_ROOT_ITEM_KEY;
4085         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4086         if (ret < 0) {
4087                 err = 1;
4088                 goto out;
4089         }
4090         tree_node = tree_root->node;
4091         while (1) {
4092                 if (tree_node != tree_root->node) {
4093                         free_root_recs_tree(root_cache);
4094                         btrfs_release_path(&path);
4095                         goto again;
4096                 }
4097                 leaf = path.nodes[0];
4098                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4099                         ret = btrfs_next_leaf(tree_root, &path);
4100                         if (ret) {
4101                                 if (ret < 0)
4102                                         err = 1;
4103                                 break;
4104                         }
4105                         leaf = path.nodes[0];
4106                 }
4107                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4108                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4109                     fs_root_objectid(key.objectid)) {
4110                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4111                                 tmp_root = btrfs_read_fs_root_no_cache(
4112                                                 root->fs_info, &key);
4113                         } else {
4114                                 key.offset = (u64)-1;
4115                                 tmp_root = btrfs_read_fs_root(
4116                                                 root->fs_info, &key);
4117                         }
4118                         if (IS_ERR(tmp_root)) {
4119                                 err = 1;
4120                                 goto next;
4121                         }
4122                         ret = check_fs_root(tmp_root, root_cache, &wc);
4123                         if (ret == -EAGAIN) {
4124                                 free_root_recs_tree(root_cache);
4125                                 btrfs_release_path(&path);
4126                                 goto again;
4127                         }
4128                         if (ret)
4129                                 err = 1;
4130                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4131                                 btrfs_free_fs_root(tmp_root);
4132                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4133                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4134                         process_root_ref(leaf, path.slots[0], &key,
4135                                          root_cache);
4136                 }
4137 next:
4138                 path.slots[0]++;
4139         }
4140 out:
4141         btrfs_release_path(&path);
4142         if (err)
4143                 free_extent_cache_tree(&wc.shared);
4144         if (!cache_tree_empty(&wc.shared))
4145                 fprintf(stderr, "warning line %d\n", __LINE__);
4146
4147         task_stop(ctx.info);
4148
4149         return err;
4150 }
4151
4152 /*
4153  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4154  * INODE_REF/INODE_EXTREF match.
4155  *
4156  * @root:       the root of the fs/file tree
4157  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4158  * @key:        the key of the DIR_ITEM/DIR_INDEX
4159  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4160  *              distinguish root_dir between normal dir/file
4161  * @name:       the name in the INODE_REF/INODE_EXTREF
4162  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4163  * @mode:       the st_mode of INODE_ITEM
4164  *
4165  * Return 0 if no error occurred.
4166  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4167  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4168  * dir/file.
4169  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4170  * not match for normal dir/file.
4171  */
4172 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4173                          struct btrfs_key *key, u64 index, char *name,
4174                          u32 namelen, u32 mode)
4175 {
4176         struct btrfs_path path;
4177         struct extent_buffer *node;
4178         struct btrfs_dir_item *di;
4179         struct btrfs_key location;
4180         char namebuf[BTRFS_NAME_LEN] = {0};
4181         u32 total;
4182         u32 cur = 0;
4183         u32 len;
4184         u32 name_len;
4185         u32 data_len;
4186         u8 filetype;
4187         int slot;
4188         int ret;
4189
4190         btrfs_init_path(&path);
4191         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4192         if (ret < 0) {
4193                 ret = DIR_ITEM_MISSING;
4194                 goto out;
4195         }
4196
4197         /* Process root dir and goto out*/
4198         if (index == 0) {
4199                 if (ret == 0) {
4200                         ret = ROOT_DIR_ERROR;
4201                         error(
4202                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4203                                 root->objectid,
4204                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4205                                         "REF" : "EXTREF",
4206                                 ref_key->objectid, ref_key->offset,
4207                                 key->type == BTRFS_DIR_ITEM_KEY ?
4208                                         "DIR_ITEM" : "DIR_INDEX");
4209                 } else {
4210                         ret = 0;
4211                 }
4212
4213                 goto out;
4214         }
4215
4216         /* Process normal file/dir */
4217         if (ret > 0) {
4218                 ret = DIR_ITEM_MISSING;
4219                 error(
4220                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4221                         root->objectid,
4222                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4223                         ref_key->objectid, ref_key->offset,
4224                         key->type == BTRFS_DIR_ITEM_KEY ?
4225                                 "DIR_ITEM" : "DIR_INDEX",
4226                         key->objectid, key->offset, namelen, name,
4227                         imode_to_type(mode));
4228                 goto out;
4229         }
4230
4231         /* Check whether inode_id/filetype/name match */
4232         node = path.nodes[0];
4233         slot = path.slots[0];
4234         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4235         total = btrfs_item_size_nr(node, slot);
4236         while (cur < total) {
4237                 ret = DIR_ITEM_MISMATCH;
4238                 name_len = btrfs_dir_name_len(node, di);
4239                 data_len = btrfs_dir_data_len(node, di);
4240
4241                 btrfs_dir_item_key_to_cpu(node, di, &location);
4242                 if (location.objectid != ref_key->objectid ||
4243                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4244                     location.offset != 0)
4245                         goto next;
4246
4247                 filetype = btrfs_dir_type(node, di);
4248                 if (imode_to_type(mode) != filetype)
4249                         goto next;
4250
4251                 if (cur + sizeof(*di) + name_len > total ||
4252                     name_len > BTRFS_NAME_LEN) {
4253                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4254                                 root->objectid,
4255                                 key->type == BTRFS_DIR_ITEM_KEY ?
4256                                 "DIR_ITEM" : "DIR_INDEX",
4257                                 key->objectid, key->offset, name_len);
4258
4259                         if (cur + sizeof(*di) > total)
4260                                 break;
4261                         len = min_t(u32, total - cur - sizeof(*di),
4262                                     BTRFS_NAME_LEN);
4263                 } else {
4264                         len = name_len;
4265                 }
4266
4267                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4268                 if (len != namelen || strncmp(namebuf, name, len))
4269                         goto next;
4270
4271                 ret = 0;
4272                 goto out;
4273 next:
4274                 len = sizeof(*di) + name_len + data_len;
4275                 di = (struct btrfs_dir_item *)((char *)di + len);
4276                 cur += len;
4277         }
4278         if (ret == DIR_ITEM_MISMATCH)
4279                 error(
4280                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4281                         root->objectid,
4282                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4283                         ref_key->objectid, ref_key->offset,
4284                         key->type == BTRFS_DIR_ITEM_KEY ?
4285                                 "DIR_ITEM" : "DIR_INDEX",
4286                         key->objectid, key->offset, namelen, name,
4287                         imode_to_type(mode));
4288 out:
4289         btrfs_release_path(&path);
4290         return ret;
4291 }
4292
4293 /*
4294  * Traverse the given INODE_REF and call find_dir_item() to find related
4295  * DIR_ITEM/DIR_INDEX.
4296  *
4297  * @root:       the root of the fs/file tree
4298  * @ref_key:    the key of the INODE_REF
4299  * @refs:       the count of INODE_REF
4300  * @mode:       the st_mode of INODE_ITEM
4301  *
4302  * Return 0 if no error occurred.
4303  */
4304 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4305                            struct extent_buffer *node, int slot, u64 *refs,
4306                            int mode)
4307 {
4308         struct btrfs_key key;
4309         struct btrfs_inode_ref *ref;
4310         char namebuf[BTRFS_NAME_LEN] = {0};
4311         u32 total;
4312         u32 cur = 0;
4313         u32 len;
4314         u32 name_len;
4315         u64 index;
4316         int ret, err = 0;
4317
4318         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4319         total = btrfs_item_size_nr(node, slot);
4320
4321 next:
4322         /* Update inode ref count */
4323         (*refs)++;
4324
4325         index = btrfs_inode_ref_index(node, ref);
4326         name_len = btrfs_inode_ref_name_len(node, ref);
4327         if (cur + sizeof(*ref) + name_len > total ||
4328             name_len > BTRFS_NAME_LEN) {
4329                 warning("root %llu INODE_REF[%llu %llu] name too long",
4330                         root->objectid, ref_key->objectid, ref_key->offset);
4331
4332                 if (total < cur + sizeof(*ref))
4333                         goto out;
4334                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4335         } else {
4336                 len = name_len;
4337         }
4338
4339         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4340
4341         /* Check root dir ref name */
4342         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4343                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4344                       root->objectid, ref_key->objectid, ref_key->offset,
4345                       namebuf);
4346                 err |= ROOT_DIR_ERROR;
4347         }
4348
4349         /* Find related DIR_INDEX */
4350         key.objectid = ref_key->offset;
4351         key.type = BTRFS_DIR_INDEX_KEY;
4352         key.offset = index;
4353         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4354         err |= ret;
4355
4356         /* Find related dir_item */
4357         key.objectid = ref_key->offset;
4358         key.type = BTRFS_DIR_ITEM_KEY;
4359         key.offset = btrfs_name_hash(namebuf, len);
4360         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4361         err |= ret;
4362
4363         len = sizeof(*ref) + name_len;
4364         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4365         cur += len;
4366         if (cur < total)
4367                 goto next;
4368
4369 out:
4370         return err;
4371 }
4372
4373 /*
4374  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4375  * DIR_ITEM/DIR_INDEX.
4376  *
4377  * @root:       the root of the fs/file tree
4378  * @ref_key:    the key of the INODE_EXTREF
4379  * @refs:       the count of INODE_EXTREF
4380  * @mode:       the st_mode of INODE_ITEM
4381  *
4382  * Return 0 if no error occurred.
4383  */
4384 static int check_inode_extref(struct btrfs_root *root,
4385                               struct btrfs_key *ref_key,
4386                               struct extent_buffer *node, int slot, u64 *refs,
4387                               int mode)
4388 {
4389         struct btrfs_key key;
4390         struct btrfs_inode_extref *extref;
4391         char namebuf[BTRFS_NAME_LEN] = {0};
4392         u32 total;
4393         u32 cur = 0;
4394         u32 len;
4395         u32 name_len;
4396         u64 index;
4397         u64 parent;
4398         int ret;
4399         int err = 0;
4400
4401         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4402         total = btrfs_item_size_nr(node, slot);
4403
4404 next:
4405         /* update inode ref count */
4406         (*refs)++;
4407         name_len = btrfs_inode_extref_name_len(node, extref);
4408         index = btrfs_inode_extref_index(node, extref);
4409         parent = btrfs_inode_extref_parent(node, extref);
4410         if (name_len <= BTRFS_NAME_LEN) {
4411                 len = name_len;
4412         } else {
4413                 len = BTRFS_NAME_LEN;
4414                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4415                         root->objectid, ref_key->objectid, ref_key->offset);
4416         }
4417         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4418
4419         /* Check root dir ref name */
4420         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4421                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4422                       root->objectid, ref_key->objectid, ref_key->offset,
4423                       namebuf);
4424                 err |= ROOT_DIR_ERROR;
4425         }
4426
4427         /* find related dir_index */
4428         key.objectid = parent;
4429         key.type = BTRFS_DIR_INDEX_KEY;
4430         key.offset = index;
4431         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4432         err |= ret;
4433
4434         /* find related dir_item */
4435         key.objectid = parent;
4436         key.type = BTRFS_DIR_ITEM_KEY;
4437         key.offset = btrfs_name_hash(namebuf, len);
4438         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4439         err |= ret;
4440
4441         len = sizeof(*extref) + name_len;
4442         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4443         cur += len;
4444
4445         if (cur < total)
4446                 goto next;
4447
4448         return err;
4449 }
4450
4451 /*
4452  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4453  * DIR_ITEM/DIR_INDEX match.
4454  *
4455  * @root:       the root of the fs/file tree
4456  * @key:        the key of the INODE_REF/INODE_EXTREF
4457  * @name:       the name in the INODE_REF/INODE_EXTREF
4458  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4459  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4460  * to (u64)-1
4461  * @ext_ref:    the EXTENDED_IREF feature
4462  *
4463  * Return 0 if no error occurred.
4464  * Return >0 for error bitmap
4465  */
4466 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4467                           char *name, int namelen, u64 index,
4468                           unsigned int ext_ref)
4469 {
4470         struct btrfs_path path;
4471         struct btrfs_inode_ref *ref;
4472         struct btrfs_inode_extref *extref;
4473         struct extent_buffer *node;
4474         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4475         u32 total;
4476         u32 cur = 0;
4477         u32 len;
4478         u32 ref_namelen;
4479         u64 ref_index;
4480         u64 parent;
4481         u64 dir_id;
4482         int slot;
4483         int ret;
4484
4485         btrfs_init_path(&path);
4486         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4487         if (ret) {
4488                 ret = INODE_REF_MISSING;
4489                 goto extref;
4490         }
4491
4492         node = path.nodes[0];
4493         slot = path.slots[0];
4494
4495         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4496         total = btrfs_item_size_nr(node, slot);
4497
4498         /* Iterate all entry of INODE_REF */
4499         while (cur < total) {
4500                 ret = INODE_REF_MISSING;
4501
4502                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4503                 ref_index = btrfs_inode_ref_index(node, ref);
4504                 if (index != (u64)-1 && index != ref_index)
4505                         goto next_ref;
4506
4507                 if (cur + sizeof(*ref) + ref_namelen > total ||
4508                     ref_namelen > BTRFS_NAME_LEN) {
4509                         warning("root %llu INODE %s[%llu %llu] name too long",
4510                                 root->objectid,
4511                                 key->type == BTRFS_INODE_REF_KEY ?
4512                                         "REF" : "EXTREF",
4513                                 key->objectid, key->offset);
4514
4515                         if (cur + sizeof(*ref) > total)
4516                                 break;
4517                         len = min_t(u32, total - cur - sizeof(*ref),
4518                                     BTRFS_NAME_LEN);
4519                 } else {
4520                         len = ref_namelen;
4521                 }
4522
4523                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4524                                    len);
4525
4526                 if (len != namelen || strncmp(ref_namebuf, name, len))
4527                         goto next_ref;
4528
4529                 ret = 0;
4530                 goto out;
4531 next_ref:
4532                 len = sizeof(*ref) + ref_namelen;
4533                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4534                 cur += len;
4535         }
4536
4537 extref:
4538         /* Skip if not support EXTENDED_IREF feature */
4539         if (!ext_ref)
4540                 goto out;
4541
4542         btrfs_release_path(&path);
4543         btrfs_init_path(&path);
4544
4545         dir_id = key->offset;
4546         key->type = BTRFS_INODE_EXTREF_KEY;
4547         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4548
4549         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4550         if (ret) {
4551                 ret = INODE_REF_MISSING;
4552                 goto out;
4553         }
4554
4555         node = path.nodes[0];
4556         slot = path.slots[0];
4557
4558         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4559         cur = 0;
4560         total = btrfs_item_size_nr(node, slot);
4561
4562         /* Iterate all entry of INODE_EXTREF */
4563         while (cur < total) {
4564                 ret = INODE_REF_MISSING;
4565
4566                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4567                 ref_index = btrfs_inode_extref_index(node, extref);
4568                 parent = btrfs_inode_extref_parent(node, extref);
4569                 if (index != (u64)-1 && index != ref_index)
4570                         goto next_extref;
4571
4572                 if (parent != dir_id)
4573                         goto next_extref;
4574
4575                 if (ref_namelen <= BTRFS_NAME_LEN) {
4576                         len = ref_namelen;
4577                 } else {
4578                         len = BTRFS_NAME_LEN;
4579                         warning("root %llu INODE %s[%llu %llu] name too long",
4580                                 root->objectid,
4581                                 key->type == BTRFS_INODE_REF_KEY ?
4582                                         "REF" : "EXTREF",
4583                                 key->objectid, key->offset);
4584                 }
4585                 read_extent_buffer(node, ref_namebuf,
4586                                    (unsigned long)(extref + 1), len);
4587
4588                 if (len != namelen || strncmp(ref_namebuf, name, len))
4589                         goto next_extref;
4590
4591                 ret = 0;
4592                 goto out;
4593
4594 next_extref:
4595                 len = sizeof(*extref) + ref_namelen;
4596                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4597                 cur += len;
4598
4599         }
4600 out:
4601         btrfs_release_path(&path);
4602         return ret;
4603 }
4604
4605 /*
4606  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4607  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4608  *
4609  * @root:       the root of the fs/file tree
4610  * @key:        the key of the INODE_REF/INODE_EXTREF
4611  * @size:       the st_size of the INODE_ITEM
4612  * @ext_ref:    the EXTENDED_IREF feature
4613  *
4614  * Return 0 if no error occurred.
4615  */
4616 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4617                           struct extent_buffer *node, int slot, u64 *size,
4618                           unsigned int ext_ref)
4619 {
4620         struct btrfs_dir_item *di;
4621         struct btrfs_inode_item *ii;
4622         struct btrfs_path path;
4623         struct btrfs_key location;
4624         char namebuf[BTRFS_NAME_LEN] = {0};
4625         u32 total;
4626         u32 cur = 0;
4627         u32 len;
4628         u32 name_len;
4629         u32 data_len;
4630         u8 filetype;
4631         u32 mode;
4632         u64 index;
4633         int ret;
4634         int err = 0;
4635
4636         /*
4637          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4638          * ignore index check.
4639          */
4640         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4641
4642         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4643         total = btrfs_item_size_nr(node, slot);
4644
4645         while (cur < total) {
4646                 data_len = btrfs_dir_data_len(node, di);
4647                 if (data_len)
4648                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4649                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650                               "DIR_ITEM" : "DIR_INDEX",
4651                               key->objectid, key->offset, data_len);
4652
4653                 name_len = btrfs_dir_name_len(node, di);
4654                 if (cur + sizeof(*di) + name_len > total ||
4655                     name_len > BTRFS_NAME_LEN) {
4656                         warning("root %llu %s[%llu %llu] name too long",
4657                                 root->objectid,
4658                                 key->type == BTRFS_DIR_ITEM_KEY ?
4659                                 "DIR_ITEM" : "DIR_INDEX",
4660                                 key->objectid, key->offset);
4661
4662                         if (cur + sizeof(*di) > total)
4663                                 break;
4664                         len = min_t(u32, total - cur - sizeof(*di),
4665                                     BTRFS_NAME_LEN);
4666                 } else {
4667                         len = name_len;
4668                 }
4669                 (*size) += name_len;
4670
4671                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4672                 filetype = btrfs_dir_type(node, di);
4673
4674                 btrfs_init_path(&path);
4675                 btrfs_dir_item_key_to_cpu(node, di, &location);
4676
4677                 /* Ignore related ROOT_ITEM check */
4678                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4679                         goto next;
4680
4681                 /* Check relative INODE_ITEM(existence/filetype) */
4682                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4683                 if (ret) {
4684                         err |= INODE_ITEM_MISSING;
4685                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4686                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4687                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4688                               key->offset, location.objectid, name_len,
4689                               namebuf, filetype);
4690                         goto next;
4691                 }
4692
4693                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4694                                     struct btrfs_inode_item);
4695                 mode = btrfs_inode_mode(path.nodes[0], ii);
4696
4697                 if (imode_to_type(mode) != filetype) {
4698                         err |= INODE_ITEM_MISMATCH;
4699                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4700                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4701                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4702                               key->offset, name_len, namebuf, filetype);
4703                 }
4704
4705                 /* Check relative INODE_REF/INODE_EXTREF */
4706                 location.type = BTRFS_INODE_REF_KEY;
4707                 location.offset = key->objectid;
4708                 ret = find_inode_ref(root, &location, namebuf, len,
4709                                        index, ext_ref);
4710                 err |= ret;
4711                 if (ret & INODE_REF_MISSING)
4712                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4713                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4714                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4715                               key->offset, name_len, namebuf, filetype);
4716
4717 next:
4718                 btrfs_release_path(&path);
4719                 len = sizeof(*di) + name_len + data_len;
4720                 di = (struct btrfs_dir_item *)((char *)di + len);
4721                 cur += len;
4722
4723                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4724                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4725                               root->objectid, key->objectid, key->offset);
4726                         break;
4727                 }
4728         }
4729
4730         return err;
4731 }
4732
4733 /*
4734  * Check file extent datasum/hole, update the size of the file extents,
4735  * check and update the last offset of the file extent.
4736  *
4737  * @root:       the root of fs/file tree.
4738  * @fkey:       the key of the file extent.
4739  * @nodatasum:  INODE_NODATASUM feature.
4740  * @size:       the sum of all EXTENT_DATA items size for this inode.
4741  * @end:        the offset of the last extent.
4742  *
4743  * Return 0 if no error occurred.
4744  */
4745 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4746                              struct extent_buffer *node, int slot,
4747                              unsigned int nodatasum, u64 *size, u64 *end)
4748 {
4749         struct btrfs_file_extent_item *fi;
4750         u64 disk_bytenr;
4751         u64 disk_num_bytes;
4752         u64 extent_num_bytes;
4753         u64 extent_offset;
4754         u64 csum_found;         /* In byte size, sectorsize aligned */
4755         u64 search_start;       /* Logical range start we search for csum */
4756         u64 search_len;         /* Logical range len we search for csum */
4757         unsigned int extent_type;
4758         unsigned int is_hole;
4759         int compressed = 0;
4760         int ret;
4761         int err = 0;
4762
4763         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4764
4765         /* Check inline extent */
4766         extent_type = btrfs_file_extent_type(node, fi);
4767         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4768                 struct btrfs_item *e = btrfs_item_nr(slot);
4769                 u32 item_inline_len;
4770
4771                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4772                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4773                 compressed = btrfs_file_extent_compression(node, fi);
4774                 if (extent_num_bytes == 0) {
4775                         error(
4776                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4777                                 root->objectid, fkey->objectid, fkey->offset);
4778                         err |= FILE_EXTENT_ERROR;
4779                 }
4780                 if (!compressed && extent_num_bytes != item_inline_len) {
4781                         error(
4782                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4783                                 root->objectid, fkey->objectid, fkey->offset,
4784                                 extent_num_bytes, item_inline_len);
4785                         err |= FILE_EXTENT_ERROR;
4786                 }
4787                 *end += extent_num_bytes;
4788                 *size += extent_num_bytes;
4789                 return err;
4790         }
4791
4792         /* Check extent type */
4793         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4794                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4795                 err |= FILE_EXTENT_ERROR;
4796                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4797                       root->objectid, fkey->objectid, fkey->offset);
4798                 return err;
4799         }
4800
4801         /* Check REG_EXTENT/PREALLOC_EXTENT */
4802         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4803         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4804         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4805         extent_offset = btrfs_file_extent_offset(node, fi);
4806         compressed = btrfs_file_extent_compression(node, fi);
4807         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4808
4809         /*
4810          * Check EXTENT_DATA csum
4811          *
4812          * For plain (uncompressed) extent, we should only check the range
4813          * we're referring to, as it's possible that part of prealloc extent
4814          * has been written, and has csum:
4815          *
4816          * |<--- Original large preallocated extent A ---->|
4817          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4818          *      No csum                         Has csum
4819          *
4820          * For compressed extent, we should check the whole range.
4821          */
4822         if (!compressed) {
4823                 search_start = disk_bytenr + extent_offset;
4824                 search_len = extent_num_bytes;
4825         } else {
4826                 search_start = disk_bytenr;
4827                 search_len = disk_num_bytes;
4828         }
4829         ret = count_csum_range(root, search_start, search_len, &csum_found);
4830         if (csum_found > 0 && nodatasum) {
4831                 err |= ODD_CSUM_ITEM;
4832                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4833                       root->objectid, fkey->objectid, fkey->offset);
4834         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4835                    !is_hole && (ret < 0 || csum_found < search_len)) {
4836                 err |= CSUM_ITEM_MISSING;
4837                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4838                       root->objectid, fkey->objectid, fkey->offset,
4839                       csum_found, search_len);
4840         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4841                 err |= ODD_CSUM_ITEM;
4842                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4843                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4844         }
4845
4846         /* Check EXTENT_DATA hole */
4847         if (!no_holes && *end != fkey->offset) {
4848                 err |= FILE_EXTENT_ERROR;
4849                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4850                       root->objectid, fkey->objectid, fkey->offset);
4851         }
4852
4853         *end += extent_num_bytes;
4854         if (!is_hole)
4855                 *size += extent_num_bytes;
4856
4857         return err;
4858 }
4859
4860 /*
4861  * Check INODE_ITEM and related ITEMs (the same inode number)
4862  * 1. check link count
4863  * 2. check inode ref/extref
4864  * 3. check dir item/index
4865  *
4866  * @ext_ref:    the EXTENDED_IREF feature
4867  *
4868  * Return 0 if no error occurred.
4869  * Return >0 for error or hit the traversal is done(by error bitmap)
4870  */
4871 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4872                             unsigned int ext_ref)
4873 {
4874         struct extent_buffer *node;
4875         struct btrfs_inode_item *ii;
4876         struct btrfs_key key;
4877         u64 inode_id;
4878         u32 mode;
4879         u64 nlink;
4880         u64 nbytes;
4881         u64 isize;
4882         u64 size = 0;
4883         u64 refs = 0;
4884         u64 extent_end = 0;
4885         u64 extent_size = 0;
4886         unsigned int dir;
4887         unsigned int nodatasum;
4888         int slot;
4889         int ret;
4890         int err = 0;
4891
4892         node = path->nodes[0];
4893         slot = path->slots[0];
4894
4895         btrfs_item_key_to_cpu(node, &key, slot);
4896         inode_id = key.objectid;
4897
4898         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4899                 ret = btrfs_next_item(root, path);
4900                 if (ret > 0)
4901                         err |= LAST_ITEM;
4902                 return err;
4903         }
4904
4905         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4906         isize = btrfs_inode_size(node, ii);
4907         nbytes = btrfs_inode_nbytes(node, ii);
4908         mode = btrfs_inode_mode(node, ii);
4909         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4910         nlink = btrfs_inode_nlink(node, ii);
4911         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4912
4913         while (1) {
4914                 ret = btrfs_next_item(root, path);
4915                 if (ret < 0) {
4916                         /* out will fill 'err' rusing current statistics */
4917                         goto out;
4918                 } else if (ret > 0) {
4919                         err |= LAST_ITEM;
4920                         goto out;
4921                 }
4922
4923                 node = path->nodes[0];
4924                 slot = path->slots[0];
4925                 btrfs_item_key_to_cpu(node, &key, slot);
4926                 if (key.objectid != inode_id)
4927                         goto out;
4928
4929                 switch (key.type) {
4930                 case BTRFS_INODE_REF_KEY:
4931                         ret = check_inode_ref(root, &key, node, slot, &refs,
4932                                               mode);
4933                         err |= ret;
4934                         break;
4935                 case BTRFS_INODE_EXTREF_KEY:
4936                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4937                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4938                                         root->objectid, key.objectid,
4939                                         key.offset);
4940                         ret = check_inode_extref(root, &key, node, slot, &refs,
4941                                                  mode);
4942                         err |= ret;
4943                         break;
4944                 case BTRFS_DIR_ITEM_KEY:
4945                 case BTRFS_DIR_INDEX_KEY:
4946                         if (!dir) {
4947                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4948                                         root->objectid, inode_id,
4949                                         imode_to_type(mode), key.objectid,
4950                                         key.offset);
4951                         }
4952                         ret = check_dir_item(root, &key, node, slot, &size,
4953                                              ext_ref);
4954                         err |= ret;
4955                         break;
4956                 case BTRFS_EXTENT_DATA_KEY:
4957                         if (dir) {
4958                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4959                                         root->objectid, inode_id, key.objectid,
4960                                         key.offset);
4961                         }
4962                         ret = check_file_extent(root, &key, node, slot,
4963                                                 nodatasum, &extent_size,
4964                                                 &extent_end);
4965                         err |= ret;
4966                         break;
4967                 case BTRFS_XATTR_ITEM_KEY:
4968                         break;
4969                 default:
4970                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4971                               key.objectid, key.type, key.offset);
4972                 }
4973         }
4974
4975 out:
4976         /* verify INODE_ITEM nlink/isize/nbytes */
4977         if (dir) {
4978                 if (nlink != 1) {
4979                         err |= LINK_COUNT_ERROR;
4980                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4981                               root->objectid, inode_id, nlink);
4982                 }
4983
4984                 /*
4985                  * Just a warning, as dir inode nbytes is just an
4986                  * instructive value.
4987                  */
4988                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4989                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4990                                 root->objectid, inode_id,
4991                                 root->fs_info->nodesize);
4992                 }
4993
4994                 if (isize != size) {
4995                         err |= ISIZE_ERROR;
4996                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4997                               root->objectid, inode_id, isize, size);
4998                 }
4999         } else {
5000                 if (nlink != refs) {
5001                         err |= LINK_COUNT_ERROR;
5002                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5003                               root->objectid, inode_id, nlink, refs);
5004                 } else if (!nlink) {
5005                         err |= ORPHAN_ITEM;
5006                 }
5007
5008                 if (!nbytes && !no_holes && extent_end < isize) {
5009                         err |= NBYTES_ERROR;
5010                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5011                               root->objectid, inode_id, isize);
5012                 }
5013
5014                 if (nbytes != extent_size) {
5015                         err |= NBYTES_ERROR;
5016                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5017                               root->objectid, inode_id, nbytes, extent_size);
5018                 }
5019         }
5020
5021         return err;
5022 }
5023
5024 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5025 {
5026         struct btrfs_path path;
5027         struct btrfs_key key;
5028         int err = 0;
5029         int ret;
5030
5031         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5032         key.type = BTRFS_INODE_ITEM_KEY;
5033         key.offset = 0;
5034
5035         /* For root being dropped, we don't need to check first inode */
5036         if (btrfs_root_refs(&root->root_item) == 0 &&
5037             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5038             key.objectid)
5039                 return 0;
5040
5041         btrfs_init_path(&path);
5042
5043         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5044         if (ret < 0)
5045                 goto out;
5046         if (ret > 0) {
5047                 ret = 0;
5048                 err |= INODE_ITEM_MISSING;
5049                 error("first inode item of root %llu is missing",
5050                       root->objectid);
5051         }
5052
5053         err |= check_inode_item(root, &path, ext_ref);
5054         err &= ~LAST_ITEM;
5055         if (err && !ret)
5056                 ret = -EIO;
5057 out:
5058         btrfs_release_path(&path);
5059         return ret;
5060 }
5061
5062 /*
5063  * Iterate all item on the tree and call check_inode_item() to check.
5064  *
5065  * @root:       the root of the tree to be checked.
5066  * @ext_ref:    the EXTENDED_IREF feature
5067  *
5068  * Return 0 if no error found.
5069  * Return <0 for error.
5070  */
5071 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5072 {
5073         struct btrfs_path path;
5074         struct node_refs nrefs;
5075         struct btrfs_root_item *root_item = &root->root_item;
5076         int ret;
5077         int level;
5078         int err = 0;
5079
5080         /*
5081          * We need to manually check the first inode item(256)
5082          * As the following traversal function will only start from
5083          * the first inode item in the leaf, if inode item(256) is missing
5084          * we will just skip it forever.
5085          */
5086         ret = check_fs_first_inode(root, ext_ref);
5087         if (ret < 0)
5088                 return ret;
5089
5090         memset(&nrefs, 0, sizeof(nrefs));
5091         level = btrfs_header_level(root->node);
5092         btrfs_init_path(&path);
5093
5094         if (btrfs_root_refs(root_item) > 0 ||
5095             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5096                 path.nodes[level] = root->node;
5097                 path.slots[level] = 0;
5098                 extent_buffer_get(root->node);
5099         } else {
5100                 struct btrfs_key key;
5101
5102                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5103                 level = root_item->drop_level;
5104                 path.lowest_level = level;
5105                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5106                 if (ret < 0)
5107                         goto out;
5108                 ret = 0;
5109         }
5110
5111         while (1) {
5112                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5113                 err |= !!ret;
5114
5115                 /* if ret is negative, walk shall stop */
5116                 if (ret < 0) {
5117                         ret = err;
5118                         break;
5119                 }
5120
5121                 ret = walk_up_tree_v2(root, &path, &level);
5122                 if (ret != 0) {
5123                         /* Normal exit, reset ret to err */
5124                         ret = err;
5125                         break;
5126                 }
5127         }
5128
5129 out:
5130         btrfs_release_path(&path);
5131         return ret;
5132 }
5133
5134 /*
5135  * Find the relative ref for root_ref and root_backref.
5136  *
5137  * @root:       the root of the root tree.
5138  * @ref_key:    the key of the root ref.
5139  *
5140  * Return 0 if no error occurred.
5141  */
5142 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5143                           struct extent_buffer *node, int slot)
5144 {
5145         struct btrfs_path path;
5146         struct btrfs_key key;
5147         struct btrfs_root_ref *ref;
5148         struct btrfs_root_ref *backref;
5149         char ref_name[BTRFS_NAME_LEN] = {0};
5150         char backref_name[BTRFS_NAME_LEN] = {0};
5151         u64 ref_dirid;
5152         u64 ref_seq;
5153         u32 ref_namelen;
5154         u64 backref_dirid;
5155         u64 backref_seq;
5156         u32 backref_namelen;
5157         u32 len;
5158         int ret;
5159         int err = 0;
5160
5161         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5162         ref_dirid = btrfs_root_ref_dirid(node, ref);
5163         ref_seq = btrfs_root_ref_sequence(node, ref);
5164         ref_namelen = btrfs_root_ref_name_len(node, ref);
5165
5166         if (ref_namelen <= BTRFS_NAME_LEN) {
5167                 len = ref_namelen;
5168         } else {
5169                 len = BTRFS_NAME_LEN;
5170                 warning("%s[%llu %llu] ref_name too long",
5171                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5172                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5173                         ref_key->offset);
5174         }
5175         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5176
5177         /* Find relative root_ref */
5178         key.objectid = ref_key->offset;
5179         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5180         key.offset = ref_key->objectid;
5181
5182         btrfs_init_path(&path);
5183         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5184         if (ret) {
5185                 err |= ROOT_REF_MISSING;
5186                 error("%s[%llu %llu] couldn't find relative ref",
5187                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5188                       "ROOT_REF" : "ROOT_BACKREF",
5189                       ref_key->objectid, ref_key->offset);
5190                 goto out;
5191         }
5192
5193         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5194                                  struct btrfs_root_ref);
5195         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5196         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5197         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5198
5199         if (backref_namelen <= BTRFS_NAME_LEN) {
5200                 len = backref_namelen;
5201         } else {
5202                 len = BTRFS_NAME_LEN;
5203                 warning("%s[%llu %llu] ref_name too long",
5204                         key.type == BTRFS_ROOT_REF_KEY ?
5205                         "ROOT_REF" : "ROOT_BACKREF",
5206                         key.objectid, key.offset);
5207         }
5208         read_extent_buffer(path.nodes[0], backref_name,
5209                            (unsigned long)(backref + 1), len);
5210
5211         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5212             ref_namelen != backref_namelen ||
5213             strncmp(ref_name, backref_name, len)) {
5214                 err |= ROOT_REF_MISMATCH;
5215                 error("%s[%llu %llu] mismatch relative ref",
5216                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5217                       "ROOT_REF" : "ROOT_BACKREF",
5218                       ref_key->objectid, ref_key->offset);
5219         }
5220 out:
5221         btrfs_release_path(&path);
5222         return err;
5223 }
5224
5225 /*
5226  * Check all fs/file tree in low_memory mode.
5227  *
5228  * 1. for fs tree root item, call check_fs_root_v2()
5229  * 2. for fs tree root ref/backref, call check_root_ref()
5230  *
5231  * Return 0 if no error occurred.
5232  */
5233 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5234 {
5235         struct btrfs_root *tree_root = fs_info->tree_root;
5236         struct btrfs_root *cur_root = NULL;
5237         struct btrfs_path path;
5238         struct btrfs_key key;
5239         struct extent_buffer *node;
5240         unsigned int ext_ref;
5241         int slot;
5242         int ret;
5243         int err = 0;
5244
5245         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5246
5247         btrfs_init_path(&path);
5248         key.objectid = BTRFS_FS_TREE_OBJECTID;
5249         key.offset = 0;
5250         key.type = BTRFS_ROOT_ITEM_KEY;
5251
5252         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5253         if (ret < 0) {
5254                 err = ret;
5255                 goto out;
5256         } else if (ret > 0) {
5257                 err = -ENOENT;
5258                 goto out;
5259         }
5260
5261         while (1) {
5262                 node = path.nodes[0];
5263                 slot = path.slots[0];
5264                 btrfs_item_key_to_cpu(node, &key, slot);
5265                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5266                         goto out;
5267                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5268                     fs_root_objectid(key.objectid)) {
5269                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5270                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5271                                                                        &key);
5272                         } else {
5273                                 key.offset = (u64)-1;
5274                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5275                         }
5276
5277                         if (IS_ERR(cur_root)) {
5278                                 error("Fail to read fs/subvol tree: %lld",
5279                                       key.objectid);
5280                                 err = -EIO;
5281                                 goto next;
5282                         }
5283
5284                         ret = check_fs_root_v2(cur_root, ext_ref);
5285                         err |= ret;
5286
5287                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5288                                 btrfs_free_fs_root(cur_root);
5289                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5290                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5291                         ret = check_root_ref(tree_root, &key, node, slot);
5292                         err |= ret;
5293                 }
5294 next:
5295                 ret = btrfs_next_item(tree_root, &path);
5296                 if (ret > 0)
5297                         goto out;
5298                 if (ret < 0) {
5299                         err = ret;
5300                         goto out;
5301                 }
5302         }
5303
5304 out:
5305         btrfs_release_path(&path);
5306         return err;
5307 }
5308
5309 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5310 {
5311         struct list_head *cur = rec->backrefs.next;
5312         struct extent_backref *back;
5313         struct tree_backref *tback;
5314         struct data_backref *dback;
5315         u64 found = 0;
5316         int err = 0;
5317
5318         while(cur != &rec->backrefs) {
5319                 back = to_extent_backref(cur);
5320                 cur = cur->next;
5321                 if (!back->found_extent_tree) {
5322                         err = 1;
5323                         if (!print_errs)
5324                                 goto out;
5325                         if (back->is_data) {
5326                                 dback = to_data_backref(back);
5327                                 fprintf(stderr, "Backref %llu %s %llu"
5328                                         " owner %llu offset %llu num_refs %lu"
5329                                         " not found in extent tree\n",
5330                                         (unsigned long long)rec->start,
5331                                         back->full_backref ?
5332                                         "parent" : "root",
5333                                         back->full_backref ?
5334                                         (unsigned long long)dback->parent:
5335                                         (unsigned long long)dback->root,
5336                                         (unsigned long long)dback->owner,
5337                                         (unsigned long long)dback->offset,
5338                                         (unsigned long)dback->num_refs);
5339                         } else {
5340                                 tback = to_tree_backref(back);
5341                                 fprintf(stderr, "Backref %llu parent %llu"
5342                                         " root %llu not found in extent tree\n",
5343                                         (unsigned long long)rec->start,
5344                                         (unsigned long long)tback->parent,
5345                                         (unsigned long long)tback->root);
5346                         }
5347                 }
5348                 if (!back->is_data && !back->found_ref) {
5349                         err = 1;
5350                         if (!print_errs)
5351                                 goto out;
5352                         tback = to_tree_backref(back);
5353                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5354                                 (unsigned long long)rec->start,
5355                                 back->full_backref ? "parent" : "root",
5356                                 back->full_backref ?
5357                                 (unsigned long long)tback->parent :
5358                                 (unsigned long long)tback->root, back);
5359                 }
5360                 if (back->is_data) {
5361                         dback = to_data_backref(back);
5362                         if (dback->found_ref != dback->num_refs) {
5363                                 err = 1;
5364                                 if (!print_errs)
5365                                         goto out;
5366                                 fprintf(stderr, "Incorrect local backref count"
5367                                         " on %llu %s %llu owner %llu"
5368                                         " offset %llu found %u wanted %u back %p\n",
5369                                         (unsigned long long)rec->start,
5370                                         back->full_backref ?
5371                                         "parent" : "root",
5372                                         back->full_backref ?
5373                                         (unsigned long long)dback->parent:
5374                                         (unsigned long long)dback->root,
5375                                         (unsigned long long)dback->owner,
5376                                         (unsigned long long)dback->offset,
5377                                         dback->found_ref, dback->num_refs, back);
5378                         }
5379                         if (dback->disk_bytenr != rec->start) {
5380                                 err = 1;
5381                                 if (!print_errs)
5382                                         goto out;
5383                                 fprintf(stderr, "Backref disk bytenr does not"
5384                                         " match extent record, bytenr=%llu, "
5385                                         "ref bytenr=%llu\n",
5386                                         (unsigned long long)rec->start,
5387                                         (unsigned long long)dback->disk_bytenr);
5388                         }
5389
5390                         if (dback->bytes != rec->nr) {
5391                                 err = 1;
5392                                 if (!print_errs)
5393                                         goto out;
5394                                 fprintf(stderr, "Backref bytes do not match "
5395                                         "extent backref, bytenr=%llu, ref "
5396                                         "bytes=%llu, backref bytes=%llu\n",
5397                                         (unsigned long long)rec->start,
5398                                         (unsigned long long)rec->nr,
5399                                         (unsigned long long)dback->bytes);
5400                         }
5401                 }
5402                 if (!back->is_data) {
5403                         found += 1;
5404                 } else {
5405                         dback = to_data_backref(back);
5406                         found += dback->found_ref;
5407                 }
5408         }
5409         if (found != rec->refs) {
5410                 err = 1;
5411                 if (!print_errs)
5412                         goto out;
5413                 fprintf(stderr, "Incorrect global backref count "
5414                         "on %llu found %llu wanted %llu\n",
5415                         (unsigned long long)rec->start,
5416                         (unsigned long long)found,
5417                         (unsigned long long)rec->refs);
5418         }
5419 out:
5420         return err;
5421 }
5422
5423 static int free_all_extent_backrefs(struct extent_record *rec)
5424 {
5425         struct extent_backref *back;
5426         struct list_head *cur;
5427         while (!list_empty(&rec->backrefs)) {
5428                 cur = rec->backrefs.next;
5429                 back = to_extent_backref(cur);
5430                 list_del(cur);
5431                 free(back);
5432         }
5433         return 0;
5434 }
5435
5436 static void free_extent_record_cache(struct cache_tree *extent_cache)
5437 {
5438         struct cache_extent *cache;
5439         struct extent_record *rec;
5440
5441         while (1) {
5442                 cache = first_cache_extent(extent_cache);
5443                 if (!cache)
5444                         break;
5445                 rec = container_of(cache, struct extent_record, cache);
5446                 remove_cache_extent(extent_cache, cache);
5447                 free_all_extent_backrefs(rec);
5448                 free(rec);
5449         }
5450 }
5451
5452 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5453                                  struct extent_record *rec)
5454 {
5455         if (rec->content_checked && rec->owner_ref_checked &&
5456             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5457             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5458             !rec->bad_full_backref && !rec->crossing_stripes &&
5459             !rec->wrong_chunk_type) {
5460                 remove_cache_extent(extent_cache, &rec->cache);
5461                 free_all_extent_backrefs(rec);
5462                 list_del_init(&rec->list);
5463                 free(rec);
5464         }
5465         return 0;
5466 }
5467
5468 static int check_owner_ref(struct btrfs_root *root,
5469                             struct extent_record *rec,
5470                             struct extent_buffer *buf)
5471 {
5472         struct extent_backref *node;
5473         struct tree_backref *back;
5474         struct btrfs_root *ref_root;
5475         struct btrfs_key key;
5476         struct btrfs_path path;
5477         struct extent_buffer *parent;
5478         int level;
5479         int found = 0;
5480         int ret;
5481
5482         list_for_each_entry(node, &rec->backrefs, list) {
5483                 if (node->is_data)
5484                         continue;
5485                 if (!node->found_ref)
5486                         continue;
5487                 if (node->full_backref)
5488                         continue;
5489                 back = to_tree_backref(node);
5490                 if (btrfs_header_owner(buf) == back->root)
5491                         return 0;
5492         }
5493         BUG_ON(rec->is_root);
5494
5495         /* try to find the block by search corresponding fs tree */
5496         key.objectid = btrfs_header_owner(buf);
5497         key.type = BTRFS_ROOT_ITEM_KEY;
5498         key.offset = (u64)-1;
5499
5500         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5501         if (IS_ERR(ref_root))
5502                 return 1;
5503
5504         level = btrfs_header_level(buf);
5505         if (level == 0)
5506                 btrfs_item_key_to_cpu(buf, &key, 0);
5507         else
5508                 btrfs_node_key_to_cpu(buf, &key, 0);
5509
5510         btrfs_init_path(&path);
5511         path.lowest_level = level + 1;
5512         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5513         if (ret < 0)
5514                 return 0;
5515
5516         parent = path.nodes[level + 1];
5517         if (parent && buf->start == btrfs_node_blockptr(parent,
5518                                                         path.slots[level + 1]))
5519                 found = 1;
5520
5521         btrfs_release_path(&path);
5522         return found ? 0 : 1;
5523 }
5524
5525 static int is_extent_tree_record(struct extent_record *rec)
5526 {
5527         struct list_head *cur = rec->backrefs.next;
5528         struct extent_backref *node;
5529         struct tree_backref *back;
5530         int is_extent = 0;
5531
5532         while(cur != &rec->backrefs) {
5533                 node = to_extent_backref(cur);
5534                 cur = cur->next;
5535                 if (node->is_data)
5536                         return 0;
5537                 back = to_tree_backref(node);
5538                 if (node->full_backref)
5539                         return 0;
5540                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5541                         is_extent = 1;
5542         }
5543         return is_extent;
5544 }
5545
5546
5547 static int record_bad_block_io(struct btrfs_fs_info *info,
5548                                struct cache_tree *extent_cache,
5549                                u64 start, u64 len)
5550 {
5551         struct extent_record *rec;
5552         struct cache_extent *cache;
5553         struct btrfs_key key;
5554
5555         cache = lookup_cache_extent(extent_cache, start, len);
5556         if (!cache)
5557                 return 0;
5558
5559         rec = container_of(cache, struct extent_record, cache);
5560         if (!is_extent_tree_record(rec))
5561                 return 0;
5562
5563         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5564         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5565 }
5566
5567 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5568                        struct extent_buffer *buf, int slot)
5569 {
5570         if (btrfs_header_level(buf)) {
5571                 struct btrfs_key_ptr ptr1, ptr2;
5572
5573                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5574                                    sizeof(struct btrfs_key_ptr));
5575                 read_extent_buffer(buf, &ptr2,
5576                                    btrfs_node_key_ptr_offset(slot + 1),
5577                                    sizeof(struct btrfs_key_ptr));
5578                 write_extent_buffer(buf, &ptr1,
5579                                     btrfs_node_key_ptr_offset(slot + 1),
5580                                     sizeof(struct btrfs_key_ptr));
5581                 write_extent_buffer(buf, &ptr2,
5582                                     btrfs_node_key_ptr_offset(slot),
5583                                     sizeof(struct btrfs_key_ptr));
5584                 if (slot == 0) {
5585                         struct btrfs_disk_key key;
5586                         btrfs_node_key(buf, &key, 0);
5587                         btrfs_fixup_low_keys(root, path, &key,
5588                                              btrfs_header_level(buf) + 1);
5589                 }
5590         } else {
5591                 struct btrfs_item *item1, *item2;
5592                 struct btrfs_key k1, k2;
5593                 char *item1_data, *item2_data;
5594                 u32 item1_offset, item2_offset, item1_size, item2_size;
5595
5596                 item1 = btrfs_item_nr(slot);
5597                 item2 = btrfs_item_nr(slot + 1);
5598                 btrfs_item_key_to_cpu(buf, &k1, slot);
5599                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5600                 item1_offset = btrfs_item_offset(buf, item1);
5601                 item2_offset = btrfs_item_offset(buf, item2);
5602                 item1_size = btrfs_item_size(buf, item1);
5603                 item2_size = btrfs_item_size(buf, item2);
5604
5605                 item1_data = malloc(item1_size);
5606                 if (!item1_data)
5607                         return -ENOMEM;
5608                 item2_data = malloc(item2_size);
5609                 if (!item2_data) {
5610                         free(item1_data);
5611                         return -ENOMEM;
5612                 }
5613
5614                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5615                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5616
5617                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5618                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5619                 free(item1_data);
5620                 free(item2_data);
5621
5622                 btrfs_set_item_offset(buf, item1, item2_offset);
5623                 btrfs_set_item_offset(buf, item2, item1_offset);
5624                 btrfs_set_item_size(buf, item1, item2_size);
5625                 btrfs_set_item_size(buf, item2, item1_size);
5626
5627                 path->slots[0] = slot;
5628                 btrfs_set_item_key_unsafe(root, path, &k2);
5629                 path->slots[0] = slot + 1;
5630                 btrfs_set_item_key_unsafe(root, path, &k1);
5631         }
5632         return 0;
5633 }
5634
5635 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5636 {
5637         struct extent_buffer *buf;
5638         struct btrfs_key k1, k2;
5639         int i;
5640         int level = path->lowest_level;
5641         int ret = -EIO;
5642
5643         buf = path->nodes[level];
5644         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5645                 if (level) {
5646                         btrfs_node_key_to_cpu(buf, &k1, i);
5647                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5648                 } else {
5649                         btrfs_item_key_to_cpu(buf, &k1, i);
5650                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5651                 }
5652                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5653                         continue;
5654                 ret = swap_values(root, path, buf, i);
5655                 if (ret)
5656                         break;
5657                 btrfs_mark_buffer_dirty(buf);
5658                 i = 0;
5659         }
5660         return ret;
5661 }
5662
5663 static int delete_bogus_item(struct btrfs_root *root,
5664                              struct btrfs_path *path,
5665                              struct extent_buffer *buf, int slot)
5666 {
5667         struct btrfs_key key;
5668         int nritems = btrfs_header_nritems(buf);
5669
5670         btrfs_item_key_to_cpu(buf, &key, slot);
5671
5672         /* These are all the keys we can deal with missing. */
5673         if (key.type != BTRFS_DIR_INDEX_KEY &&
5674             key.type != BTRFS_EXTENT_ITEM_KEY &&
5675             key.type != BTRFS_METADATA_ITEM_KEY &&
5676             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5677             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5678                 return -1;
5679
5680         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5681                (unsigned long long)key.objectid, key.type,
5682                (unsigned long long)key.offset, slot, buf->start);
5683         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5684                               btrfs_item_nr_offset(slot + 1),
5685                               sizeof(struct btrfs_item) *
5686                               (nritems - slot - 1));
5687         btrfs_set_header_nritems(buf, nritems - 1);
5688         if (slot == 0) {
5689                 struct btrfs_disk_key disk_key;
5690
5691                 btrfs_item_key(buf, &disk_key, 0);
5692                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5693         }
5694         btrfs_mark_buffer_dirty(buf);
5695         return 0;
5696 }
5697
5698 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5699 {
5700         struct extent_buffer *buf;
5701         int i;
5702         int ret = 0;
5703
5704         /* We should only get this for leaves */
5705         BUG_ON(path->lowest_level);
5706         buf = path->nodes[0];
5707 again:
5708         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5709                 unsigned int shift = 0, offset;
5710
5711                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5712                     BTRFS_LEAF_DATA_SIZE(root)) {
5713                         if (btrfs_item_end_nr(buf, i) >
5714                             BTRFS_LEAF_DATA_SIZE(root)) {
5715                                 ret = delete_bogus_item(root, path, buf, i);
5716                                 if (!ret)
5717                                         goto again;
5718                                 fprintf(stderr, "item is off the end of the "
5719                                         "leaf, can't fix\n");
5720                                 ret = -EIO;
5721                                 break;
5722                         }
5723                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5724                                 btrfs_item_end_nr(buf, i);
5725                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5726                            btrfs_item_offset_nr(buf, i - 1)) {
5727                         if (btrfs_item_end_nr(buf, i) >
5728                             btrfs_item_offset_nr(buf, i - 1)) {
5729                                 ret = delete_bogus_item(root, path, buf, i);
5730                                 if (!ret)
5731                                         goto again;
5732                                 fprintf(stderr, "items overlap, can't fix\n");
5733                                 ret = -EIO;
5734                                 break;
5735                         }
5736                         shift = btrfs_item_offset_nr(buf, i - 1) -
5737                                 btrfs_item_end_nr(buf, i);
5738                 }
5739                 if (!shift)
5740                         continue;
5741
5742                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5743                        i, shift, (unsigned long long)buf->start);
5744                 offset = btrfs_item_offset_nr(buf, i);
5745                 memmove_extent_buffer(buf,
5746                                       btrfs_leaf_data(buf) + offset + shift,
5747                                       btrfs_leaf_data(buf) + offset,
5748                                       btrfs_item_size_nr(buf, i));
5749                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5750                                       offset + shift);
5751                 btrfs_mark_buffer_dirty(buf);
5752         }
5753
5754         /*
5755          * We may have moved things, in which case we want to exit so we don't
5756          * write those changes out.  Once we have proper abort functionality in
5757          * progs this can be changed to something nicer.
5758          */
5759         BUG_ON(ret);
5760         return ret;
5761 }
5762
5763 /*
5764  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5765  * then just return -EIO.
5766  */
5767 static int try_to_fix_bad_block(struct btrfs_root *root,
5768                                 struct extent_buffer *buf,
5769                                 enum btrfs_tree_block_status status)
5770 {
5771         struct btrfs_trans_handle *trans;
5772         struct ulist *roots;
5773         struct ulist_node *node;
5774         struct btrfs_root *search_root;
5775         struct btrfs_path path;
5776         struct ulist_iterator iter;
5777         struct btrfs_key root_key, key;
5778         int ret;
5779
5780         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5781             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5782                 return -EIO;
5783
5784         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5785         if (ret)
5786                 return -EIO;
5787
5788         btrfs_init_path(&path);
5789         ULIST_ITER_INIT(&iter);
5790         while ((node = ulist_next(roots, &iter))) {
5791                 root_key.objectid = node->val;
5792                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5793                 root_key.offset = (u64)-1;
5794
5795                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5796                 if (IS_ERR(root)) {
5797                         ret = -EIO;
5798                         break;
5799                 }
5800
5801
5802                 trans = btrfs_start_transaction(search_root, 0);
5803                 if (IS_ERR(trans)) {
5804                         ret = PTR_ERR(trans);
5805                         break;
5806                 }
5807
5808                 path.lowest_level = btrfs_header_level(buf);
5809                 path.skip_check_block = 1;
5810                 if (path.lowest_level)
5811                         btrfs_node_key_to_cpu(buf, &key, 0);
5812                 else
5813                         btrfs_item_key_to_cpu(buf, &key, 0);
5814                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5815                 if (ret) {
5816                         ret = -EIO;
5817                         btrfs_commit_transaction(trans, search_root);
5818                         break;
5819                 }
5820                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5821                         ret = fix_key_order(search_root, &path);
5822                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5823                         ret = fix_item_offset(search_root, &path);
5824                 if (ret) {
5825                         btrfs_commit_transaction(trans, search_root);
5826                         break;
5827                 }
5828                 btrfs_release_path(&path);
5829                 btrfs_commit_transaction(trans, search_root);
5830         }
5831         ulist_free(roots);
5832         btrfs_release_path(&path);
5833         return ret;
5834 }
5835
5836 static int check_block(struct btrfs_root *root,
5837                        struct cache_tree *extent_cache,
5838                        struct extent_buffer *buf, u64 flags)
5839 {
5840         struct extent_record *rec;
5841         struct cache_extent *cache;
5842         struct btrfs_key key;
5843         enum btrfs_tree_block_status status;
5844         int ret = 0;
5845         int level;
5846
5847         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5848         if (!cache)
5849                 return 1;
5850         rec = container_of(cache, struct extent_record, cache);
5851         rec->generation = btrfs_header_generation(buf);
5852
5853         level = btrfs_header_level(buf);
5854         if (btrfs_header_nritems(buf) > 0) {
5855
5856                 if (level == 0)
5857                         btrfs_item_key_to_cpu(buf, &key, 0);
5858                 else
5859                         btrfs_node_key_to_cpu(buf, &key, 0);
5860
5861                 rec->info_objectid = key.objectid;
5862         }
5863         rec->info_level = level;
5864
5865         if (btrfs_is_leaf(buf))
5866                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5867         else
5868                 status = btrfs_check_node(root, &rec->parent_key, buf);
5869
5870         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5871                 if (repair)
5872                         status = try_to_fix_bad_block(root, buf, status);
5873                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5874                         ret = -EIO;
5875                         fprintf(stderr, "bad block %llu\n",
5876                                 (unsigned long long)buf->start);
5877                 } else {
5878                         /*
5879                          * Signal to callers we need to start the scan over
5880                          * again since we'll have cowed blocks.
5881                          */
5882                         ret = -EAGAIN;
5883                 }
5884         } else {
5885                 rec->content_checked = 1;
5886                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5887                         rec->owner_ref_checked = 1;
5888                 else {
5889                         ret = check_owner_ref(root, rec, buf);
5890                         if (!ret)
5891                                 rec->owner_ref_checked = 1;
5892                 }
5893         }
5894         if (!ret)
5895                 maybe_free_extent_rec(extent_cache, rec);
5896         return ret;
5897 }
5898
5899 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5900                                                 u64 parent, u64 root)
5901 {
5902         struct list_head *cur = rec->backrefs.next;
5903         struct extent_backref *node;
5904         struct tree_backref *back;
5905
5906         while(cur != &rec->backrefs) {
5907                 node = to_extent_backref(cur);
5908                 cur = cur->next;
5909                 if (node->is_data)
5910                         continue;
5911                 back = to_tree_backref(node);
5912                 if (parent > 0) {
5913                         if (!node->full_backref)
5914                                 continue;
5915                         if (parent == back->parent)
5916                                 return back;
5917                 } else {
5918                         if (node->full_backref)
5919                                 continue;
5920                         if (back->root == root)
5921                                 return back;
5922                 }
5923         }
5924         return NULL;
5925 }
5926
5927 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5928                                                 u64 parent, u64 root)
5929 {
5930         struct tree_backref *ref = malloc(sizeof(*ref));
5931
5932         if (!ref)
5933                 return NULL;
5934         memset(&ref->node, 0, sizeof(ref->node));
5935         if (parent > 0) {
5936                 ref->parent = parent;
5937                 ref->node.full_backref = 1;
5938         } else {
5939                 ref->root = root;
5940                 ref->node.full_backref = 0;
5941         }
5942         list_add_tail(&ref->node.list, &rec->backrefs);
5943
5944         return ref;
5945 }
5946
5947 static struct data_backref *find_data_backref(struct extent_record *rec,
5948                                                 u64 parent, u64 root,
5949                                                 u64 owner, u64 offset,
5950                                                 int found_ref,
5951                                                 u64 disk_bytenr, u64 bytes)
5952 {
5953         struct list_head *cur = rec->backrefs.next;
5954         struct extent_backref *node;
5955         struct data_backref *back;
5956
5957         while(cur != &rec->backrefs) {
5958                 node = to_extent_backref(cur);
5959                 cur = cur->next;
5960                 if (!node->is_data)
5961                         continue;
5962                 back = to_data_backref(node);
5963                 if (parent > 0) {
5964                         if (!node->full_backref)
5965                                 continue;
5966                         if (parent == back->parent)
5967                                 return back;
5968                 } else {
5969                         if (node->full_backref)
5970                                 continue;
5971                         if (back->root == root && back->owner == owner &&
5972                             back->offset == offset) {
5973                                 if (found_ref && node->found_ref &&
5974                                     (back->bytes != bytes ||
5975                                     back->disk_bytenr != disk_bytenr))
5976                                         continue;
5977                                 return back;
5978                         }
5979                 }
5980         }
5981         return NULL;
5982 }
5983
5984 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5985                                                 u64 parent, u64 root,
5986                                                 u64 owner, u64 offset,
5987                                                 u64 max_size)
5988 {
5989         struct data_backref *ref = malloc(sizeof(*ref));
5990
5991         if (!ref)
5992                 return NULL;
5993         memset(&ref->node, 0, sizeof(ref->node));
5994         ref->node.is_data = 1;
5995
5996         if (parent > 0) {
5997                 ref->parent = parent;
5998                 ref->owner = 0;
5999                 ref->offset = 0;
6000                 ref->node.full_backref = 1;
6001         } else {
6002                 ref->root = root;
6003                 ref->owner = owner;
6004                 ref->offset = offset;
6005                 ref->node.full_backref = 0;
6006         }
6007         ref->bytes = max_size;
6008         ref->found_ref = 0;
6009         ref->num_refs = 0;
6010         list_add_tail(&ref->node.list, &rec->backrefs);
6011         if (max_size > rec->max_size)
6012                 rec->max_size = max_size;
6013         return ref;
6014 }
6015
6016 /* Check if the type of extent matches with its chunk */
6017 static void check_extent_type(struct extent_record *rec)
6018 {
6019         struct btrfs_block_group_cache *bg_cache;
6020
6021         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6022         if (!bg_cache)
6023                 return;
6024
6025         /* data extent, check chunk directly*/
6026         if (!rec->metadata) {
6027                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6028                         rec->wrong_chunk_type = 1;
6029                 return;
6030         }
6031
6032         /* metadata extent, check the obvious case first */
6033         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6034                                  BTRFS_BLOCK_GROUP_METADATA))) {
6035                 rec->wrong_chunk_type = 1;
6036                 return;
6037         }
6038
6039         /*
6040          * Check SYSTEM extent, as it's also marked as metadata, we can only
6041          * make sure it's a SYSTEM extent by its backref
6042          */
6043         if (!list_empty(&rec->backrefs)) {
6044                 struct extent_backref *node;
6045                 struct tree_backref *tback;
6046                 u64 bg_type;
6047
6048                 node = to_extent_backref(rec->backrefs.next);
6049                 if (node->is_data) {
6050                         /* tree block shouldn't have data backref */
6051                         rec->wrong_chunk_type = 1;
6052                         return;
6053                 }
6054                 tback = container_of(node, struct tree_backref, node);
6055
6056                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6057                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6058                 else
6059                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6060                 if (!(bg_cache->flags & bg_type))
6061                         rec->wrong_chunk_type = 1;
6062         }
6063 }
6064
6065 /*
6066  * Allocate a new extent record, fill default values from @tmpl and insert int
6067  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6068  * the cache, otherwise it fails.
6069  */
6070 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6071                 struct extent_record *tmpl)
6072 {
6073         struct extent_record *rec;
6074         int ret = 0;
6075
6076         BUG_ON(tmpl->max_size == 0);
6077         rec = malloc(sizeof(*rec));
6078         if (!rec)
6079                 return -ENOMEM;
6080         rec->start = tmpl->start;
6081         rec->max_size = tmpl->max_size;
6082         rec->nr = max(tmpl->nr, tmpl->max_size);
6083         rec->found_rec = tmpl->found_rec;
6084         rec->content_checked = tmpl->content_checked;
6085         rec->owner_ref_checked = tmpl->owner_ref_checked;
6086         rec->num_duplicates = 0;
6087         rec->metadata = tmpl->metadata;
6088         rec->flag_block_full_backref = FLAG_UNSET;
6089         rec->bad_full_backref = 0;
6090         rec->crossing_stripes = 0;
6091         rec->wrong_chunk_type = 0;
6092         rec->is_root = tmpl->is_root;
6093         rec->refs = tmpl->refs;
6094         rec->extent_item_refs = tmpl->extent_item_refs;
6095         rec->parent_generation = tmpl->parent_generation;
6096         INIT_LIST_HEAD(&rec->backrefs);
6097         INIT_LIST_HEAD(&rec->dups);
6098         INIT_LIST_HEAD(&rec->list);
6099         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6100         rec->cache.start = tmpl->start;
6101         rec->cache.size = tmpl->nr;
6102         ret = insert_cache_extent(extent_cache, &rec->cache);
6103         if (ret) {
6104                 free(rec);
6105                 return ret;
6106         }
6107         bytes_used += rec->nr;
6108
6109         if (tmpl->metadata)
6110                 rec->crossing_stripes = check_crossing_stripes(global_info,
6111                                 rec->start, global_info->nodesize);
6112         check_extent_type(rec);
6113         return ret;
6114 }
6115
6116 /*
6117  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6118  * some are hints:
6119  * - refs              - if found, increase refs
6120  * - is_root           - if found, set
6121  * - content_checked   - if found, set
6122  * - owner_ref_checked - if found, set
6123  *
6124  * If not found, create a new one, initialize and insert.
6125  */
6126 static int add_extent_rec(struct cache_tree *extent_cache,
6127                 struct extent_record *tmpl)
6128 {
6129         struct extent_record *rec;
6130         struct cache_extent *cache;
6131         int ret = 0;
6132         int dup = 0;
6133
6134         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6135         if (cache) {
6136                 rec = container_of(cache, struct extent_record, cache);
6137                 if (tmpl->refs)
6138                         rec->refs++;
6139                 if (rec->nr == 1)
6140                         rec->nr = max(tmpl->nr, tmpl->max_size);
6141
6142                 /*
6143                  * We need to make sure to reset nr to whatever the extent
6144                  * record says was the real size, this way we can compare it to
6145                  * the backrefs.
6146                  */
6147                 if (tmpl->found_rec) {
6148                         if (tmpl->start != rec->start || rec->found_rec) {
6149                                 struct extent_record *tmp;
6150
6151                                 dup = 1;
6152                                 if (list_empty(&rec->list))
6153                                         list_add_tail(&rec->list,
6154                                                       &duplicate_extents);
6155
6156                                 /*
6157                                  * We have to do this song and dance in case we
6158                                  * find an extent record that falls inside of
6159                                  * our current extent record but does not have
6160                                  * the same objectid.
6161                                  */
6162                                 tmp = malloc(sizeof(*tmp));
6163                                 if (!tmp)
6164                                         return -ENOMEM;
6165                                 tmp->start = tmpl->start;
6166                                 tmp->max_size = tmpl->max_size;
6167                                 tmp->nr = tmpl->nr;
6168                                 tmp->found_rec = 1;
6169                                 tmp->metadata = tmpl->metadata;
6170                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6171                                 INIT_LIST_HEAD(&tmp->list);
6172                                 list_add_tail(&tmp->list, &rec->dups);
6173                                 rec->num_duplicates++;
6174                         } else {
6175                                 rec->nr = tmpl->nr;
6176                                 rec->found_rec = 1;
6177                         }
6178                 }
6179
6180                 if (tmpl->extent_item_refs && !dup) {
6181                         if (rec->extent_item_refs) {
6182                                 fprintf(stderr, "block %llu rec "
6183                                         "extent_item_refs %llu, passed %llu\n",
6184                                         (unsigned long long)tmpl->start,
6185                                         (unsigned long long)
6186                                                         rec->extent_item_refs,
6187                                         (unsigned long long)tmpl->extent_item_refs);
6188                         }
6189                         rec->extent_item_refs = tmpl->extent_item_refs;
6190                 }
6191                 if (tmpl->is_root)
6192                         rec->is_root = 1;
6193                 if (tmpl->content_checked)
6194                         rec->content_checked = 1;
6195                 if (tmpl->owner_ref_checked)
6196                         rec->owner_ref_checked = 1;
6197                 memcpy(&rec->parent_key, &tmpl->parent_key,
6198                                 sizeof(tmpl->parent_key));
6199                 if (tmpl->parent_generation)
6200                         rec->parent_generation = tmpl->parent_generation;
6201                 if (rec->max_size < tmpl->max_size)
6202                         rec->max_size = tmpl->max_size;
6203
6204                 /*
6205                  * A metadata extent can't cross stripe_len boundary, otherwise
6206                  * kernel scrub won't be able to handle it.
6207                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6208                  * it.
6209                  */
6210                 if (tmpl->metadata)
6211                         rec->crossing_stripes = check_crossing_stripes(
6212                                         global_info, rec->start,
6213                                         global_info->nodesize);
6214                 check_extent_type(rec);
6215                 maybe_free_extent_rec(extent_cache, rec);
6216                 return ret;
6217         }
6218
6219         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6220
6221         return ret;
6222 }
6223
6224 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6225                             u64 parent, u64 root, int found_ref)
6226 {
6227         struct extent_record *rec;
6228         struct tree_backref *back;
6229         struct cache_extent *cache;
6230         int ret;
6231
6232         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6233         if (!cache) {
6234                 struct extent_record tmpl;
6235
6236                 memset(&tmpl, 0, sizeof(tmpl));
6237                 tmpl.start = bytenr;
6238                 tmpl.nr = 1;
6239                 tmpl.metadata = 1;
6240                 tmpl.max_size = 1;
6241
6242                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6243                 if (ret)
6244                         return ret;
6245
6246                 /* really a bug in cache_extent implement now */
6247                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6248                 if (!cache)
6249                         return -ENOENT;
6250         }
6251
6252         rec = container_of(cache, struct extent_record, cache);
6253         if (rec->start != bytenr) {
6254                 /*
6255                  * Several cause, from unaligned bytenr to over lapping extents
6256                  */
6257                 return -EEXIST;
6258         }
6259
6260         back = find_tree_backref(rec, parent, root);
6261         if (!back) {
6262                 back = alloc_tree_backref(rec, parent, root);
6263                 if (!back)
6264                         return -ENOMEM;
6265         }
6266
6267         if (found_ref) {
6268                 if (back->node.found_ref) {
6269                         fprintf(stderr, "Extent back ref already exists "
6270                                 "for %llu parent %llu root %llu \n",
6271                                 (unsigned long long)bytenr,
6272                                 (unsigned long long)parent,
6273                                 (unsigned long long)root);
6274                 }
6275                 back->node.found_ref = 1;
6276         } else {
6277                 if (back->node.found_extent_tree) {
6278                         fprintf(stderr, "Extent back ref already exists "
6279                                 "for %llu parent %llu root %llu \n",
6280                                 (unsigned long long)bytenr,
6281                                 (unsigned long long)parent,
6282                                 (unsigned long long)root);
6283                 }
6284                 back->node.found_extent_tree = 1;
6285         }
6286         check_extent_type(rec);
6287         maybe_free_extent_rec(extent_cache, rec);
6288         return 0;
6289 }
6290
6291 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6292                             u64 parent, u64 root, u64 owner, u64 offset,
6293                             u32 num_refs, int found_ref, u64 max_size)
6294 {
6295         struct extent_record *rec;
6296         struct data_backref *back;
6297         struct cache_extent *cache;
6298         int ret;
6299
6300         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6301         if (!cache) {
6302                 struct extent_record tmpl;
6303
6304                 memset(&tmpl, 0, sizeof(tmpl));
6305                 tmpl.start = bytenr;
6306                 tmpl.nr = 1;
6307                 tmpl.max_size = max_size;
6308
6309                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6310                 if (ret)
6311                         return ret;
6312
6313                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6314                 if (!cache)
6315                         abort();
6316         }
6317
6318         rec = container_of(cache, struct extent_record, cache);
6319         if (rec->max_size < max_size)
6320                 rec->max_size = max_size;
6321
6322         /*
6323          * If found_ref is set then max_size is the real size and must match the
6324          * existing refs.  So if we have already found a ref then we need to
6325          * make sure that this ref matches the existing one, otherwise we need
6326          * to add a new backref so we can notice that the backrefs don't match
6327          * and we need to figure out who is telling the truth.  This is to
6328          * account for that awful fsync bug I introduced where we'd end up with
6329          * a btrfs_file_extent_item that would have its length include multiple
6330          * prealloc extents or point inside of a prealloc extent.
6331          */
6332         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6333                                  bytenr, max_size);
6334         if (!back) {
6335                 back = alloc_data_backref(rec, parent, root, owner, offset,
6336                                           max_size);
6337                 BUG_ON(!back);
6338         }
6339
6340         if (found_ref) {
6341                 BUG_ON(num_refs != 1);
6342                 if (back->node.found_ref)
6343                         BUG_ON(back->bytes != max_size);
6344                 back->node.found_ref = 1;
6345                 back->found_ref += 1;
6346                 back->bytes = max_size;
6347                 back->disk_bytenr = bytenr;
6348                 rec->refs += 1;
6349                 rec->content_checked = 1;
6350                 rec->owner_ref_checked = 1;
6351         } else {
6352                 if (back->node.found_extent_tree) {
6353                         fprintf(stderr, "Extent back ref already exists "
6354                                 "for %llu parent %llu root %llu "
6355                                 "owner %llu offset %llu num_refs %lu\n",
6356                                 (unsigned long long)bytenr,
6357                                 (unsigned long long)parent,
6358                                 (unsigned long long)root,
6359                                 (unsigned long long)owner,
6360                                 (unsigned long long)offset,
6361                                 (unsigned long)num_refs);
6362                 }
6363                 back->num_refs = num_refs;
6364                 back->node.found_extent_tree = 1;
6365         }
6366         maybe_free_extent_rec(extent_cache, rec);
6367         return 0;
6368 }
6369
6370 static int add_pending(struct cache_tree *pending,
6371                        struct cache_tree *seen, u64 bytenr, u32 size)
6372 {
6373         int ret;
6374         ret = add_cache_extent(seen, bytenr, size);
6375         if (ret)
6376                 return ret;
6377         add_cache_extent(pending, bytenr, size);
6378         return 0;
6379 }
6380
6381 static int pick_next_pending(struct cache_tree *pending,
6382                         struct cache_tree *reada,
6383                         struct cache_tree *nodes,
6384                         u64 last, struct block_info *bits, int bits_nr,
6385                         int *reada_bits)
6386 {
6387         unsigned long node_start = last;
6388         struct cache_extent *cache;
6389         int ret;
6390
6391         cache = search_cache_extent(reada, 0);
6392         if (cache) {
6393                 bits[0].start = cache->start;
6394                 bits[0].size = cache->size;
6395                 *reada_bits = 1;
6396                 return 1;
6397         }
6398         *reada_bits = 0;
6399         if (node_start > 32768)
6400                 node_start -= 32768;
6401
6402         cache = search_cache_extent(nodes, node_start);
6403         if (!cache)
6404                 cache = search_cache_extent(nodes, 0);
6405
6406         if (!cache) {
6407                  cache = search_cache_extent(pending, 0);
6408                  if (!cache)
6409                          return 0;
6410                  ret = 0;
6411                  do {
6412                          bits[ret].start = cache->start;
6413                          bits[ret].size = cache->size;
6414                          cache = next_cache_extent(cache);
6415                          ret++;
6416                  } while (cache && ret < bits_nr);
6417                  return ret;
6418         }
6419
6420         ret = 0;
6421         do {
6422                 bits[ret].start = cache->start;
6423                 bits[ret].size = cache->size;
6424                 cache = next_cache_extent(cache);
6425                 ret++;
6426         } while (cache && ret < bits_nr);
6427
6428         if (bits_nr - ret > 8) {
6429                 u64 lookup = bits[0].start + bits[0].size;
6430                 struct cache_extent *next;
6431                 next = search_cache_extent(pending, lookup);
6432                 while(next) {
6433                         if (next->start - lookup > 32768)
6434                                 break;
6435                         bits[ret].start = next->start;
6436                         bits[ret].size = next->size;
6437                         lookup = next->start + next->size;
6438                         ret++;
6439                         if (ret == bits_nr)
6440                                 break;
6441                         next = next_cache_extent(next);
6442                         if (!next)
6443                                 break;
6444                 }
6445         }
6446         return ret;
6447 }
6448
6449 static void free_chunk_record(struct cache_extent *cache)
6450 {
6451         struct chunk_record *rec;
6452
6453         rec = container_of(cache, struct chunk_record, cache);
6454         list_del_init(&rec->list);
6455         list_del_init(&rec->dextents);
6456         free(rec);
6457 }
6458
6459 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6460 {
6461         cache_tree_free_extents(chunk_cache, free_chunk_record);
6462 }
6463
6464 static void free_device_record(struct rb_node *node)
6465 {
6466         struct device_record *rec;
6467
6468         rec = container_of(node, struct device_record, node);
6469         free(rec);
6470 }
6471
6472 FREE_RB_BASED_TREE(device_cache, free_device_record);
6473
6474 int insert_block_group_record(struct block_group_tree *tree,
6475                               struct block_group_record *bg_rec)
6476 {
6477         int ret;
6478
6479         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6480         if (ret)
6481                 return ret;
6482
6483         list_add_tail(&bg_rec->list, &tree->block_groups);
6484         return 0;
6485 }
6486
6487 static void free_block_group_record(struct cache_extent *cache)
6488 {
6489         struct block_group_record *rec;
6490
6491         rec = container_of(cache, struct block_group_record, cache);
6492         list_del_init(&rec->list);
6493         free(rec);
6494 }
6495
6496 void free_block_group_tree(struct block_group_tree *tree)
6497 {
6498         cache_tree_free_extents(&tree->tree, free_block_group_record);
6499 }
6500
6501 int insert_device_extent_record(struct device_extent_tree *tree,
6502                                 struct device_extent_record *de_rec)
6503 {
6504         int ret;
6505
6506         /*
6507          * Device extent is a bit different from the other extents, because
6508          * the extents which belong to the different devices may have the
6509          * same start and size, so we need use the special extent cache
6510          * search/insert functions.
6511          */
6512         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6513         if (ret)
6514                 return ret;
6515
6516         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6517         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6518         return 0;
6519 }
6520
6521 static void free_device_extent_record(struct cache_extent *cache)
6522 {
6523         struct device_extent_record *rec;
6524
6525         rec = container_of(cache, struct device_extent_record, cache);
6526         if (!list_empty(&rec->chunk_list))
6527                 list_del_init(&rec->chunk_list);
6528         if (!list_empty(&rec->device_list))
6529                 list_del_init(&rec->device_list);
6530         free(rec);
6531 }
6532
6533 void free_device_extent_tree(struct device_extent_tree *tree)
6534 {
6535         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6536 }
6537
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache. The extent bytenr is key.objectid and the parent is
 * key.offset.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as tree
 * backrefs, all others as data backrefs using the v0 ref count.
 *
 * NOTE(review): the data path passes max_size 0 to add_data_backref(); if no
 * extent record exists yet for the bytenr, add_extent_rec_nolookup() hits
 * BUG_ON(tmpl->max_size == 0).
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
6558
6559 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6560                                             struct btrfs_key *key,
6561                                             int slot)
6562 {
6563         struct btrfs_chunk *ptr;
6564         struct chunk_record *rec;
6565         int num_stripes, i;
6566
6567         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6568         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6569
6570         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6571         if (!rec) {
6572                 fprintf(stderr, "memory allocation failed\n");
6573                 exit(-1);
6574         }
6575
6576         INIT_LIST_HEAD(&rec->list);
6577         INIT_LIST_HEAD(&rec->dextents);
6578         rec->bg_rec = NULL;
6579
6580         rec->cache.start = key->offset;
6581         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6582
6583         rec->generation = btrfs_header_generation(leaf);
6584
6585         rec->objectid = key->objectid;
6586         rec->type = key->type;
6587         rec->offset = key->offset;
6588
6589         rec->length = rec->cache.size;
6590         rec->owner = btrfs_chunk_owner(leaf, ptr);
6591         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6592         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6593         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6594         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6595         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6596         rec->num_stripes = num_stripes;
6597         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6598
6599         for (i = 0; i < rec->num_stripes; ++i) {
6600                 rec->stripes[i].devid =
6601                         btrfs_stripe_devid_nr(leaf, ptr, i);
6602                 rec->stripes[i].offset =
6603                         btrfs_stripe_offset_nr(leaf, ptr, i);
6604                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6605                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6606                                 BTRFS_UUID_SIZE);
6607         }
6608
6609         return rec;
6610 }
6611
6612 static int process_chunk_item(struct cache_tree *chunk_cache,
6613                               struct btrfs_key *key, struct extent_buffer *eb,
6614                               int slot)
6615 {
6616         struct chunk_record *rec;
6617         struct btrfs_chunk *chunk;
6618         int ret = 0;
6619
6620         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6621         /*
6622          * Do extra check for this chunk item,
6623          *
6624          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6625          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6626          * and owner<->key_type check.
6627          */
6628         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6629                                       key->offset);
6630         if (ret < 0) {
6631                 error("chunk(%llu, %llu) is not valid, ignore it",
6632                       key->offset, btrfs_chunk_length(eb, chunk));
6633                 return 0;
6634         }
6635         rec = btrfs_new_chunk_record(eb, key, slot);
6636         ret = insert_cache_extent(chunk_cache, &rec->cache);
6637         if (ret) {
6638                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6639                         rec->offset, rec->length);
6640                 free(rec);
6641         }
6642
6643         return ret;
6644 }
6645
6646 static int process_device_item(struct rb_root *dev_cache,
6647                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6648 {
6649         struct btrfs_dev_item *ptr;
6650         struct device_record *rec;
6651         int ret = 0;
6652
6653         ptr = btrfs_item_ptr(eb,
6654                 slot, struct btrfs_dev_item);
6655
6656         rec = malloc(sizeof(*rec));
6657         if (!rec) {
6658                 fprintf(stderr, "memory allocation failed\n");
6659                 return -ENOMEM;
6660         }
6661
6662         rec->devid = key->offset;
6663         rec->generation = btrfs_header_generation(eb);
6664
6665         rec->objectid = key->objectid;
6666         rec->type = key->type;
6667         rec->offset = key->offset;
6668
6669         rec->devid = btrfs_device_id(eb, ptr);
6670         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6671         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6672
6673         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6674         if (ret) {
6675                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6676                 free(rec);
6677         }
6678
6679         return ret;
6680 }
6681
6682 struct block_group_record *
6683 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6684                              int slot)
6685 {
6686         struct btrfs_block_group_item *ptr;
6687         struct block_group_record *rec;
6688
6689         rec = calloc(1, sizeof(*rec));
6690         if (!rec) {
6691                 fprintf(stderr, "memory allocation failed\n");
6692                 exit(-1);
6693         }
6694
6695         rec->cache.start = key->objectid;
6696         rec->cache.size = key->offset;
6697
6698         rec->generation = btrfs_header_generation(leaf);
6699
6700         rec->objectid = key->objectid;
6701         rec->type = key->type;
6702         rec->offset = key->offset;
6703
6704         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6705         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6706
6707         INIT_LIST_HEAD(&rec->list);
6708
6709         return rec;
6710 }
6711
6712 static int process_block_group_item(struct block_group_tree *block_group_cache,
6713                                     struct btrfs_key *key,
6714                                     struct extent_buffer *eb, int slot)
6715 {
6716         struct block_group_record *rec;
6717         int ret = 0;
6718
6719         rec = btrfs_new_block_group_record(eb, key, slot);
6720         ret = insert_block_group_record(block_group_cache, rec);
6721         if (ret) {
6722                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6723                         rec->objectid, rec->offset);
6724                 free(rec);
6725         }
6726
6727         return ret;
6728 }
6729
6730 struct device_extent_record *
6731 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6732                                struct btrfs_key *key, int slot)
6733 {
6734         struct device_extent_record *rec;
6735         struct btrfs_dev_extent *ptr;
6736
6737         rec = calloc(1, sizeof(*rec));
6738         if (!rec) {
6739                 fprintf(stderr, "memory allocation failed\n");
6740                 exit(-1);
6741         }
6742
6743         rec->cache.objectid = key->objectid;
6744         rec->cache.start = key->offset;
6745
6746         rec->generation = btrfs_header_generation(leaf);
6747
6748         rec->objectid = key->objectid;
6749         rec->type = key->type;
6750         rec->offset = key->offset;
6751
6752         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6753         rec->chunk_objecteid =
6754                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6755         rec->chunk_offset =
6756                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6757         rec->length = btrfs_dev_extent_length(leaf, ptr);
6758         rec->cache.size = rec->length;
6759
6760         INIT_LIST_HEAD(&rec->chunk_list);
6761         INIT_LIST_HEAD(&rec->device_list);
6762
6763         return rec;
6764 }
6765
6766 static int
6767 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6768                            struct btrfs_key *key, struct extent_buffer *eb,
6769                            int slot)
6770 {
6771         struct device_extent_record *rec;
6772         int ret;
6773
6774         rec = btrfs_new_device_extent_record(eb, key, slot);
6775         ret = insert_device_extent_record(dev_extent_cache, rec);
6776         if (ret) {
6777                 fprintf(stderr,
6778                         "Device extent[%llu, %llu, %llu] existed.\n",
6779                         rec->objectid, rec->offset, rec->length);
6780                 free(rec);
6781         }
6782
6783         return ret;
6784 }
6785
6786 static int process_extent_item(struct btrfs_root *root,
6787                                struct cache_tree *extent_cache,
6788                                struct extent_buffer *eb, int slot)
6789 {
6790         struct btrfs_extent_item *ei;
6791         struct btrfs_extent_inline_ref *iref;
6792         struct btrfs_extent_data_ref *dref;
6793         struct btrfs_shared_data_ref *sref;
6794         struct btrfs_key key;
6795         struct extent_record tmpl;
6796         unsigned long end;
6797         unsigned long ptr;
6798         int ret;
6799         int type;
6800         u32 item_size = btrfs_item_size_nr(eb, slot);
6801         u64 refs = 0;
6802         u64 offset;
6803         u64 num_bytes;
6804         int metadata = 0;
6805
6806         btrfs_item_key_to_cpu(eb, &key, slot);
6807
6808         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6809                 metadata = 1;
6810                 num_bytes = root->fs_info->nodesize;
6811         } else {
6812                 num_bytes = key.offset;
6813         }
6814
6815         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6816                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6817                       key.objectid, root->fs_info->sectorsize);
6818                 return -EIO;
6819         }
6820         if (item_size < sizeof(*ei)) {
6821 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6822                 struct btrfs_extent_item_v0 *ei0;
6823                 BUG_ON(item_size != sizeof(*ei0));
6824                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6825                 refs = btrfs_extent_refs_v0(eb, ei0);
6826 #else
6827                 BUG();
6828 #endif
6829                 memset(&tmpl, 0, sizeof(tmpl));
6830                 tmpl.start = key.objectid;
6831                 tmpl.nr = num_bytes;
6832                 tmpl.extent_item_refs = refs;
6833                 tmpl.metadata = metadata;
6834                 tmpl.found_rec = 1;
6835                 tmpl.max_size = num_bytes;
6836
6837                 return add_extent_rec(extent_cache, &tmpl);
6838         }
6839
6840         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6841         refs = btrfs_extent_refs(eb, ei);
6842         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6843                 metadata = 1;
6844         else
6845                 metadata = 0;
6846         if (metadata && num_bytes != root->fs_info->nodesize) {
6847                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6848                       num_bytes, root->fs_info->nodesize);
6849                 return -EIO;
6850         }
6851         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6852                 error("ignore invalid data extent, length %llu is not aligned to %u",
6853                       num_bytes, root->fs_info->sectorsize);
6854                 return -EIO;
6855         }
6856
6857         memset(&tmpl, 0, sizeof(tmpl));
6858         tmpl.start = key.objectid;
6859         tmpl.nr = num_bytes;
6860         tmpl.extent_item_refs = refs;
6861         tmpl.metadata = metadata;
6862         tmpl.found_rec = 1;
6863         tmpl.max_size = num_bytes;
6864         add_extent_rec(extent_cache, &tmpl);
6865
6866         ptr = (unsigned long)(ei + 1);
6867         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6868             key.type == BTRFS_EXTENT_ITEM_KEY)
6869                 ptr += sizeof(struct btrfs_tree_block_info);
6870
6871         end = (unsigned long)ei + item_size;
6872         while (ptr < end) {
6873                 iref = (struct btrfs_extent_inline_ref *)ptr;
6874                 type = btrfs_extent_inline_ref_type(eb, iref);
6875                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6876                 switch (type) {
6877                 case BTRFS_TREE_BLOCK_REF_KEY:
6878                         ret = add_tree_backref(extent_cache, key.objectid,
6879                                         0, offset, 0);
6880                         if (ret < 0)
6881                                 error(
6882                         "add_tree_backref failed (extent items tree block): %s",
6883                                       strerror(-ret));
6884                         break;
6885                 case BTRFS_SHARED_BLOCK_REF_KEY:
6886                         ret = add_tree_backref(extent_cache, key.objectid,
6887                                         offset, 0, 0);
6888                         if (ret < 0)
6889                                 error(
6890                         "add_tree_backref failed (extent items shared block): %s",
6891                                       strerror(-ret));
6892                         break;
6893                 case BTRFS_EXTENT_DATA_REF_KEY:
6894                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6895                         add_data_backref(extent_cache, key.objectid, 0,
6896                                         btrfs_extent_data_ref_root(eb, dref),
6897                                         btrfs_extent_data_ref_objectid(eb,
6898                                                                        dref),
6899                                         btrfs_extent_data_ref_offset(eb, dref),
6900                                         btrfs_extent_data_ref_count(eb, dref),
6901                                         0, num_bytes);
6902                         break;
6903                 case BTRFS_SHARED_DATA_REF_KEY:
6904                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6905                         add_data_backref(extent_cache, key.objectid, offset,
6906                                         0, 0, 0,
6907                                         btrfs_shared_data_ref_count(eb, sref),
6908                                         0, num_bytes);
6909                         break;
6910                 default:
6911                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6912                                 key.objectid, key.type, num_bytes);
6913                         goto out;
6914                 }
6915                 ptr += btrfs_extent_inline_ref_size(type);
6916         }
6917         WARN_ON(ptr > end);
6918 out:
6919         return 0;
6920 }
6921
6922 static int check_cache_range(struct btrfs_root *root,
6923                              struct btrfs_block_group_cache *cache,
6924                              u64 offset, u64 bytes)
6925 {
6926         struct btrfs_free_space *entry;
6927         u64 *logical;
6928         u64 bytenr;
6929         int stripe_len;
6930         int i, nr, ret;
6931
6932         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6933                 bytenr = btrfs_sb_offset(i);
6934                 ret = btrfs_rmap_block(root->fs_info,
6935                                        cache->key.objectid, bytenr, 0,
6936                                        &logical, &nr, &stripe_len);
6937                 if (ret)
6938                         return ret;
6939
6940                 while (nr--) {
6941                         if (logical[nr] + stripe_len <= offset)
6942                                 continue;
6943                         if (offset + bytes <= logical[nr])
6944                                 continue;
6945                         if (logical[nr] == offset) {
6946                                 if (stripe_len >= bytes) {
6947                                         free(logical);
6948                                         return 0;
6949                                 }
6950                                 bytes -= stripe_len;
6951                                 offset += stripe_len;
6952                         } else if (logical[nr] < offset) {
6953                                 if (logical[nr] + stripe_len >=
6954                                     offset + bytes) {
6955                                         free(logical);
6956                                         return 0;
6957                                 }
6958                                 bytes = (offset + bytes) -
6959                                         (logical[nr] + stripe_len);
6960                                 offset = logical[nr] + stripe_len;
6961                         } else {
6962                                 /*
6963                                  * Could be tricky, the super may land in the
6964                                  * middle of the area we're checking.  First
6965                                  * check the easiest case, it's at the end.
6966                                  */
6967                                 if (logical[nr] + stripe_len >=
6968                                     bytes + offset) {
6969                                         bytes = logical[nr] - offset;
6970                                         continue;
6971                                 }
6972
6973                                 /* Check the left side */
6974                                 ret = check_cache_range(root, cache,
6975                                                         offset,
6976                                                         logical[nr] - offset);
6977                                 if (ret) {
6978                                         free(logical);
6979                                         return ret;
6980                                 }
6981
6982                                 /* Now we continue with the right side */
6983                                 bytes = (offset + bytes) -
6984                                         (logical[nr] + stripe_len);
6985                                 offset = logical[nr] + stripe_len;
6986                         }
6987                 }
6988
6989                 free(logical);
6990         }
6991
6992         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6993         if (!entry) {
6994                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6995                         offset, offset+bytes);
6996                 return -EINVAL;
6997         }
6998
6999         if (entry->offset != offset) {
7000                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7001                         entry->offset);
7002                 return -EINVAL;
7003         }
7004
7005         if (entry->bytes != bytes) {
7006                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7007                         bytes, entry->bytes, offset);
7008                 return -EINVAL;
7009         }
7010
7011         unlink_free_space(cache->free_space_ctl, entry);
7012         free(entry);
7013         return 0;
7014 }
7015
7016 static int verify_space_cache(struct btrfs_root *root,
7017                               struct btrfs_block_group_cache *cache)
7018 {
7019         struct btrfs_path path;
7020         struct extent_buffer *leaf;
7021         struct btrfs_key key;
7022         u64 last;
7023         int ret = 0;
7024
7025         root = root->fs_info->extent_root;
7026
7027         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7028
7029         btrfs_init_path(&path);
7030         key.objectid = last;
7031         key.offset = 0;
7032         key.type = BTRFS_EXTENT_ITEM_KEY;
7033         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7034         if (ret < 0)
7035                 goto out;
7036         ret = 0;
7037         while (1) {
7038                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7039                         ret = btrfs_next_leaf(root, &path);
7040                         if (ret < 0)
7041                                 goto out;
7042                         if (ret > 0) {
7043                                 ret = 0;
7044                                 break;
7045                         }
7046                 }
7047                 leaf = path.nodes[0];
7048                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7049                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7050                         break;
7051                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7052                     key.type != BTRFS_METADATA_ITEM_KEY) {
7053                         path.slots[0]++;
7054                         continue;
7055                 }
7056
7057                 if (last == key.objectid) {
7058                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7059                                 last = key.objectid + key.offset;
7060                         else
7061                                 last = key.objectid + root->fs_info->nodesize;
7062                         path.slots[0]++;
7063                         continue;
7064                 }
7065
7066                 ret = check_cache_range(root, cache, last,
7067                                         key.objectid - last);
7068                 if (ret)
7069                         break;
7070                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7071                         last = key.objectid + key.offset;
7072                 else
7073                         last = key.objectid + root->fs_info->nodesize;
7074                 path.slots[0]++;
7075         }
7076
7077         if (last < cache->key.objectid + cache->key.offset)
7078                 ret = check_cache_range(root, cache, last,
7079                                         cache->key.objectid +
7080                                         cache->key.offset - last);
7081
7082 out:
7083         btrfs_release_path(&path);
7084
7085         if (!ret &&
7086             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7087                 fprintf(stderr, "There are still entries left in the space "
7088                         "cache\n");
7089                 ret = -EINVAL;
7090         }
7091
7092         return ret;
7093 }
7094
7095 static int check_space_cache(struct btrfs_root *root)
7096 {
7097         struct btrfs_block_group_cache *cache;
7098         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7099         int ret;
7100         int error = 0;
7101
7102         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7103             btrfs_super_generation(root->fs_info->super_copy) !=
7104             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7105                 printf("cache and super generation don't match, space cache "
7106                        "will be invalidated\n");
7107                 return 0;
7108         }
7109
7110         if (ctx.progress_enabled) {
7111                 ctx.tp = TASK_FREE_SPACE;
7112                 task_start(ctx.info);
7113         }
7114
7115         while (1) {
7116                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7117                 if (!cache)
7118                         break;
7119
7120                 start = cache->key.objectid + cache->key.offset;
7121                 if (!cache->free_space_ctl) {
7122                         if (btrfs_init_free_space_ctl(cache,
7123                                                 root->fs_info->sectorsize)) {
7124                                 ret = -ENOMEM;
7125                                 break;
7126                         }
7127                 } else {
7128                         btrfs_remove_free_space_cache(cache);
7129                 }
7130
7131                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7132                         ret = exclude_super_stripes(root, cache);
7133                         if (ret) {
7134                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7135                                         strerror(-ret));
7136                                 error++;
7137                                 continue;
7138                         }
7139                         ret = load_free_space_tree(root->fs_info, cache);
7140                         free_excluded_extents(root, cache);
7141                         if (ret < 0) {
7142                                 fprintf(stderr, "could not load free space tree: %s\n",
7143                                         strerror(-ret));
7144                                 error++;
7145                                 continue;
7146                         }
7147                         error += ret;
7148                 } else {
7149                         ret = load_free_space_cache(root->fs_info, cache);
7150                         if (!ret)
7151                                 continue;
7152                 }
7153
7154                 ret = verify_space_cache(root, cache);
7155                 if (ret) {
7156                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7157                                 cache->key.objectid);
7158                         error++;
7159                 }
7160         }
7161
7162         task_stop(ctx.info);
7163
7164         return error ? -EINVAL : 0;
7165 }
7166
7167 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7168                         u64 num_bytes, unsigned long leaf_offset,
7169                         struct extent_buffer *eb) {
7170
7171         struct btrfs_fs_info *fs_info = root->fs_info;
7172         u64 offset = 0;
7173         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7174         char *data;
7175         unsigned long csum_offset;
7176         u32 csum;
7177         u32 csum_expected;
7178         u64 read_len;
7179         u64 data_checked = 0;
7180         u64 tmp;
7181         int ret = 0;
7182         int mirror;
7183         int num_copies;
7184
7185         if (num_bytes % fs_info->sectorsize)
7186                 return -EINVAL;
7187
7188         data = malloc(num_bytes);
7189         if (!data)
7190                 return -ENOMEM;
7191
7192         while (offset < num_bytes) {
7193                 mirror = 0;
7194 again:
7195                 read_len = num_bytes - offset;
7196                 /* read as much space once a time */
7197                 ret = read_extent_data(fs_info, data + offset,
7198                                 bytenr + offset, &read_len, mirror);
7199                 if (ret)
7200                         goto out;
7201                 data_checked = 0;
7202                 /* verify every 4k data's checksum */
7203                 while (data_checked < read_len) {
7204                         csum = ~(u32)0;
7205                         tmp = offset + data_checked;
7206
7207                         csum = btrfs_csum_data((char *)data + tmp,
7208                                                csum, fs_info->sectorsize);
7209                         btrfs_csum_final(csum, (u8 *)&csum);
7210
7211                         csum_offset = leaf_offset +
7212                                  tmp / fs_info->sectorsize * csum_size;
7213                         read_extent_buffer(eb, (char *)&csum_expected,
7214                                            csum_offset, csum_size);
7215                         /* try another mirror */
7216                         if (csum != csum_expected) {
7217                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7218                                                 mirror, bytenr + tmp,
7219                                                 csum, csum_expected);
7220                                 num_copies = btrfs_num_copies(root->fs_info,
7221                                                 bytenr, num_bytes);
7222                                 if (mirror < num_copies - 1) {
7223                                         mirror += 1;
7224                                         goto again;
7225                                 }
7226                         }
7227                         data_checked += fs_info->sectorsize;
7228                 }
7229                 offset += read_len;
7230         }
7231 out:
7232         free(data);
7233         return ret;
7234 }
7235
/*
 * Check that the byte range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree (root->fs_info->extent_root).
 *
 * The covered parts are subtracted from the range while walking the tree;
 * whatever is left at the end is reported as missing.  Only keys of type
 * BTRFS_EXTENT_ITEM_KEY are considered, every other key type is skipped.
 *
 * Returns 0 when the whole range is covered, 1 when some part of it has no
 * extent record (a message is printed), or a negative value when the tree
 * search or a leaf walk fails.
 *
 * NOTE(review): btrfs_prev_leaf()/btrfs_next_leaf() are called with @root
 * while the search itself runs on the extent root; check_csums() passes the
 * csum root here.  Presumably those helpers only use root->fs_info - confirm.
 */
static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
                               u64 num_bytes)
{
        struct btrfs_path path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        int ret;

        btrfs_init_path(&path);
        /* Largest possible EXTENT_ITEM key for bytenr, then step backwards */
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = (u64)-1;

again:
        ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
                                0, 0);
        if (ret < 0) {
                fprintf(stderr, "Error looking up extent record %d\n", ret);
                btrfs_release_path(&path);
                return ret;
        } else if (ret) {
                /* No exact match: look at the item just before the slot */
                if (path.slots[0] > 0) {
                        path.slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, &path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                /* No previous leaf; nothing can cover us */
                                ret = 0;
                                goto out;
                        }
                }
        }

        btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);

        /*
         * Block group items come before extent items if they have the same
         * bytenr, so walk back one more just in case.  Dear future traveller,
         * first congrats on mastering time travel.  Now if it's not too much
         * trouble could you go back to 2006 and tell Chris to make the
         * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
         * EXTENT_ITEM_KEY please?
         */
        while (key.type > BTRFS_EXTENT_ITEM_KEY) {
                if (path.slots[0] > 0) {
                        path.slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, &path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                ret = 0;
                                goto out;
                        }
                }
                btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
        }

        /* Walk forward, shrinking the range by every extent that covers it */
        while (num_bytes) {
                if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
                        ret = btrfs_next_leaf(root, &path);
                        if (ret < 0) {
                                fprintf(stderr, "Error going to next leaf "
                                        "%d\n", ret);
                                btrfs_release_path(&path);
                                return ret;
                        } else if (ret) {
                                /* End of the tree */
                                break;
                        }
                }
                leaf = path.nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                if (key.type != BTRFS_EXTENT_ITEM_KEY) {
                        path.slots[0]++;
                        continue;
                }
                /* Extent ends before our range starts */
                if (key.objectid + key.offset < bytenr) {
                        path.slots[0]++;
                        continue;
                }
                /* Extent starts after our range ends, we are done walking */
                if (key.objectid > bytenr + num_bytes)
                        break;

                if (key.objectid == bytenr) {
                        /* Extent starts exactly at the range start */
                        if (key.offset >= num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes -= key.offset;
                        bytenr += key.offset;
                } else if (key.objectid < bytenr) {
                        /* Extent overlaps the front of the range */
                        if (key.objectid + key.offset >= bytenr + num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes = (bytenr + num_bytes) -
                                (key.objectid + key.offset);
                        bytenr = key.objectid + key.offset;
                } else {
                        /* Extent starts inside the range */
                        if (key.objectid + key.offset < bytenr + num_bytes) {
                                u64 new_start = key.objectid + key.offset;
                                u64 new_bytes = bytenr + num_bytes - new_start;

                                /*
                                 * Weird case, the extent is in the middle of
                                 * our range, we'll have to search one side
                                 * and then the other.  Not sure if this happens
                                 * in real life, but no harm in coding it up
                                 * anyway just in case.
                                 */
                                btrfs_release_path(&path);
                                ret = check_extent_exists(root, new_start,
                                                          new_bytes);
                                if (ret) {
                                        /*
                                         * ret is reset to 0 after the loop;
                                         * the still non-zero num_bytes makes
                                         * the function return 1 below.
                                         */
                                        fprintf(stderr, "Right section didn't "
                                                "have a record\n");
                                        break;
                                }
                                /*
                                 * Continue with the left side.  key now holds
                                 * the key of the extent just examined, so the
                                 * new search starts from that item.
                                 */
                                num_bytes = key.objectid - bytenr;
                                goto again;
                        }
                        /* Extent covers the tail: only the left part remains */
                        num_bytes = key.objectid - bytenr;
                }
                path.slots[0]++;
        }
        ret = 0;

out:
        /* Anything still unaccounted for has no extent record */
        if (num_bytes && !ret) {
                fprintf(stderr, "There are no extents for csum range "
                        "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
                ret = 1;
        }

        btrfs_release_path(&path);
        return ret;
}
7374
7375 static int check_csums(struct btrfs_root *root)
7376 {
7377         struct btrfs_path path;
7378         struct extent_buffer *leaf;
7379         struct btrfs_key key;
7380         u64 offset = 0, num_bytes = 0;
7381         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7382         int errors = 0;
7383         int ret;
7384         u64 data_len;
7385         unsigned long leaf_offset;
7386
7387         root = root->fs_info->csum_root;
7388         if (!extent_buffer_uptodate(root->node)) {
7389                 fprintf(stderr, "No valid csum tree found\n");
7390                 return -ENOENT;
7391         }
7392
7393         btrfs_init_path(&path);
7394         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7395         key.type = BTRFS_EXTENT_CSUM_KEY;
7396         key.offset = 0;
7397         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7398         if (ret < 0) {
7399                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7400                 btrfs_release_path(&path);
7401                 return ret;
7402         }
7403
7404         if (ret > 0 && path.slots[0])
7405                 path.slots[0]--;
7406         ret = 0;
7407
7408         while (1) {
7409                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7410                         ret = btrfs_next_leaf(root, &path);
7411                         if (ret < 0) {
7412                                 fprintf(stderr, "Error going to next leaf "
7413                                         "%d\n", ret);
7414                                 break;
7415                         }
7416                         if (ret)
7417                                 break;
7418                 }
7419                 leaf = path.nodes[0];
7420
7421                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7422                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7423                         path.slots[0]++;
7424                         continue;
7425                 }
7426
7427                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7428                               csum_size) * root->fs_info->sectorsize;
7429                 if (!check_data_csum)
7430                         goto skip_csum_check;
7431                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7432                 ret = check_extent_csums(root, key.offset, data_len,
7433                                          leaf_offset, leaf);
7434                 if (ret)
7435                         break;
7436 skip_csum_check:
7437                 if (!num_bytes) {
7438                         offset = key.offset;
7439                 } else if (key.offset != offset + num_bytes) {
7440                         ret = check_extent_exists(root, offset, num_bytes);
7441                         if (ret) {
7442                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7443                                         "there is no extent record\n",
7444                                         offset, offset+num_bytes);
7445                                 errors++;
7446                         }
7447                         offset = key.offset;
7448                         num_bytes = 0;
7449                 }
7450                 num_bytes += data_len;
7451                 path.slots[0]++;
7452         }
7453
7454         btrfs_release_path(&path);
7455         return errors;
7456 }
7457
7458 static int is_dropped_key(struct btrfs_key *key,
7459                           struct btrfs_key *drop_key) {
7460         if (key->objectid < drop_key->objectid)
7461                 return 1;
7462         else if (key->objectid == drop_key->objectid) {
7463                 if (key->type < drop_key->type)
7464                         return 1;
7465                 else if (key->type == drop_key->type) {
7466                         if (key->offset < drop_key->offset)
7467                                 return 1;
7468                 }
7469         }
7470         return 0;
7471 }
7472
7473 /*
7474  * Here are the rules for FULL_BACKREF.
7475  *
7476  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7477  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7478  *      FULL_BACKREF set.
7479  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7480  *    if it happened after the relocation occurred since we'll have dropped the
7481  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7482  *    have no real way to know for sure.
7483  *
7484  * We process the blocks one root at a time, and we start from the lowest root
7485  * objectid and go to the highest.  So we can just lookup the owner backref for
7486  * the record and if we don't find it then we know it doesn't exist and we have
7487  * a FULL BACKREF.
7488  *
7489  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7490  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7491  * be set or not and then we can check later once we've gathered all the refs.
7492  */
7493 static int calc_extent_flag(struct cache_tree *extent_cache,
7494                            struct extent_buffer *buf,
7495                            struct root_item_record *ri,
7496                            u64 *flags)
7497 {
7498         struct extent_record *rec;
7499         struct cache_extent *cache;
7500         struct tree_backref *tback;
7501         u64 owner = 0;
7502
7503         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7504         /* we have added this extent before */
7505         if (!cache)
7506                 return -ENOENT;
7507
7508         rec = container_of(cache, struct extent_record, cache);
7509
7510         /*
7511          * Except file/reloc tree, we can not have
7512          * FULL BACKREF MODE
7513          */
7514         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7515                 goto normal;
7516         /*
7517          * root node
7518          */
7519         if (buf->start == ri->bytenr)
7520                 goto normal;
7521
7522         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7523                 goto full_backref;
7524
7525         owner = btrfs_header_owner(buf);
7526         if (owner == ri->objectid)
7527                 goto normal;
7528
7529         tback = find_tree_backref(rec, 0, owner);
7530         if (!tback)
7531                 goto full_backref;
7532 normal:
7533         *flags = 0;
7534         if (rec->flag_block_full_backref != FLAG_UNSET &&
7535             rec->flag_block_full_backref != 0)
7536                 rec->bad_full_backref = 1;
7537         return 0;
7538 full_backref:
7539         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7540         if (rec->flag_block_full_backref != FLAG_UNSET &&
7541             rec->flag_block_full_backref != 1)
7542                 rec->bad_full_backref = 1;
7543         return 0;
7544 }
7545
7546 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7547 {
7548         fprintf(stderr, "Invalid key type(");
7549         print_key_type(stderr, 0, key_type);
7550         fprintf(stderr, ") found in root(");
7551         print_objectid(stderr, rootid, 0);
7552         fprintf(stderr, ")\n");
7553 }
7554
7555 /*
7556  * Check if the key is valid with its extent buffer.
7557  *
7558  * This is a early check in case invalid key exists in a extent buffer
7559  * This is not comprehensive yet, but should prevent wrong key/item passed
7560  * further
7561  */
7562 static int check_type_with_root(u64 rootid, u8 key_type)
7563 {
7564         switch (key_type) {
7565         /* Only valid in chunk tree */
7566         case BTRFS_DEV_ITEM_KEY:
7567         case BTRFS_CHUNK_ITEM_KEY:
7568                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7569                         goto err;
7570                 break;
7571         /* valid in csum and log tree */
7572         case BTRFS_CSUM_TREE_OBJECTID:
7573                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7574                       is_fstree(rootid)))
7575                         goto err;
7576                 break;
7577         case BTRFS_EXTENT_ITEM_KEY:
7578         case BTRFS_METADATA_ITEM_KEY:
7579         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7580                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7581                         goto err;
7582                 break;
7583         case BTRFS_ROOT_ITEM_KEY:
7584                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7585                         goto err;
7586                 break;
7587         case BTRFS_DEV_EXTENT_KEY:
7588                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7589                         goto err;
7590                 break;
7591         }
7592         return 0;
7593 err:
7594         report_mismatch_key_root(key_type, rootid);
7595         return -EINVAL;
7596 }
7597
7598 static int run_next_block(struct btrfs_root *root,
7599                           struct block_info *bits,
7600                           int bits_nr,
7601                           u64 *last,
7602                           struct cache_tree *pending,
7603                           struct cache_tree *seen,
7604                           struct cache_tree *reada,
7605                           struct cache_tree *nodes,
7606                           struct cache_tree *extent_cache,
7607                           struct cache_tree *chunk_cache,
7608                           struct rb_root *dev_cache,
7609                           struct block_group_tree *block_group_cache,
7610                           struct device_extent_tree *dev_extent_cache,
7611                           struct root_item_record *ri)
7612 {
7613         struct extent_buffer *buf;
7614         struct extent_record *rec = NULL;
7615         u64 bytenr;
7616         u32 size;
7617         u64 parent;
7618         u64 owner;
7619         u64 flags;
7620         u64 ptr;
7621         u64 gen = 0;
7622         int ret = 0;
7623         int i;
7624         int nritems;
7625         struct btrfs_key key;
7626         struct cache_extent *cache;
7627         int reada_bits;
7628
7629         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7630                                     bits_nr, &reada_bits);
7631         if (nritems == 0)
7632                 return 1;
7633
7634         if (!reada_bits) {
7635                 for(i = 0; i < nritems; i++) {
7636                         ret = add_cache_extent(reada, bits[i].start,
7637                                                bits[i].size);
7638                         if (ret == -EEXIST)
7639                                 continue;
7640
7641                         /* fixme, get the parent transid */
7642                         readahead_tree_block(root, bits[i].start,
7643                                              bits[i].size, 0);
7644                 }
7645         }
7646         *last = bits[0].start;
7647         bytenr = bits[0].start;
7648         size = bits[0].size;
7649
7650         cache = lookup_cache_extent(pending, bytenr, size);
7651         if (cache) {
7652                 remove_cache_extent(pending, cache);
7653                 free(cache);
7654         }
7655         cache = lookup_cache_extent(reada, bytenr, size);
7656         if (cache) {
7657                 remove_cache_extent(reada, cache);
7658                 free(cache);
7659         }
7660         cache = lookup_cache_extent(nodes, bytenr, size);
7661         if (cache) {
7662                 remove_cache_extent(nodes, cache);
7663                 free(cache);
7664         }
7665         cache = lookup_cache_extent(extent_cache, bytenr, size);
7666         if (cache) {
7667                 rec = container_of(cache, struct extent_record, cache);
7668                 gen = rec->parent_generation;
7669         }
7670
7671         /* fixme, get the real parent transid */
7672         buf = read_tree_block(root->fs_info, bytenr, size, gen);
7673         if (!extent_buffer_uptodate(buf)) {
7674                 record_bad_block_io(root->fs_info,
7675                                     extent_cache, bytenr, size);
7676                 goto out;
7677         }
7678
7679         nritems = btrfs_header_nritems(buf);
7680
7681         flags = 0;
7682         if (!init_extent_tree) {
7683                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7684                                        btrfs_header_level(buf), 1, NULL,
7685                                        &flags);
7686                 if (ret < 0) {
7687                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7688                         if (ret < 0) {
7689                                 fprintf(stderr, "Couldn't calc extent flags\n");
7690                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7691                         }
7692                 }
7693         } else {
7694                 flags = 0;
7695                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7696                 if (ret < 0) {
7697                         fprintf(stderr, "Couldn't calc extent flags\n");
7698                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7699                 }
7700         }
7701
7702         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7703                 if (ri != NULL &&
7704                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7705                     ri->objectid == btrfs_header_owner(buf)) {
7706                         /*
7707                          * Ok we got to this block from it's original owner and
7708                          * we have FULL_BACKREF set.  Relocation can leave
7709                          * converted blocks over so this is altogether possible,
7710                          * however it's not possible if the generation > the
7711                          * last snapshot, so check for this case.
7712                          */
7713                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7714                             btrfs_header_generation(buf) > ri->last_snapshot) {
7715                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7716                                 rec->bad_full_backref = 1;
7717                         }
7718                 }
7719         } else {
7720                 if (ri != NULL &&
7721                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7722                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7723                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7724                         rec->bad_full_backref = 1;
7725                 }
7726         }
7727
7728         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7729                 rec->flag_block_full_backref = 1;
7730                 parent = bytenr;
7731                 owner = 0;
7732         } else {
7733                 rec->flag_block_full_backref = 0;
7734                 parent = 0;
7735                 owner = btrfs_header_owner(buf);
7736         }
7737
7738         ret = check_block(root, extent_cache, buf, flags);
7739         if (ret)
7740                 goto out;
7741
7742         if (btrfs_is_leaf(buf)) {
7743                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7744                 for (i = 0; i < nritems; i++) {
7745                         struct btrfs_file_extent_item *fi;
7746                         btrfs_item_key_to_cpu(buf, &key, i);
7747                         /*
7748                          * Check key type against the leaf owner.
7749                          * Could filter quite a lot of early error if
7750                          * owner is correct
7751                          */
7752                         if (check_type_with_root(btrfs_header_owner(buf),
7753                                                  key.type)) {
7754                                 fprintf(stderr, "ignoring invalid key\n");
7755                                 continue;
7756                         }
7757                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7758                                 process_extent_item(root, extent_cache, buf,
7759                                                     i);
7760                                 continue;
7761                         }
7762                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7763                                 process_extent_item(root, extent_cache, buf,
7764                                                     i);
7765                                 continue;
7766                         }
7767                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7768                                 total_csum_bytes +=
7769                                         btrfs_item_size_nr(buf, i);
7770                                 continue;
7771                         }
7772                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7773                                 process_chunk_item(chunk_cache, &key, buf, i);
7774                                 continue;
7775                         }
7776                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7777                                 process_device_item(dev_cache, &key, buf, i);
7778                                 continue;
7779                         }
7780                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7781                                 process_block_group_item(block_group_cache,
7782                                         &key, buf, i);
7783                                 continue;
7784                         }
7785                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7786                                 process_device_extent_item(dev_extent_cache,
7787                                         &key, buf, i);
7788                                 continue;
7789
7790                         }
7791                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7792 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7793                                 process_extent_ref_v0(extent_cache, buf, i);
7794 #else
7795                                 BUG();
7796 #endif
7797                                 continue;
7798                         }
7799
7800                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7801                                 ret = add_tree_backref(extent_cache,
7802                                                 key.objectid, 0, key.offset, 0);
7803                                 if (ret < 0)
7804                                         error(
7805                                 "add_tree_backref failed (leaf tree block): %s",
7806                                               strerror(-ret));
7807                                 continue;
7808                         }
7809                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7810                                 ret = add_tree_backref(extent_cache,
7811                                                 key.objectid, key.offset, 0, 0);
7812                                 if (ret < 0)
7813                                         error(
7814                                 "add_tree_backref failed (leaf shared block): %s",
7815                                               strerror(-ret));
7816                                 continue;
7817                         }
7818                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7819                                 struct btrfs_extent_data_ref *ref;
7820                                 ref = btrfs_item_ptr(buf, i,
7821                                                 struct btrfs_extent_data_ref);
7822                                 add_data_backref(extent_cache,
7823                                         key.objectid, 0,
7824                                         btrfs_extent_data_ref_root(buf, ref),
7825                                         btrfs_extent_data_ref_objectid(buf,
7826                                                                        ref),
7827                                         btrfs_extent_data_ref_offset(buf, ref),
7828                                         btrfs_extent_data_ref_count(buf, ref),
7829                                         0, root->fs_info->sectorsize);
7830                                 continue;
7831                         }
7832                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7833                                 struct btrfs_shared_data_ref *ref;
7834                                 ref = btrfs_item_ptr(buf, i,
7835                                                 struct btrfs_shared_data_ref);
7836                                 add_data_backref(extent_cache,
7837                                         key.objectid, key.offset, 0, 0, 0,
7838                                         btrfs_shared_data_ref_count(buf, ref),
7839                                         0, root->fs_info->sectorsize);
7840                                 continue;
7841                         }
7842                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7843                                 struct bad_item *bad;
7844
7845                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7846                                         continue;
7847                                 if (!owner)
7848                                         continue;
7849                                 bad = malloc(sizeof(struct bad_item));
7850                                 if (!bad)
7851                                         continue;
7852                                 INIT_LIST_HEAD(&bad->list);
7853                                 memcpy(&bad->key, &key,
7854                                        sizeof(struct btrfs_key));
7855                                 bad->root_id = owner;
7856                                 list_add_tail(&bad->list, &delete_items);
7857                                 continue;
7858                         }
7859                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7860                                 continue;
7861                         fi = btrfs_item_ptr(buf, i,
7862                                             struct btrfs_file_extent_item);
7863                         if (btrfs_file_extent_type(buf, fi) ==
7864                             BTRFS_FILE_EXTENT_INLINE)
7865                                 continue;
7866                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7867                                 continue;
7868
7869                         data_bytes_allocated +=
7870                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7871                         if (data_bytes_allocated < root->fs_info->sectorsize) {
7872                                 abort();
7873                         }
7874                         data_bytes_referenced +=
7875                                 btrfs_file_extent_num_bytes(buf, fi);
7876                         add_data_backref(extent_cache,
7877                                 btrfs_file_extent_disk_bytenr(buf, fi),
7878                                 parent, owner, key.objectid, key.offset -
7879                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7880                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7881                 }
7882         } else {
7883                 int level;
7884                 struct btrfs_key first_key;
7885
7886                 first_key.objectid = 0;
7887
7888                 if (nritems > 0)
7889                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7890                 level = btrfs_header_level(buf);
7891                 for (i = 0; i < nritems; i++) {
7892                         struct extent_record tmpl;
7893
7894                         ptr = btrfs_node_blockptr(buf, i);
7895                         size = root->fs_info->nodesize;
7896                         btrfs_node_key_to_cpu(buf, &key, i);
7897                         if (ri != NULL) {
7898                                 if ((level == ri->drop_level)
7899                                     && is_dropped_key(&key, &ri->drop_key)) {
7900                                         continue;
7901                                 }
7902                         }
7903
7904                         memset(&tmpl, 0, sizeof(tmpl));
7905                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7906                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7907                         tmpl.start = ptr;
7908                         tmpl.nr = size;
7909                         tmpl.refs = 1;
7910                         tmpl.metadata = 1;
7911                         tmpl.max_size = size;
7912                         ret = add_extent_rec(extent_cache, &tmpl);
7913                         if (ret < 0)
7914                                 goto out;
7915
7916                         ret = add_tree_backref(extent_cache, ptr, parent,
7917                                         owner, 1);
7918                         if (ret < 0) {
7919                                 error(
7920                                 "add_tree_backref failed (non-leaf block): %s",
7921                                       strerror(-ret));
7922                                 continue;
7923                         }
7924
7925                         if (level > 1) {
7926                                 add_pending(nodes, seen, ptr, size);
7927                         } else {
7928                                 add_pending(pending, seen, ptr, size);
7929                         }
7930                 }
7931                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7932                                       nritems) * sizeof(struct btrfs_key_ptr);
7933         }
7934         total_btree_bytes += buf->len;
7935         if (fs_root_objectid(btrfs_header_owner(buf)))
7936                 total_fs_tree_bytes += buf->len;
7937         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7938                 total_extent_tree_bytes += buf->len;
7939         if (!found_old_backref &&
7940             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7941             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7942             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7943                 found_old_backref = 1;
7944 out:
7945         free_extent_buffer(buf);
7946         return ret;
7947 }
7948
7949 static int add_root_to_pending(struct extent_buffer *buf,
7950                                struct cache_tree *extent_cache,
7951                                struct cache_tree *pending,
7952                                struct cache_tree *seen,
7953                                struct cache_tree *nodes,
7954                                u64 objectid)
7955 {
7956         struct extent_record tmpl;
7957         int ret;
7958
7959         if (btrfs_header_level(buf) > 0)
7960                 add_pending(nodes, seen, buf->start, buf->len);
7961         else
7962                 add_pending(pending, seen, buf->start, buf->len);
7963
7964         memset(&tmpl, 0, sizeof(tmpl));
7965         tmpl.start = buf->start;
7966         tmpl.nr = buf->len;
7967         tmpl.is_root = 1;
7968         tmpl.refs = 1;
7969         tmpl.metadata = 1;
7970         tmpl.max_size = buf->len;
7971         add_extent_rec(extent_cache, &tmpl);
7972
7973         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7974             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7975                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7976                                 0, 1);
7977         else
7978                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7979                                 1);
7980         return ret;
7981 }
7982
7983 /* as we fix the tree, we might be deleting blocks that
7984  * we're tracking for repair.  This hook makes sure we
7985  * remove any backrefs for blocks as we are fixing them.
7986  */
7987 static int free_extent_hook(struct btrfs_trans_handle *trans,
7988                             struct btrfs_root *root,
7989                             u64 bytenr, u64 num_bytes, u64 parent,
7990                             u64 root_objectid, u64 owner, u64 offset,
7991                             int refs_to_drop)
7992 {
7993         struct extent_record *rec;
7994         struct cache_extent *cache;
7995         int is_data;
7996         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7997
7998         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7999         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8000         if (!cache)
8001                 return 0;
8002
8003         rec = container_of(cache, struct extent_record, cache);
8004         if (is_data) {
8005                 struct data_backref *back;
8006                 back = find_data_backref(rec, parent, root_objectid, owner,
8007                                          offset, 1, bytenr, num_bytes);
8008                 if (!back)
8009                         goto out;
8010                 if (back->node.found_ref) {
8011                         back->found_ref -= refs_to_drop;
8012                         if (rec->refs)
8013                                 rec->refs -= refs_to_drop;
8014                 }
8015                 if (back->node.found_extent_tree) {
8016                         back->num_refs -= refs_to_drop;
8017                         if (rec->extent_item_refs)
8018                                 rec->extent_item_refs -= refs_to_drop;
8019                 }
8020                 if (back->found_ref == 0)
8021                         back->node.found_ref = 0;
8022                 if (back->num_refs == 0)
8023                         back->node.found_extent_tree = 0;
8024
8025                 if (!back->node.found_extent_tree && back->node.found_ref) {
8026                         list_del(&back->node.list);
8027                         free(back);
8028                 }
8029         } else {
8030                 struct tree_backref *back;
8031                 back = find_tree_backref(rec, parent, root_objectid);
8032                 if (!back)
8033                         goto out;
8034                 if (back->node.found_ref) {
8035                         if (rec->refs)
8036                                 rec->refs--;
8037                         back->node.found_ref = 0;
8038                 }
8039                 if (back->node.found_extent_tree) {
8040                         if (rec->extent_item_refs)
8041                                 rec->extent_item_refs--;
8042                         back->node.found_extent_tree = 0;
8043                 }
8044                 if (!back->node.found_extent_tree && back->node.found_ref) {
8045                         list_del(&back->node.list);
8046                         free(back);
8047                 }
8048         }
8049         maybe_free_extent_rec(extent_cache, rec);
8050 out:
8051         return 0;
8052 }
8053
8054 static int delete_extent_records(struct btrfs_trans_handle *trans,
8055                                  struct btrfs_root *root,
8056                                  struct btrfs_path *path,
8057                                  u64 bytenr)
8058 {
8059         struct btrfs_key key;
8060         struct btrfs_key found_key;
8061         struct extent_buffer *leaf;
8062         int ret;
8063         int slot;
8064
8065
8066         key.objectid = bytenr;
8067         key.type = (u8)-1;
8068         key.offset = (u64)-1;
8069
8070         while(1) {
8071                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8072                                         &key, path, 0, 1);
8073                 if (ret < 0)
8074                         break;
8075
8076                 if (ret > 0) {
8077                         ret = 0;
8078                         if (path->slots[0] == 0)
8079                                 break;
8080                         path->slots[0]--;
8081                 }
8082                 ret = 0;
8083
8084                 leaf = path->nodes[0];
8085                 slot = path->slots[0];
8086
8087                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8088                 if (found_key.objectid != bytenr)
8089                         break;
8090
8091                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8092                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8093                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8094                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8095                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8096                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8097                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8098                         btrfs_release_path(path);
8099                         if (found_key.type == 0) {
8100                                 if (found_key.offset == 0)
8101                                         break;
8102                                 key.offset = found_key.offset - 1;
8103                                 key.type = found_key.type;
8104                         }
8105                         key.type = found_key.type - 1;
8106                         key.offset = (u64)-1;
8107                         continue;
8108                 }
8109
8110                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8111                         found_key.objectid, found_key.type, found_key.offset);
8112
8113                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8114                 if (ret)
8115                         break;
8116                 btrfs_release_path(path);
8117
8118                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8119                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8120                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8121                                 found_key.offset : root->fs_info->nodesize;
8122
8123                         ret = btrfs_update_block_group(trans, root, bytenr,
8124                                                        bytes, 0, 0);
8125                         if (ret)
8126                                 break;
8127                 }
8128         }
8129
8130         btrfs_release_path(path);
8131         return ret;
8132 }
8133
8134 /*
8135  * for a single backref, this will allocate a new extent
8136  * and add the backref to it.
8137  */
8138 static int record_extent(struct btrfs_trans_handle *trans,
8139                          struct btrfs_fs_info *info,
8140                          struct btrfs_path *path,
8141                          struct extent_record *rec,
8142                          struct extent_backref *back,
8143                          int allocated, u64 flags)
8144 {
8145         int ret = 0;
8146         struct btrfs_root *extent_root = info->extent_root;
8147         struct extent_buffer *leaf;
8148         struct btrfs_key ins_key;
8149         struct btrfs_extent_item *ei;
8150         struct data_backref *dback;
8151         struct btrfs_tree_block_info *bi;
8152
8153         if (!back->is_data)
8154                 rec->max_size = max_t(u64, rec->max_size,
8155                                     info->nodesize);
8156
8157         if (!allocated) {
8158                 u32 item_size = sizeof(*ei);
8159
8160                 if (!back->is_data)
8161                         item_size += sizeof(*bi);
8162
8163                 ins_key.objectid = rec->start;
8164                 ins_key.offset = rec->max_size;
8165                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8166
8167                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8168                                         &ins_key, item_size);
8169                 if (ret)
8170                         goto fail;
8171
8172                 leaf = path->nodes[0];
8173                 ei = btrfs_item_ptr(leaf, path->slots[0],
8174                                     struct btrfs_extent_item);
8175
8176                 btrfs_set_extent_refs(leaf, ei, 0);
8177                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8178
8179                 if (back->is_data) {
8180                         btrfs_set_extent_flags(leaf, ei,
8181                                                BTRFS_EXTENT_FLAG_DATA);
8182                 } else {
8183                         struct btrfs_disk_key copy_key;;
8184
8185                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8186                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8187                                              sizeof(*bi));
8188
8189                         btrfs_set_disk_key_objectid(&copy_key,
8190                                                     rec->info_objectid);
8191                         btrfs_set_disk_key_type(&copy_key, 0);
8192                         btrfs_set_disk_key_offset(&copy_key, 0);
8193
8194                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8195                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8196
8197                         btrfs_set_extent_flags(leaf, ei,
8198                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8199                 }
8200
8201                 btrfs_mark_buffer_dirty(leaf);
8202                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8203                                                rec->max_size, 1, 0);
8204                 if (ret)
8205                         goto fail;
8206                 btrfs_release_path(path);
8207         }
8208
8209         if (back->is_data) {
8210                 u64 parent;
8211                 int i;
8212
8213                 dback = to_data_backref(back);
8214                 if (back->full_backref)
8215                         parent = dback->parent;
8216                 else
8217                         parent = 0;
8218
8219                 for (i = 0; i < dback->found_ref; i++) {
8220                         /* if parent != 0, we're doing a full backref
8221                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8222                          * just makes the backref allocator create a data
8223                          * backref
8224                          */
8225                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8226                                                    rec->start, rec->max_size,
8227                                                    parent,
8228                                                    dback->root,
8229                                                    parent ?
8230                                                    BTRFS_FIRST_FREE_OBJECTID :
8231                                                    dback->owner,
8232                                                    dback->offset);
8233                         if (ret)
8234                                 break;
8235                 }
8236                 fprintf(stderr, "adding new data backref"
8237                                 " on %llu %s %llu owner %llu"
8238                                 " offset %llu found %d\n",
8239                                 (unsigned long long)rec->start,
8240                                 back->full_backref ?
8241                                 "parent" : "root",
8242                                 back->full_backref ?
8243                                 (unsigned long long)parent :
8244                                 (unsigned long long)dback->root,
8245                                 (unsigned long long)dback->owner,
8246                                 (unsigned long long)dback->offset,
8247                                 dback->found_ref);
8248         } else {
8249                 u64 parent;
8250                 struct tree_backref *tback;
8251
8252                 tback = to_tree_backref(back);
8253                 if (back->full_backref)
8254                         parent = tback->parent;
8255                 else
8256                         parent = 0;
8257
8258                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8259                                            rec->start, rec->max_size,
8260                                            parent, tback->root, 0, 0);
8261                 fprintf(stderr, "adding new tree backref on "
8262                         "start %llu len %llu parent %llu root %llu\n",
8263                         rec->start, rec->max_size, parent, tback->root);
8264         }
8265 fail:
8266         btrfs_release_path(path);
8267         return ret;
8268 }
8269
8270 static struct extent_entry *find_entry(struct list_head *entries,
8271                                        u64 bytenr, u64 bytes)
8272 {
8273         struct extent_entry *entry = NULL;
8274
8275         list_for_each_entry(entry, entries, list) {
8276                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8277                         return entry;
8278         }
8279
8280         return NULL;
8281 }
8282
8283 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8284 {
8285         struct extent_entry *entry, *best = NULL, *prev = NULL;
8286
8287         list_for_each_entry(entry, entries, list) {
8288                 /*
8289                  * If there are as many broken entries as entries then we know
8290                  * not to trust this particular entry.
8291                  */
8292                 if (entry->broken == entry->count)
8293                         continue;
8294
8295                 /*
8296                  * Special case, when there are only two entries and 'best' is
8297                  * the first one
8298                  */
8299                 if (!prev) {
8300                         best = entry;
8301                         prev = entry;
8302                         continue;
8303                 }
8304
8305                 /*
8306                  * If our current entry == best then we can't be sure our best
8307                  * is really the best, so we need to keep searching.
8308                  */
8309                 if (best && best->count == entry->count) {
8310                         prev = entry;
8311                         best = NULL;
8312                         continue;
8313                 }
8314
8315                 /* Prev == entry, not good enough, have to keep searching */
8316                 if (!prev->broken && prev->count == entry->count)
8317                         continue;
8318
8319                 if (!best)
8320                         best = (prev->count > entry->count) ? prev : entry;
8321                 else if (best->count < entry->count)
8322                         best = entry;
8323                 prev = entry;
8324         }
8325
8326         return best;
8327 }
8328
8329 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8330                       struct data_backref *dback, struct extent_entry *entry)
8331 {
8332         struct btrfs_trans_handle *trans;
8333         struct btrfs_root *root;
8334         struct btrfs_file_extent_item *fi;
8335         struct extent_buffer *leaf;
8336         struct btrfs_key key;
8337         u64 bytenr, bytes;
8338         int ret, err;
8339
8340         key.objectid = dback->root;
8341         key.type = BTRFS_ROOT_ITEM_KEY;
8342         key.offset = (u64)-1;
8343         root = btrfs_read_fs_root(info, &key);
8344         if (IS_ERR(root)) {
8345                 fprintf(stderr, "Couldn't find root for our ref\n");
8346                 return -EINVAL;
8347         }
8348
8349         /*
8350          * The backref points to the original offset of the extent if it was
8351          * split, so we need to search down to the offset we have and then walk
8352          * forward until we find the backref we're looking for.
8353          */
8354         key.objectid = dback->owner;
8355         key.type = BTRFS_EXTENT_DATA_KEY;
8356         key.offset = dback->offset;
8357         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8358         if (ret < 0) {
8359                 fprintf(stderr, "Error looking up ref %d\n", ret);
8360                 return ret;
8361         }
8362
8363         while (1) {
8364                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8365                         ret = btrfs_next_leaf(root, path);
8366                         if (ret) {
8367                                 fprintf(stderr, "Couldn't find our ref, next\n");
8368                                 return -EINVAL;
8369                         }
8370                 }
8371                 leaf = path->nodes[0];
8372                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8373                 if (key.objectid != dback->owner ||
8374                     key.type != BTRFS_EXTENT_DATA_KEY) {
8375                         fprintf(stderr, "Couldn't find our ref, search\n");
8376                         return -EINVAL;
8377                 }
8378                 fi = btrfs_item_ptr(leaf, path->slots[0],
8379                                     struct btrfs_file_extent_item);
8380                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8381                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8382
8383                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8384                         break;
8385                 path->slots[0]++;
8386         }
8387
8388         btrfs_release_path(path);
8389
8390         trans = btrfs_start_transaction(root, 1);
8391         if (IS_ERR(trans))
8392                 return PTR_ERR(trans);
8393
8394         /*
8395          * Ok we have the key of the file extent we want to fix, now we can cow
8396          * down to the thing and fix it.
8397          */
8398         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8399         if (ret < 0) {
8400                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8401                         key.objectid, key.type, key.offset, ret);
8402                 goto out;
8403         }
8404         if (ret > 0) {
8405                 fprintf(stderr, "Well that's odd, we just found this key "
8406                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8407                         key.offset);
8408                 ret = -EINVAL;
8409                 goto out;
8410         }
8411         leaf = path->nodes[0];
8412         fi = btrfs_item_ptr(leaf, path->slots[0],
8413                             struct btrfs_file_extent_item);
8414
8415         if (btrfs_file_extent_compression(leaf, fi) &&
8416             dback->disk_bytenr != entry->bytenr) {
8417                 fprintf(stderr, "Ref doesn't match the record start and is "
8418                         "compressed, please take a btrfs-image of this file "
8419                         "system and send it to a btrfs developer so they can "
8420                         "complete this functionality for bytenr %Lu\n",
8421                         dback->disk_bytenr);
8422                 ret = -EINVAL;
8423                 goto out;
8424         }
8425
8426         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8427                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8428         } else if (dback->disk_bytenr > entry->bytenr) {
8429                 u64 off_diff, offset;
8430
8431                 off_diff = dback->disk_bytenr - entry->bytenr;
8432                 offset = btrfs_file_extent_offset(leaf, fi);
8433                 if (dback->disk_bytenr + offset +
8434                     btrfs_file_extent_num_bytes(leaf, fi) >
8435                     entry->bytenr + entry->bytes) {
8436                         fprintf(stderr, "Ref is past the entry end, please "
8437                                 "take a btrfs-image of this file system and "
8438                                 "send it to a btrfs developer, ref %Lu\n",
8439                                 dback->disk_bytenr);
8440                         ret = -EINVAL;
8441                         goto out;
8442                 }
8443                 offset += off_diff;
8444                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8445                 btrfs_set_file_extent_offset(leaf, fi, offset);
8446         } else if (dback->disk_bytenr < entry->bytenr) {
8447                 u64 offset;
8448
8449                 offset = btrfs_file_extent_offset(leaf, fi);
8450                 if (dback->disk_bytenr + offset < entry->bytenr) {
8451                         fprintf(stderr, "Ref is before the entry start, please"
8452                                 " take a btrfs-image of this file system and "
8453                                 "send it to a btrfs developer, ref %Lu\n",
8454                                 dback->disk_bytenr);
8455                         ret = -EINVAL;
8456                         goto out;
8457                 }
8458
8459                 offset += dback->disk_bytenr;
8460                 offset -= entry->bytenr;
8461                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8462                 btrfs_set_file_extent_offset(leaf, fi, offset);
8463         }
8464
8465         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8466
8467         /*
8468          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8469          * only do this if we aren't using compression, otherwise it's a
8470          * trickier case.
8471          */
8472         if (!btrfs_file_extent_compression(leaf, fi))
8473                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8474         else
8475                 printf("ram bytes may be wrong?\n");
8476         btrfs_mark_buffer_dirty(leaf);
8477 out:
8478         err = btrfs_commit_transaction(trans, root);
8479         btrfs_release_path(path);
8480         return ret ? ret : err;
8481 }
8482
8483 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8484                            struct extent_record *rec)
8485 {
8486         struct extent_backref *back;
8487         struct data_backref *dback;
8488         struct extent_entry *entry, *best = NULL;
8489         LIST_HEAD(entries);
8490         int nr_entries = 0;
8491         int broken_entries = 0;
8492         int ret = 0;
8493         short mismatch = 0;
8494
8495         /*
8496          * Metadata is easy and the backrefs should always agree on bytenr and
8497          * size, if not we've got bigger issues.
8498          */
8499         if (rec->metadata)
8500                 return 0;
8501
8502         list_for_each_entry(back, &rec->backrefs, list) {
8503                 if (back->full_backref || !back->is_data)
8504                         continue;
8505
8506                 dback = to_data_backref(back);
8507
8508                 /*
8509                  * We only pay attention to backrefs that we found a real
8510                  * backref for.
8511                  */
8512                 if (dback->found_ref == 0)
8513                         continue;
8514
8515                 /*
8516                  * For now we only catch when the bytes don't match, not the
8517                  * bytenr.  We can easily do this at the same time, but I want
8518                  * to have a fs image to test on before we just add repair
8519                  * functionality willy-nilly so we know we won't screw up the
8520                  * repair.
8521                  */
8522
8523                 entry = find_entry(&entries, dback->disk_bytenr,
8524                                    dback->bytes);
8525                 if (!entry) {
8526                         entry = malloc(sizeof(struct extent_entry));
8527                         if (!entry) {
8528                                 ret = -ENOMEM;
8529                                 goto out;
8530                         }
8531                         memset(entry, 0, sizeof(*entry));
8532                         entry->bytenr = dback->disk_bytenr;
8533                         entry->bytes = dback->bytes;
8534                         list_add_tail(&entry->list, &entries);
8535                         nr_entries++;
8536                 }
8537
8538                 /*
8539                  * If we only have on entry we may think the entries agree when
8540                  * in reality they don't so we have to do some extra checking.
8541                  */
8542                 if (dback->disk_bytenr != rec->start ||
8543                     dback->bytes != rec->nr || back->broken)
8544                         mismatch = 1;
8545
8546                 if (back->broken) {
8547                         entry->broken++;
8548                         broken_entries++;
8549                 }
8550
8551                 entry->count++;
8552         }
8553
8554         /* Yay all the backrefs agree, carry on good sir */
8555         if (nr_entries <= 1 && !mismatch)
8556                 goto out;
8557
8558         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8559                 "%Lu\n", rec->start);
8560
8561         /*
8562          * First we want to see if the backrefs can agree amongst themselves who
8563          * is right, so figure out which one of the entries has the highest
8564          * count.
8565          */
8566         best = find_most_right_entry(&entries);
8567
8568         /*
8569          * Ok so we may have an even split between what the backrefs think, so
8570          * this is where we use the extent ref to see what it thinks.
8571          */
8572         if (!best) {
8573                 entry = find_entry(&entries, rec->start, rec->nr);
8574                 if (!entry && (!broken_entries || !rec->found_rec)) {
8575                         fprintf(stderr, "Backrefs don't agree with each other "
8576                                 "and extent record doesn't agree with anybody,"
8577                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8578                                 rec->start, rec->nr);
8579                         ret = -EINVAL;
8580                         goto out;
8581                 } else if (!entry) {
8582                         /*
8583                          * Ok our backrefs were broken, we'll assume this is the
8584                          * correct value and add an entry for this range.
8585                          */
8586                         entry = malloc(sizeof(struct extent_entry));
8587                         if (!entry) {
8588                                 ret = -ENOMEM;
8589                                 goto out;
8590                         }
8591                         memset(entry, 0, sizeof(*entry));
8592                         entry->bytenr = rec->start;
8593                         entry->bytes = rec->nr;
8594                         list_add_tail(&entry->list, &entries);
8595                         nr_entries++;
8596                 }
8597                 entry->count++;
8598                 best = find_most_right_entry(&entries);
8599                 if (!best) {
8600                         fprintf(stderr, "Backrefs and extent record evenly "
8601                                 "split on who is right, this is going to "
8602                                 "require user input to fix bytenr %Lu bytes "
8603                                 "%Lu\n", rec->start, rec->nr);
8604                         ret = -EINVAL;
8605                         goto out;
8606                 }
8607         }
8608
8609         /*
8610          * I don't think this can happen currently as we'll abort() if we catch
8611          * this case higher up, but in case somebody removes that we still can't
8612          * deal with it properly here yet, so just bail out of that's the case.
8613          */
8614         if (best->bytenr != rec->start) {
8615                 fprintf(stderr, "Extent start and backref starts don't match, "
8616                         "please use btrfs-image on this file system and send "
8617                         "it to a btrfs developer so they can make fsck fix "
8618                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8619                         rec->start, rec->nr);
8620                 ret = -EINVAL;
8621                 goto out;
8622         }
8623
8624         /*
8625          * Ok great we all agreed on an extent record, let's go find the real
8626          * references and fix up the ones that don't match.
8627          */
8628         list_for_each_entry(back, &rec->backrefs, list) {
8629                 if (back->full_backref || !back->is_data)
8630                         continue;
8631
8632                 dback = to_data_backref(back);
8633
8634                 /*
8635                  * Still ignoring backrefs that don't have a real ref attached
8636                  * to them.
8637                  */
8638                 if (dback->found_ref == 0)
8639                         continue;
8640
8641                 if (dback->bytes == best->bytes &&
8642                     dback->disk_bytenr == best->bytenr)
8643                         continue;
8644
8645                 ret = repair_ref(info, path, dback, best);
8646                 if (ret)
8647                         goto out;
8648         }
8649
8650         /*
8651          * Ok we messed with the actual refs, which means we need to drop our
8652          * entire cache and go back and rescan.  I know this is a huge pain and
8653          * adds a lot of extra work, but it's the only way to be safe.  Once all
8654          * the backrefs agree we may not need to do anything to the extent
8655          * record itself.
8656          */
8657         ret = -EAGAIN;
8658 out:
8659         while (!list_empty(&entries)) {
8660                 entry = list_entry(entries.next, struct extent_entry, list);
8661                 list_del_init(&entry->list);
8662                 free(entry);
8663         }
8664         return ret;
8665 }
8666
8667 static int process_duplicates(struct cache_tree *extent_cache,
8668                               struct extent_record *rec)
8669 {
8670         struct extent_record *good, *tmp;
8671         struct cache_extent *cache;
8672         int ret;
8673
8674         /*
8675          * If we found a extent record for this extent then return, or if we
8676          * have more than one duplicate we are likely going to need to delete
8677          * something.
8678          */
8679         if (rec->found_rec || rec->num_duplicates > 1)
8680                 return 0;
8681
8682         /* Shouldn't happen but just in case */
8683         BUG_ON(!rec->num_duplicates);
8684
8685         /*
8686          * So this happens if we end up with a backref that doesn't match the
8687          * actual extent entry.  So either the backref is bad or the extent
8688          * entry is bad.  Either way we want to have the extent_record actually
8689          * reflect what we found in the extent_tree, so we need to take the
8690          * duplicate out and use that as the extent_record since the only way we
8691          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8692          */
8693         remove_cache_extent(extent_cache, &rec->cache);
8694
8695         good = to_extent_record(rec->dups.next);
8696         list_del_init(&good->list);
8697         INIT_LIST_HEAD(&good->backrefs);
8698         INIT_LIST_HEAD(&good->dups);
8699         good->cache.start = good->start;
8700         good->cache.size = good->nr;
8701         good->content_checked = 0;
8702         good->owner_ref_checked = 0;
8703         good->num_duplicates = 0;
8704         good->refs = rec->refs;
8705         list_splice_init(&rec->backrefs, &good->backrefs);
8706         while (1) {
8707                 cache = lookup_cache_extent(extent_cache, good->start,
8708                                             good->nr);
8709                 if (!cache)
8710                         break;
8711                 tmp = container_of(cache, struct extent_record, cache);
8712
8713                 /*
8714                  * If we find another overlapping extent and it's found_rec is
8715                  * set then it's a duplicate and we need to try and delete
8716                  * something.
8717                  */
8718                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8719                         if (list_empty(&good->list))
8720                                 list_add_tail(&good->list,
8721                                               &duplicate_extents);
8722                         good->num_duplicates += tmp->num_duplicates + 1;
8723                         list_splice_init(&tmp->dups, &good->dups);
8724                         list_del_init(&tmp->list);
8725                         list_add_tail(&tmp->list, &good->dups);
8726                         remove_cache_extent(extent_cache, &tmp->cache);
8727                         continue;
8728                 }
8729
8730                 /*
8731                  * Ok we have another non extent item backed extent rec, so lets
8732                  * just add it to this extent and carry on like we did above.
8733                  */
8734                 good->refs += tmp->refs;
8735                 list_splice_init(&tmp->backrefs, &good->backrefs);
8736                 remove_cache_extent(extent_cache, &tmp->cache);
8737                 free(tmp);
8738         }
8739         ret = insert_cache_extent(extent_cache, &good->cache);
8740         BUG_ON(ret);
8741         free(rec);
8742         return good->num_duplicates ? 0 : 1;
8743 }
8744
8745 static int delete_duplicate_records(struct btrfs_root *root,
8746                                     struct extent_record *rec)
8747 {
8748         struct btrfs_trans_handle *trans;
8749         LIST_HEAD(delete_list);
8750         struct btrfs_path path;
8751         struct extent_record *tmp, *good, *n;
8752         int nr_del = 0;
8753         int ret = 0, err;
8754         struct btrfs_key key;
8755
8756         btrfs_init_path(&path);
8757
8758         good = rec;
8759         /* Find the record that covers all of the duplicates. */
8760         list_for_each_entry(tmp, &rec->dups, list) {
8761                 if (good->start < tmp->start)
8762                         continue;
8763                 if (good->nr > tmp->nr)
8764                         continue;
8765
8766                 if (tmp->start + tmp->nr < good->start + good->nr) {
8767                         fprintf(stderr, "Ok we have overlapping extents that "
8768                                 "aren't completely covered by each other, this "
8769                                 "is going to require more careful thought.  "
8770                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8771                                 tmp->start, tmp->nr, good->start, good->nr);
8772                         abort();
8773                 }
8774                 good = tmp;
8775         }
8776
8777         if (good != rec)
8778                 list_add_tail(&rec->list, &delete_list);
8779
8780         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8781                 if (tmp == good)
8782                         continue;
8783                 list_move_tail(&tmp->list, &delete_list);
8784         }
8785
8786         root = root->fs_info->extent_root;
8787         trans = btrfs_start_transaction(root, 1);
8788         if (IS_ERR(trans)) {
8789                 ret = PTR_ERR(trans);
8790                 goto out;
8791         }
8792
8793         list_for_each_entry(tmp, &delete_list, list) {
8794                 if (tmp->found_rec == 0)
8795                         continue;
8796                 key.objectid = tmp->start;
8797                 key.type = BTRFS_EXTENT_ITEM_KEY;
8798                 key.offset = tmp->nr;
8799
8800                 /* Shouldn't happen but just in case */
8801                 if (tmp->metadata) {
8802                         fprintf(stderr, "Well this shouldn't happen, extent "
8803                                 "record overlaps but is metadata? "
8804                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8805                         abort();
8806                 }
8807
8808                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8809                 if (ret) {
8810                         if (ret > 0)
8811                                 ret = -EINVAL;
8812                         break;
8813                 }
8814                 ret = btrfs_del_item(trans, root, &path);
8815                 if (ret)
8816                         break;
8817                 btrfs_release_path(&path);
8818                 nr_del++;
8819         }
8820         err = btrfs_commit_transaction(trans, root);
8821         if (err && !ret)
8822                 ret = err;
8823 out:
8824         while (!list_empty(&delete_list)) {
8825                 tmp = to_extent_record(delete_list.next);
8826                 list_del_init(&tmp->list);
8827                 if (tmp == rec)
8828                         continue;
8829                 free(tmp);
8830         }
8831
8832         while (!list_empty(&rec->dups)) {
8833                 tmp = to_extent_record(rec->dups.next);
8834                 list_del_init(&tmp->list);
8835                 free(tmp);
8836         }
8837
8838         btrfs_release_path(&path);
8839
8840         if (!ret && !nr_del)
8841                 rec->num_duplicates = 0;
8842
8843         return ret ? ret : nr_del;
8844 }
8845
8846 static int find_possible_backrefs(struct btrfs_fs_info *info,
8847                                   struct btrfs_path *path,
8848                                   struct cache_tree *extent_cache,
8849                                   struct extent_record *rec)
8850 {
8851         struct btrfs_root *root;
8852         struct extent_backref *back;
8853         struct data_backref *dback;
8854         struct cache_extent *cache;
8855         struct btrfs_file_extent_item *fi;
8856         struct btrfs_key key;
8857         u64 bytenr, bytes;
8858         int ret;
8859
8860         list_for_each_entry(back, &rec->backrefs, list) {
8861                 /* Don't care about full backrefs (poor unloved backrefs) */
8862                 if (back->full_backref || !back->is_data)
8863                         continue;
8864
8865                 dback = to_data_backref(back);
8866
8867                 /* We found this one, we don't need to do a lookup */
8868                 if (dback->found_ref)
8869                         continue;
8870
8871                 key.objectid = dback->root;
8872                 key.type = BTRFS_ROOT_ITEM_KEY;
8873                 key.offset = (u64)-1;
8874
8875                 root = btrfs_read_fs_root(info, &key);
8876
8877                 /* No root, definitely a bad ref, skip */
8878                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8879                         continue;
8880                 /* Other err, exit */
8881                 if (IS_ERR(root))
8882                         return PTR_ERR(root);
8883
8884                 key.objectid = dback->owner;
8885                 key.type = BTRFS_EXTENT_DATA_KEY;
8886                 key.offset = dback->offset;
8887                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8888                 if (ret) {
8889                         btrfs_release_path(path);
8890                         if (ret < 0)
8891                                 return ret;
8892                         /* Didn't find it, we can carry on */
8893                         ret = 0;
8894                         continue;
8895                 }
8896
8897                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8898                                     struct btrfs_file_extent_item);
8899                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8900                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8901                 btrfs_release_path(path);
8902                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8903                 if (cache) {
8904                         struct extent_record *tmp;
8905                         tmp = container_of(cache, struct extent_record, cache);
8906
8907                         /*
8908                          * If we found an extent record for the bytenr for this
8909                          * particular backref then we can't add it to our
8910                          * current extent record.  We only want to add backrefs
8911                          * that don't have a corresponding extent item in the
8912                          * extent tree since they likely belong to this record
8913                          * and we need to fix it if it doesn't match bytenrs.
8914                          */
8915                         if  (tmp->found_rec)
8916                                 continue;
8917                 }
8918
8919                 dback->found_ref += 1;
8920                 dback->disk_bytenr = bytenr;
8921                 dback->bytes = bytes;
8922
8923                 /*
8924                  * Set this so the verify backref code knows not to trust the
8925                  * values in this backref.
8926                  */
8927                 back->broken = 1;
8928         }
8929
8930         return 0;
8931 }
8932
8933 /*
8934  * Record orphan data ref into corresponding root.
8935  *
8936  * Return 0 if the extent item contains data ref and recorded.
8937  * Return 1 if the extent item contains no useful data ref
8938  *   On that case, it may contains only shared_dataref or metadata backref
8939  *   or the file extent exists(this should be handled by the extent bytenr
8940  *   recovery routine)
8941  * Return <0 if something goes wrong.
8942  */
8943 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8944                                       struct extent_record *rec)
8945 {
8946         struct btrfs_key key;
8947         struct btrfs_root *dest_root;
8948         struct extent_backref *back;
8949         struct data_backref *dback;
8950         struct orphan_data_extent *orphan;
8951         struct btrfs_path path;
8952         int recorded_data_ref = 0;
8953         int ret = 0;
8954
8955         if (rec->metadata)
8956                 return 1;
8957         btrfs_init_path(&path);
8958         list_for_each_entry(back, &rec->backrefs, list) {
8959                 if (back->full_backref || !back->is_data ||
8960                     !back->found_extent_tree)
8961                         continue;
8962                 dback = to_data_backref(back);
8963                 if (dback->found_ref)
8964                         continue;
8965                 key.objectid = dback->root;
8966                 key.type = BTRFS_ROOT_ITEM_KEY;
8967                 key.offset = (u64)-1;
8968
8969                 dest_root = btrfs_read_fs_root(fs_info, &key);
8970
8971                 /* For non-exist root we just skip it */
8972                 if (IS_ERR(dest_root) || !dest_root)
8973                         continue;
8974
8975                 key.objectid = dback->owner;
8976                 key.type = BTRFS_EXTENT_DATA_KEY;
8977                 key.offset = dback->offset;
8978
8979                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8980                 btrfs_release_path(&path);
8981                 /*
8982                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8983                  * we need to record it for inode/file extent rebuild.
8984                  * For ret > 0, we record it only for file extent rebuild.
8985                  * For ret == 0, the file extent exists but only bytenr
8986                  * mismatch, let the original bytenr fix routine to handle,
8987                  * don't record it.
8988                  */
8989                 if (ret == 0)
8990                         continue;
8991                 ret = 0;
8992                 orphan = malloc(sizeof(*orphan));
8993                 if (!orphan) {
8994                         ret = -ENOMEM;
8995                         goto out;
8996                 }
8997                 INIT_LIST_HEAD(&orphan->list);
8998                 orphan->root = dback->root;
8999                 orphan->objectid = dback->owner;
9000                 orphan->offset = dback->offset;
9001                 orphan->disk_bytenr = rec->cache.start;
9002                 orphan->disk_len = rec->cache.size;
9003                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9004                 recorded_data_ref = 1;
9005         }
9006 out:
9007         btrfs_release_path(&path);
9008         if (!ret)
9009                 return !recorded_data_ref;
9010         else
9011                 return ret;
9012 }
9013
9014 /*
9015  * when an incorrect extent item is found, this will delete
9016  * all of the existing entries for it and recreate them
9017  * based on what the tree scan found.
9018  */
9019 static int fixup_extent_refs(struct btrfs_fs_info *info,
9020                              struct cache_tree *extent_cache,
9021                              struct extent_record *rec)
9022 {
9023         struct btrfs_trans_handle *trans = NULL;
9024         int ret;
9025         struct btrfs_path path;
9026         struct list_head *cur = rec->backrefs.next;
9027         struct cache_extent *cache;
9028         struct extent_backref *back;
9029         int allocated = 0;
9030         u64 flags = 0;
9031
9032         if (rec->flag_block_full_backref)
9033                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9034
9035         btrfs_init_path(&path);
9036         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9037                 /*
9038                  * Sometimes the backrefs themselves are so broken they don't
9039                  * get attached to any meaningful rec, so first go back and
9040                  * check any of our backrefs that we couldn't find and throw
9041                  * them into the list if we find the backref so that
9042                  * verify_backrefs can figure out what to do.
9043                  */
9044                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9045                 if (ret < 0)
9046                         goto out;
9047         }
9048
9049         /* step one, make sure all of the backrefs agree */
9050         ret = verify_backrefs(info, &path, rec);
9051         if (ret < 0)
9052                 goto out;
9053
9054         trans = btrfs_start_transaction(info->extent_root, 1);
9055         if (IS_ERR(trans)) {
9056                 ret = PTR_ERR(trans);
9057                 goto out;
9058         }
9059
9060         /* step two, delete all the existing records */
9061         ret = delete_extent_records(trans, info->extent_root, &path,
9062                                     rec->start);
9063
9064         if (ret < 0)
9065                 goto out;
9066
9067         /* was this block corrupt?  If so, don't add references to it */
9068         cache = lookup_cache_extent(info->corrupt_blocks,
9069                                     rec->start, rec->max_size);
9070         if (cache) {
9071                 ret = 0;
9072                 goto out;
9073         }
9074
9075         /* step three, recreate all the refs we did find */
9076         while(cur != &rec->backrefs) {
9077                 back = to_extent_backref(cur);
9078                 cur = cur->next;
9079
9080                 /*
9081                  * if we didn't find any references, don't create a
9082                  * new extent record
9083                  */
9084                 if (!back->found_ref)
9085                         continue;
9086
9087                 rec->bad_full_backref = 0;
9088                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9089                 allocated = 1;
9090
9091                 if (ret)
9092                         goto out;
9093         }
9094 out:
9095         if (trans) {
9096                 int err = btrfs_commit_transaction(trans, info->extent_root);
9097                 if (!ret)
9098                         ret = err;
9099         }
9100
9101         if (!ret)
9102                 fprintf(stderr, "Repaired extent references for %llu\n",
9103                                 (unsigned long long)rec->start);
9104
9105         btrfs_release_path(&path);
9106         return ret;
9107 }
9108
9109 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9110                               struct extent_record *rec)
9111 {
9112         struct btrfs_trans_handle *trans;
9113         struct btrfs_root *root = fs_info->extent_root;
9114         struct btrfs_path path;
9115         struct btrfs_extent_item *ei;
9116         struct btrfs_key key;
9117         u64 flags;
9118         int ret = 0;
9119
9120         key.objectid = rec->start;
9121         if (rec->metadata) {
9122                 key.type = BTRFS_METADATA_ITEM_KEY;
9123                 key.offset = rec->info_level;
9124         } else {
9125                 key.type = BTRFS_EXTENT_ITEM_KEY;
9126                 key.offset = rec->max_size;
9127         }
9128
9129         trans = btrfs_start_transaction(root, 0);
9130         if (IS_ERR(trans))
9131                 return PTR_ERR(trans);
9132
9133         btrfs_init_path(&path);
9134         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9135         if (ret < 0) {
9136                 btrfs_release_path(&path);
9137                 btrfs_commit_transaction(trans, root);
9138                 return ret;
9139         } else if (ret) {
9140                 fprintf(stderr, "Didn't find extent for %llu\n",
9141                         (unsigned long long)rec->start);
9142                 btrfs_release_path(&path);
9143                 btrfs_commit_transaction(trans, root);
9144                 return -ENOENT;
9145         }
9146
9147         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9148                             struct btrfs_extent_item);
9149         flags = btrfs_extent_flags(path.nodes[0], ei);
9150         if (rec->flag_block_full_backref) {
9151                 fprintf(stderr, "setting full backref on %llu\n",
9152                         (unsigned long long)key.objectid);
9153                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9154         } else {
9155                 fprintf(stderr, "clearing full backref on %llu\n",
9156                         (unsigned long long)key.objectid);
9157                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9158         }
9159         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9160         btrfs_mark_buffer_dirty(path.nodes[0]);
9161         btrfs_release_path(&path);
9162         ret = btrfs_commit_transaction(trans, root);
9163         if (!ret)
9164                 fprintf(stderr, "Repaired extent flags for %llu\n",
9165                                 (unsigned long long)rec->start);
9166
9167         return ret;
9168 }
9169
9170 /* right now we only prune from the extent allocation tree */
9171 static int prune_one_block(struct btrfs_trans_handle *trans,
9172                            struct btrfs_fs_info *info,
9173                            struct btrfs_corrupt_block *corrupt)
9174 {
9175         int ret;
9176         struct btrfs_path path;
9177         struct extent_buffer *eb;
9178         u64 found;
9179         int slot;
9180         int nritems;
9181         int level = corrupt->level + 1;
9182
9183         btrfs_init_path(&path);
9184 again:
9185         /* we want to stop at the parent to our busted block */
9186         path.lowest_level = level;
9187
9188         ret = btrfs_search_slot(trans, info->extent_root,
9189                                 &corrupt->key, &path, -1, 1);
9190
9191         if (ret < 0)
9192                 goto out;
9193
9194         eb = path.nodes[level];
9195         if (!eb) {
9196                 ret = -ENOENT;
9197                 goto out;
9198         }
9199
9200         /*
9201          * hopefully the search gave us the block we want to prune,
9202          * lets try that first
9203          */
9204         slot = path.slots[level];
9205         found =  btrfs_node_blockptr(eb, slot);
9206         if (found == corrupt->cache.start)
9207                 goto del_ptr;
9208
9209         nritems = btrfs_header_nritems(eb);
9210
9211         /* the search failed, lets scan this node and hope we find it */
9212         for (slot = 0; slot < nritems; slot++) {
9213                 found =  btrfs_node_blockptr(eb, slot);
9214                 if (found == corrupt->cache.start)
9215                         goto del_ptr;
9216         }
9217         /*
9218          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9219          * to this block
9220          */
9221         if (eb == info->extent_root->node) {
9222                 ret = -ENOENT;
9223                 goto out;
9224         } else {
9225                 level++;
9226                 btrfs_release_path(&path);
9227                 goto again;
9228         }
9229
9230 del_ptr:
9231         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9232         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9233
9234 out:
9235         btrfs_release_path(&path);
9236         return ret;
9237 }
9238
9239 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9240 {
9241         struct btrfs_trans_handle *trans = NULL;
9242         struct cache_extent *cache;
9243         struct btrfs_corrupt_block *corrupt;
9244
9245         while (1) {
9246                 cache = search_cache_extent(info->corrupt_blocks, 0);
9247                 if (!cache)
9248                         break;
9249                 if (!trans) {
9250                         trans = btrfs_start_transaction(info->extent_root, 1);
9251                         if (IS_ERR(trans))
9252                                 return PTR_ERR(trans);
9253                 }
9254                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9255                 prune_one_block(trans, info, corrupt);
9256                 remove_cache_extent(info->corrupt_blocks, cache);
9257         }
9258         if (trans)
9259                 return btrfs_commit_transaction(trans, info->extent_root);
9260         return 0;
9261 }
9262
9263 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9264 {
9265         struct btrfs_block_group_cache *cache;
9266         u64 start, end;
9267         int ret;
9268
9269         while (1) {
9270                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9271                                             &start, &end, EXTENT_DIRTY);
9272                 if (ret)
9273                         break;
9274                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9275         }
9276
9277         start = 0;
9278         while (1) {
9279                 cache = btrfs_lookup_first_block_group(fs_info, start);
9280                 if (!cache)
9281                         break;
9282                 if (cache->cached)
9283                         cache->cached = 0;
9284                 start = cache->key.objectid + cache->key.offset;
9285         }
9286 }
9287
9288 static int check_extent_refs(struct btrfs_root *root,
9289                              struct cache_tree *extent_cache)
9290 {
9291         struct extent_record *rec;
9292         struct cache_extent *cache;
9293         int ret = 0;
9294         int had_dups = 0;
9295
9296         if (repair) {
9297                 /*
9298                  * if we're doing a repair, we have to make sure
9299                  * we don't allocate from the problem extents.
9300                  * In the worst case, this will be all the
9301                  * extents in the FS
9302                  */
9303                 cache = search_cache_extent(extent_cache, 0);
9304                 while(cache) {
9305                         rec = container_of(cache, struct extent_record, cache);
9306                         set_extent_dirty(root->fs_info->excluded_extents,
9307                                          rec->start,
9308                                          rec->start + rec->max_size - 1);
9309                         cache = next_cache_extent(cache);
9310                 }
9311
9312                 /* pin down all the corrupted blocks too */
9313                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9314                 while(cache) {
9315                         set_extent_dirty(root->fs_info->excluded_extents,
9316                                          cache->start,
9317                                          cache->start + cache->size - 1);
9318                         cache = next_cache_extent(cache);
9319                 }
9320                 prune_corrupt_blocks(root->fs_info);
9321                 reset_cached_block_groups(root->fs_info);
9322         }
9323
9324         reset_cached_block_groups(root->fs_info);
9325
9326         /*
9327          * We need to delete any duplicate entries we find first otherwise we
9328          * could mess up the extent tree when we have backrefs that actually
9329          * belong to a different extent item and not the weird duplicate one.
9330          */
9331         while (repair && !list_empty(&duplicate_extents)) {
9332                 rec = to_extent_record(duplicate_extents.next);
9333                 list_del_init(&rec->list);
9334
9335                 /* Sometimes we can find a backref before we find an actual
9336                  * extent, so we need to process it a little bit to see if there
9337                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9338                  * if this is a backref screwup.  If we need to delete stuff
9339                  * process_duplicates() will return 0, otherwise it will return
9340                  * 1 and we
9341                  */
9342                 if (process_duplicates(extent_cache, rec))
9343                         continue;
9344                 ret = delete_duplicate_records(root, rec);
9345                 if (ret < 0)
9346                         return ret;
9347                 /*
9348                  * delete_duplicate_records will return the number of entries
9349                  * deleted, so if it's greater than 0 then we know we actually
9350                  * did something and we need to remove.
9351                  */
9352                 if (ret)
9353                         had_dups = 1;
9354         }
9355
9356         if (had_dups)
9357                 return -EAGAIN;
9358
9359         while(1) {
9360                 int cur_err = 0;
9361                 int fix = 0;
9362
9363                 cache = search_cache_extent(extent_cache, 0);
9364                 if (!cache)
9365                         break;
9366                 rec = container_of(cache, struct extent_record, cache);
9367                 if (rec->num_duplicates) {
9368                         fprintf(stderr, "extent item %llu has multiple extent "
9369                                 "items\n", (unsigned long long)rec->start);
9370                         cur_err = 1;
9371                 }
9372
9373                 if (rec->refs != rec->extent_item_refs) {
9374                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9375                                 (unsigned long long)rec->start,
9376                                 (unsigned long long)rec->nr);
9377                         fprintf(stderr, "extent item %llu, found %llu\n",
9378                                 (unsigned long long)rec->extent_item_refs,
9379                                 (unsigned long long)rec->refs);
9380                         ret = record_orphan_data_extents(root->fs_info, rec);
9381                         if (ret < 0)
9382                                 goto repair_abort;
9383                         fix = ret;
9384                         cur_err = 1;
9385                 }
9386                 if (all_backpointers_checked(rec, 1)) {
9387                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9388                                 (unsigned long long)rec->start,
9389                                 (unsigned long long)rec->nr);
9390                         fix = 1;
9391                         cur_err = 1;
9392                 }
9393                 if (!rec->owner_ref_checked) {
9394                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9395                                 (unsigned long long)rec->start,
9396                                 (unsigned long long)rec->nr);
9397                         fix = 1;
9398                         cur_err = 1;
9399                 }
9400
9401                 if (repair && fix) {
9402                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9403                         if (ret)
9404                                 goto repair_abort;
9405                 }
9406
9407
9408                 if (rec->bad_full_backref) {
9409                         fprintf(stderr, "bad full backref, on [%llu]\n",
9410                                 (unsigned long long)rec->start);
9411                         if (repair) {
9412                                 ret = fixup_extent_flags(root->fs_info, rec);
9413                                 if (ret)
9414                                         goto repair_abort;
9415                                 fix = 1;
9416                         }
9417                         cur_err = 1;
9418                 }
9419                 /*
9420                  * Although it's not a extent ref's problem, we reuse this
9421                  * routine for error reporting.
9422                  * No repair function yet.
9423                  */
9424                 if (rec->crossing_stripes) {
9425                         fprintf(stderr,
9426                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9427                                 rec->start, rec->start + rec->max_size);
9428                         cur_err = 1;
9429                 }
9430
9431                 if (rec->wrong_chunk_type) {
9432                         fprintf(stderr,
9433                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9434                                 rec->start, rec->start + rec->max_size);
9435                         cur_err = 1;
9436                 }
9437
9438                 remove_cache_extent(extent_cache, cache);
9439                 free_all_extent_backrefs(rec);
9440                 if (!init_extent_tree && repair && (!cur_err || fix))
9441                         clear_extent_dirty(root->fs_info->excluded_extents,
9442                                            rec->start,
9443                                            rec->start + rec->max_size - 1);
9444                 free(rec);
9445         }
9446 repair_abort:
9447         if (repair) {
9448                 if (ret && ret != -EAGAIN) {
9449                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9450                         exit(1);
9451                 } else if (!ret) {
9452                         struct btrfs_trans_handle *trans;
9453
9454                         root = root->fs_info->extent_root;
9455                         trans = btrfs_start_transaction(root, 1);
9456                         if (IS_ERR(trans)) {
9457                                 ret = PTR_ERR(trans);
9458                                 goto repair_abort;
9459                         }
9460
9461                         btrfs_fix_block_accounting(trans, root);
9462                         ret = btrfs_commit_transaction(trans, root);
9463                         if (ret)
9464                                 goto repair_abort;
9465                 }
9466                 return ret;
9467         }
9468         return 0;
9469 }
9470
9471 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9472 {
9473         u64 stripe_size;
9474
9475         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9476                 stripe_size = length;
9477                 stripe_size /= num_stripes;
9478         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9479                 stripe_size = length * 2;
9480                 stripe_size /= num_stripes;
9481         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9482                 stripe_size = length;
9483                 stripe_size /= (num_stripes - 1);
9484         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9485                 stripe_size = length;
9486                 stripe_size /= (num_stripes - 2);
9487         } else {
9488                 stripe_size = length;
9489         }
9490         return stripe_size;
9491 }
9492
9493 /*
9494  * Check the chunk with its block group/dev list ref:
9495  * Return 0 if all refs seems valid.
9496  * Return 1 if part of refs seems valid, need later check for rebuild ref
9497  * like missing block group and needs to search extent tree to rebuild them.
9498  * Return -1 if essential refs are missing and unable to rebuild.
9499  */
9500 static int check_chunk_refs(struct chunk_record *chunk_rec,
9501                             struct block_group_tree *block_group_cache,
9502                             struct device_extent_tree *dev_extent_cache,
9503                             int silent)
9504 {
9505         struct cache_extent *block_group_item;
9506         struct block_group_record *block_group_rec;
9507         struct cache_extent *dev_extent_item;
9508         struct device_extent_record *dev_extent_rec;
9509         u64 devid;
9510         u64 offset;
9511         u64 length;
9512         int metadump_v2 = 0;
9513         int i;
9514         int ret = 0;
9515
9516         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9517                                                chunk_rec->offset,
9518                                                chunk_rec->length);
9519         if (block_group_item) {
9520                 block_group_rec = container_of(block_group_item,
9521                                                struct block_group_record,
9522                                                cache);
9523                 if (chunk_rec->length != block_group_rec->offset ||
9524                     chunk_rec->offset != block_group_rec->objectid ||
9525                     (!metadump_v2 &&
9526                      chunk_rec->type_flags != block_group_rec->flags)) {
9527                         if (!silent)
9528                                 fprintf(stderr,
9529                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9530                                         chunk_rec->objectid,
9531                                         chunk_rec->type,
9532                                         chunk_rec->offset,
9533                                         chunk_rec->length,
9534                                         chunk_rec->offset,
9535                                         chunk_rec->type_flags,
9536                                         block_group_rec->objectid,
9537                                         block_group_rec->type,
9538                                         block_group_rec->offset,
9539                                         block_group_rec->offset,
9540                                         block_group_rec->objectid,
9541                                         block_group_rec->flags);
9542                         ret = -1;
9543                 } else {
9544                         list_del_init(&block_group_rec->list);
9545                         chunk_rec->bg_rec = block_group_rec;
9546                 }
9547         } else {
9548                 if (!silent)
9549                         fprintf(stderr,
9550                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9551                                 chunk_rec->objectid,
9552                                 chunk_rec->type,
9553                                 chunk_rec->offset,
9554                                 chunk_rec->length,
9555                                 chunk_rec->offset,
9556                                 chunk_rec->type_flags);
9557                 ret = 1;
9558         }
9559
9560         if (metadump_v2)
9561                 return ret;
9562
9563         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9564                                     chunk_rec->num_stripes);
9565         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9566                 devid = chunk_rec->stripes[i].devid;
9567                 offset = chunk_rec->stripes[i].offset;
9568                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9569                                                        devid, offset, length);
9570                 if (dev_extent_item) {
9571                         dev_extent_rec = container_of(dev_extent_item,
9572                                                 struct device_extent_record,
9573                                                 cache);
9574                         if (dev_extent_rec->objectid != devid ||
9575                             dev_extent_rec->offset != offset ||
9576                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9577                             dev_extent_rec->length != length) {
9578                                 if (!silent)
9579                                         fprintf(stderr,
9580                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9581                                                 chunk_rec->objectid,
9582                                                 chunk_rec->type,
9583                                                 chunk_rec->offset,
9584                                                 chunk_rec->stripes[i].devid,
9585                                                 chunk_rec->stripes[i].offset,
9586                                                 dev_extent_rec->objectid,
9587                                                 dev_extent_rec->offset,
9588                                                 dev_extent_rec->length);
9589                                 ret = -1;
9590                         } else {
9591                                 list_move(&dev_extent_rec->chunk_list,
9592                                           &chunk_rec->dextents);
9593                         }
9594                 } else {
9595                         if (!silent)
9596                                 fprintf(stderr,
9597                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9598                                         chunk_rec->objectid,
9599                                         chunk_rec->type,
9600                                         chunk_rec->offset,
9601                                         chunk_rec->stripes[i].devid,
9602                                         chunk_rec->stripes[i].offset);
9603                         ret = -1;
9604                 }
9605         }
9606         return ret;
9607 }
9608
9609 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9610 int check_chunks(struct cache_tree *chunk_cache,
9611                  struct block_group_tree *block_group_cache,
9612                  struct device_extent_tree *dev_extent_cache,
9613                  struct list_head *good, struct list_head *bad,
9614                  struct list_head *rebuild, int silent)
9615 {
9616         struct cache_extent *chunk_item;
9617         struct chunk_record *chunk_rec;
9618         struct block_group_record *bg_rec;
9619         struct device_extent_record *dext_rec;
9620         int err;
9621         int ret = 0;
9622
9623         chunk_item = first_cache_extent(chunk_cache);
9624         while (chunk_item) {
9625                 chunk_rec = container_of(chunk_item, struct chunk_record,
9626                                          cache);
9627                 err = check_chunk_refs(chunk_rec, block_group_cache,
9628                                        dev_extent_cache, silent);
9629                 if (err < 0)
9630                         ret = err;
9631                 if (err == 0 && good)
9632                         list_add_tail(&chunk_rec->list, good);
9633                 if (err > 0 && rebuild)
9634                         list_add_tail(&chunk_rec->list, rebuild);
9635                 if (err < 0 && bad)
9636                         list_add_tail(&chunk_rec->list, bad);
9637                 chunk_item = next_cache_extent(chunk_item);
9638         }
9639
9640         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9641                 if (!silent)
9642                         fprintf(stderr,
9643                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9644                                 bg_rec->objectid,
9645                                 bg_rec->offset,
9646                                 bg_rec->flags);
9647                 if (!ret)
9648                         ret = 1;
9649         }
9650
9651         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9652                             chunk_list) {
9653                 if (!silent)
9654                         fprintf(stderr,
9655                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9656                                 dext_rec->objectid,
9657                                 dext_rec->offset,
9658                                 dext_rec->length);
9659                 if (!ret)
9660                         ret = 1;
9661         }
9662         return ret;
9663 }
9664
9665
9666 static int check_device_used(struct device_record *dev_rec,
9667                              struct device_extent_tree *dext_cache)
9668 {
9669         struct cache_extent *cache;
9670         struct device_extent_record *dev_extent_rec;
9671         u64 total_byte = 0;
9672
9673         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9674         while (cache) {
9675                 dev_extent_rec = container_of(cache,
9676                                               struct device_extent_record,
9677                                               cache);
9678                 if (dev_extent_rec->objectid != dev_rec->devid)
9679                         break;
9680
9681                 list_del_init(&dev_extent_rec->device_list);
9682                 total_byte += dev_extent_rec->length;
9683                 cache = next_cache_extent(cache);
9684         }
9685
9686         if (total_byte != dev_rec->byte_used) {
9687                 fprintf(stderr,
9688                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9689                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9690                         dev_rec->type, dev_rec->offset);
9691                 return -1;
9692         } else {
9693                 return 0;
9694         }
9695 }
9696
9697 /* check btrfs_dev_item -> btrfs_dev_extent */
9698 static int check_devices(struct rb_root *dev_cache,
9699                          struct device_extent_tree *dev_extent_cache)
9700 {
9701         struct rb_node *dev_node;
9702         struct device_record *dev_rec;
9703         struct device_extent_record *dext_rec;
9704         int err;
9705         int ret = 0;
9706
9707         dev_node = rb_first(dev_cache);
9708         while (dev_node) {
9709                 dev_rec = container_of(dev_node, struct device_record, node);
9710                 err = check_device_used(dev_rec, dev_extent_cache);
9711                 if (err)
9712                         ret = err;
9713
9714                 dev_node = rb_next(dev_node);
9715         }
9716         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9717                             device_list) {
9718                 fprintf(stderr,
9719                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9720                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9721                 if (!ret)
9722                         ret = 1;
9723         }
9724         return ret;
9725 }
9726
9727 static int add_root_item_to_list(struct list_head *head,
9728                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9729                                   u8 level, u8 drop_level,
9730                                   int level_size, struct btrfs_key *drop_key)
9731 {
9732
9733         struct root_item_record *ri_rec;
9734         ri_rec = malloc(sizeof(*ri_rec));
9735         if (!ri_rec)
9736                 return -ENOMEM;
9737         ri_rec->bytenr = bytenr;
9738         ri_rec->objectid = objectid;
9739         ri_rec->level = level;
9740         ri_rec->level_size = level_size;
9741         ri_rec->drop_level = drop_level;
9742         ri_rec->last_snapshot = last_snapshot;
9743         if (drop_key)
9744                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9745         list_add_tail(&ri_rec->list, head);
9746
9747         return 0;
9748 }
9749
9750 static void free_root_item_list(struct list_head *list)
9751 {
9752         struct root_item_record *ri_rec;
9753
9754         while (!list_empty(list)) {
9755                 ri_rec = list_first_entry(list, struct root_item_record,
9756                                           list);
9757                 list_del_init(&ri_rec->list);
9758                 free(ri_rec);
9759         }
9760 }
9761
9762 static int deal_root_from_list(struct list_head *list,
9763                                struct btrfs_root *root,
9764                                struct block_info *bits,
9765                                int bits_nr,
9766                                struct cache_tree *pending,
9767                                struct cache_tree *seen,
9768                                struct cache_tree *reada,
9769                                struct cache_tree *nodes,
9770                                struct cache_tree *extent_cache,
9771                                struct cache_tree *chunk_cache,
9772                                struct rb_root *dev_cache,
9773                                struct block_group_tree *block_group_cache,
9774                                struct device_extent_tree *dev_extent_cache)
9775 {
9776         int ret = 0;
9777         u64 last;
9778
9779         while (!list_empty(list)) {
9780                 struct root_item_record *rec;
9781                 struct extent_buffer *buf;
9782                 rec = list_entry(list->next,
9783                                  struct root_item_record, list);
9784                 last = 0;
9785                 buf = read_tree_block(root->fs_info,
9786                                       rec->bytenr, rec->level_size, 0);
9787                 if (!extent_buffer_uptodate(buf)) {
9788                         free_extent_buffer(buf);
9789                         ret = -EIO;
9790                         break;
9791                 }
9792                 ret = add_root_to_pending(buf, extent_cache, pending,
9793                                     seen, nodes, rec->objectid);
9794                 if (ret < 0)
9795                         break;
9796                 /*
9797                  * To rebuild extent tree, we need deal with snapshot
9798                  * one by one, otherwise we deal with node firstly which
9799                  * can maximize readahead.
9800                  */
9801                 while (1) {
9802                         ret = run_next_block(root, bits, bits_nr, &last,
9803                                              pending, seen, reada, nodes,
9804                                              extent_cache, chunk_cache,
9805                                              dev_cache, block_group_cache,
9806                                              dev_extent_cache, rec);
9807                         if (ret != 0)
9808                                 break;
9809                 }
9810                 free_extent_buffer(buf);
9811                 list_del(&rec->list);
9812                 free(rec);
9813                 if (ret < 0)
9814                         break;
9815         }
9816         while (ret >= 0) {
9817                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9818                                      reada, nodes, extent_cache, chunk_cache,
9819                                      dev_cache, block_group_cache,
9820                                      dev_extent_cache, NULL);
9821                 if (ret != 0) {
9822                         if (ret > 0)
9823                                 ret = 0;
9824                         break;
9825                 }
9826         }
9827         return ret;
9828 }
9829
9830 static int check_chunks_and_extents(struct btrfs_root *root)
9831 {
9832         struct rb_root dev_cache;
9833         struct cache_tree chunk_cache;
9834         struct block_group_tree block_group_cache;
9835         struct device_extent_tree dev_extent_cache;
9836         struct cache_tree extent_cache;
9837         struct cache_tree seen;
9838         struct cache_tree pending;
9839         struct cache_tree reada;
9840         struct cache_tree nodes;
9841         struct extent_io_tree excluded_extents;
9842         struct cache_tree corrupt_blocks;
9843         struct btrfs_path path;
9844         struct btrfs_key key;
9845         struct btrfs_key found_key;
9846         int ret, err = 0;
9847         struct block_info *bits;
9848         int bits_nr;
9849         struct extent_buffer *leaf;
9850         int slot;
9851         struct btrfs_root_item ri;
9852         struct list_head dropping_trees;
9853         struct list_head normal_trees;
9854         struct btrfs_root *root1;
9855         u64 objectid;
9856         u32 level_size;
9857         u8 level;
9858
9859         dev_cache = RB_ROOT;
9860         cache_tree_init(&chunk_cache);
9861         block_group_tree_init(&block_group_cache);
9862         device_extent_tree_init(&dev_extent_cache);
9863
9864         cache_tree_init(&extent_cache);
9865         cache_tree_init(&seen);
9866         cache_tree_init(&pending);
9867         cache_tree_init(&nodes);
9868         cache_tree_init(&reada);
9869         cache_tree_init(&corrupt_blocks);
9870         extent_io_tree_init(&excluded_extents);
9871         INIT_LIST_HEAD(&dropping_trees);
9872         INIT_LIST_HEAD(&normal_trees);
9873
9874         if (repair) {
9875                 root->fs_info->excluded_extents = &excluded_extents;
9876                 root->fs_info->fsck_extent_cache = &extent_cache;
9877                 root->fs_info->free_extent_hook = free_extent_hook;
9878                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9879         }
9880
9881         bits_nr = 1024;
9882         bits = malloc(bits_nr * sizeof(struct block_info));
9883         if (!bits) {
9884                 perror("malloc");
9885                 exit(1);
9886         }
9887
9888         if (ctx.progress_enabled) {
9889                 ctx.tp = TASK_EXTENTS;
9890                 task_start(ctx.info);
9891         }
9892
9893 again:
9894         root1 = root->fs_info->tree_root;
9895         level = btrfs_header_level(root1->node);
9896         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9897                                     root1->node->start, 0, level, 0,
9898                                     root1->fs_info->nodesize, NULL);
9899         if (ret < 0)
9900                 goto out;
9901         root1 = root->fs_info->chunk_root;
9902         level = btrfs_header_level(root1->node);
9903         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9904                                     root1->node->start, 0, level, 0,
9905                                     root1->fs_info->nodesize, NULL);
9906         if (ret < 0)
9907                 goto out;
9908         btrfs_init_path(&path);
9909         key.offset = 0;
9910         key.objectid = 0;
9911         key.type = BTRFS_ROOT_ITEM_KEY;
9912         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9913                                         &key, &path, 0, 0);
9914         if (ret < 0)
9915                 goto out;
9916         while(1) {
9917                 leaf = path.nodes[0];
9918                 slot = path.slots[0];
9919                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9920                         ret = btrfs_next_leaf(root, &path);
9921                         if (ret != 0)
9922                                 break;
9923                         leaf = path.nodes[0];
9924                         slot = path.slots[0];
9925                 }
9926                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9927                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9928                         unsigned long offset;
9929                         u64 last_snapshot;
9930
9931                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9932                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9933                         last_snapshot = btrfs_root_last_snapshot(&ri);
9934                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9935                                 level = btrfs_root_level(&ri);
9936                                 level_size = root->fs_info->nodesize;
9937                                 ret = add_root_item_to_list(&normal_trees,
9938                                                 found_key.objectid,
9939                                                 btrfs_root_bytenr(&ri),
9940                                                 last_snapshot, level,
9941                                                 0, level_size, NULL);
9942                                 if (ret < 0)
9943                                         goto out;
9944                         } else {
9945                                 level = btrfs_root_level(&ri);
9946                                 level_size = root->fs_info->nodesize;
9947                                 objectid = found_key.objectid;
9948                                 btrfs_disk_key_to_cpu(&found_key,
9949                                                       &ri.drop_progress);
9950                                 ret = add_root_item_to_list(&dropping_trees,
9951                                                 objectid,
9952                                                 btrfs_root_bytenr(&ri),
9953                                                 last_snapshot, level,
9954                                                 ri.drop_level,
9955                                                 level_size, &found_key);
9956                                 if (ret < 0)
9957                                         goto out;
9958                         }
9959                 }
9960                 path.slots[0]++;
9961         }
9962         btrfs_release_path(&path);
9963
9964         /*
9965          * check_block can return -EAGAIN if it fixes something, please keep
9966          * this in mind when dealing with return values from these functions, if
9967          * we get -EAGAIN we want to fall through and restart the loop.
9968          */
9969         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9970                                   &seen, &reada, &nodes, &extent_cache,
9971                                   &chunk_cache, &dev_cache, &block_group_cache,
9972                                   &dev_extent_cache);
9973         if (ret < 0) {
9974                 if (ret == -EAGAIN)
9975                         goto loop;
9976                 goto out;
9977         }
9978         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9979                                   &pending, &seen, &reada, &nodes,
9980                                   &extent_cache, &chunk_cache, &dev_cache,
9981                                   &block_group_cache, &dev_extent_cache);
9982         if (ret < 0) {
9983                 if (ret == -EAGAIN)
9984                         goto loop;
9985                 goto out;
9986         }
9987
9988         ret = check_chunks(&chunk_cache, &block_group_cache,
9989                            &dev_extent_cache, NULL, NULL, NULL, 0);
9990         if (ret) {
9991                 if (ret == -EAGAIN)
9992                         goto loop;
9993                 err = ret;
9994         }
9995
9996         ret = check_extent_refs(root, &extent_cache);
9997         if (ret < 0) {
9998                 if (ret == -EAGAIN)
9999                         goto loop;
10000                 goto out;
10001         }
10002
10003         ret = check_devices(&dev_cache, &dev_extent_cache);
10004         if (ret && err)
10005                 ret = err;
10006
10007 out:
10008         task_stop(ctx.info);
10009         if (repair) {
10010                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10011                 extent_io_tree_cleanup(&excluded_extents);
10012                 root->fs_info->fsck_extent_cache = NULL;
10013                 root->fs_info->free_extent_hook = NULL;
10014                 root->fs_info->corrupt_blocks = NULL;
10015                 root->fs_info->excluded_extents = NULL;
10016         }
10017         free(bits);
10018         free_chunk_cache_tree(&chunk_cache);
10019         free_device_cache_tree(&dev_cache);
10020         free_block_group_tree(&block_group_cache);
10021         free_device_extent_tree(&dev_extent_cache);
10022         free_extent_cache_tree(&seen);
10023         free_extent_cache_tree(&pending);
10024         free_extent_cache_tree(&reada);
10025         free_extent_cache_tree(&nodes);
10026         free_root_item_list(&normal_trees);
10027         free_root_item_list(&dropping_trees);
10028         return ret;
10029 loop:
10030         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10031         free_extent_cache_tree(&seen);
10032         free_extent_cache_tree(&pending);
10033         free_extent_cache_tree(&reada);
10034         free_extent_cache_tree(&nodes);
10035         free_chunk_cache_tree(&chunk_cache);
10036         free_block_group_tree(&block_group_cache);
10037         free_device_cache_tree(&dev_cache);
10038         free_device_extent_tree(&dev_extent_cache);
10039         free_extent_record_cache(&extent_cache);
10040         free_root_item_list(&normal_trees);
10041         free_root_item_list(&dropping_trees);
10042         extent_io_tree_cleanup(&excluded_extents);
10043         goto again;
10044 }
10045
10046 /*
10047  * Check backrefs of a tree block given by @bytenr or @eb.
10048  *
10049  * @root:       the root containing the @bytenr or @eb
10050  * @eb:         tree block extent buffer, can be NULL
10051  * @bytenr:     bytenr of the tree block to search
10052  * @level:      tree level of the tree block
10053  * @owner:      owner of the tree block
10054  *
10055  * Return >0 for any error found and output error message
10056  * Return 0 for no error found
10057  */
10058 static int check_tree_block_ref(struct btrfs_root *root,
10059                                 struct extent_buffer *eb, u64 bytenr,
10060                                 int level, u64 owner)
10061 {
10062         struct btrfs_key key;
10063         struct btrfs_root *extent_root = root->fs_info->extent_root;
10064         struct btrfs_path path;
10065         struct btrfs_extent_item *ei;
10066         struct btrfs_extent_inline_ref *iref;
10067         struct extent_buffer *leaf;
10068         unsigned long end;
10069         unsigned long ptr;
10070         int slot;
10071         int skinny_level;
10072         int type;
10073         u32 nodesize = root->fs_info->nodesize;
10074         u32 item_size;
10075         u64 offset;
10076         int tree_reloc_root = 0;
10077         int found_ref = 0;
10078         int err = 0;
10079         int ret;
10080
10081         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10082             btrfs_header_bytenr(root->node) == bytenr)
10083                 tree_reloc_root = 1;
10084
10085         btrfs_init_path(&path);
10086         key.objectid = bytenr;
10087         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10088                 key.type = BTRFS_METADATA_ITEM_KEY;
10089         else
10090                 key.type = BTRFS_EXTENT_ITEM_KEY;
10091         key.offset = (u64)-1;
10092
10093         /* Search for the backref in extent tree */
10094         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10095         if (ret < 0) {
10096                 err |= BACKREF_MISSING;
10097                 goto out;
10098         }
10099         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10100         if (ret) {
10101                 err |= BACKREF_MISSING;
10102                 goto out;
10103         }
10104
10105         leaf = path.nodes[0];
10106         slot = path.slots[0];
10107         btrfs_item_key_to_cpu(leaf, &key, slot);
10108
10109         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10110
10111         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10112                 skinny_level = (int)key.offset;
10113                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10114         } else {
10115                 struct btrfs_tree_block_info *info;
10116
10117                 info = (struct btrfs_tree_block_info *)(ei + 1);
10118                 skinny_level = btrfs_tree_block_level(leaf, info);
10119                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10120         }
10121
10122         if (eb) {
10123                 u64 header_gen;
10124                 u64 extent_gen;
10125
10126                 if (!(btrfs_extent_flags(leaf, ei) &
10127                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10128                         error(
10129                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10130                                 key.objectid, nodesize,
10131                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10132                         err = BACKREF_MISMATCH;
10133                 }
10134                 header_gen = btrfs_header_generation(eb);
10135                 extent_gen = btrfs_extent_generation(leaf, ei);
10136                 if (header_gen != extent_gen) {
10137                         error(
10138         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10139                                 key.objectid, nodesize, header_gen,
10140                                 extent_gen);
10141                         err = BACKREF_MISMATCH;
10142                 }
10143                 if (level != skinny_level) {
10144                         error(
10145                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10146                                 key.objectid, nodesize, level, skinny_level);
10147                         err = BACKREF_MISMATCH;
10148                 }
10149                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10150                         error(
10151                         "extent[%llu %u] is referred by other roots than %llu",
10152                                 key.objectid, nodesize, root->objectid);
10153                         err = BACKREF_MISMATCH;
10154                 }
10155         }
10156
10157         /*
10158          * Iterate the extent/metadata item to find the exact backref
10159          */
10160         item_size = btrfs_item_size_nr(leaf, slot);
10161         ptr = (unsigned long)iref;
10162         end = (unsigned long)ei + item_size;
10163         while (ptr < end) {
10164                 iref = (struct btrfs_extent_inline_ref *)ptr;
10165                 type = btrfs_extent_inline_ref_type(leaf, iref);
10166                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10167
10168                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10169                         (offset == root->objectid || offset == owner)) {
10170                         found_ref = 1;
10171                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10172                         /*
10173                          * Backref of tree reloc root points to itself, no need
10174                          * to check backref any more.
10175                          */
10176                         if (tree_reloc_root)
10177                                 found_ref = 1;
10178                         else
10179                         /* Check if the backref points to valid referencer */
10180                                 found_ref = !check_tree_block_ref(root, NULL,
10181                                                 offset, level + 1, owner);
10182                 }
10183
10184                 if (found_ref)
10185                         break;
10186                 ptr += btrfs_extent_inline_ref_size(type);
10187         }
10188
10189         /*
10190          * Inlined extent item doesn't have what we need, check
10191          * TREE_BLOCK_REF_KEY
10192          */
10193         if (!found_ref) {
10194                 btrfs_release_path(&path);
10195                 key.objectid = bytenr;
10196                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10197                 key.offset = root->objectid;
10198
10199                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10200                 if (!ret)
10201                         found_ref = 1;
10202         }
10203         if (!found_ref)
10204                 err |= BACKREF_MISSING;
10205 out:
10206         btrfs_release_path(&path);
10207         if (eb && (err & BACKREF_MISSING))
10208                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10209                         bytenr, nodesize, owner, level);
10210         return err;
10211 }
10212
10213 /*
10214  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10215  *
10216  * Return >0 any error found and output error message
10217  * Return 0 for no error found
10218  */
10219 static int check_extent_data_item(struct btrfs_root *root,
10220                                   struct extent_buffer *eb, int slot)
10221 {
10222         struct btrfs_file_extent_item *fi;
10223         struct btrfs_path path;
10224         struct btrfs_root *extent_root = root->fs_info->extent_root;
10225         struct btrfs_key fi_key;
10226         struct btrfs_key dbref_key;
10227         struct extent_buffer *leaf;
10228         struct btrfs_extent_item *ei;
10229         struct btrfs_extent_inline_ref *iref;
10230         struct btrfs_extent_data_ref *dref;
10231         u64 owner;
10232         u64 disk_bytenr;
10233         u64 disk_num_bytes;
10234         u64 extent_num_bytes;
10235         u64 extent_flags;
10236         u32 item_size;
10237         unsigned long end;
10238         unsigned long ptr;
10239         int type;
10240         u64 ref_root;
10241         int found_dbackref = 0;
10242         int err = 0;
10243         int ret;
10244
10245         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10246         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10247
10248         /* Nothing to check for hole and inline data extents */
10249         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10250             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10251                 return 0;
10252
10253         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10254         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10255         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10256
10257         /* Check unaligned disk_num_bytes and num_bytes */
10258         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10259                 error(
10260 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10261                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10262                         root->fs_info->sectorsize);
10263                 err |= BYTES_UNALIGNED;
10264         } else {
10265                 data_bytes_allocated += disk_num_bytes;
10266         }
10267         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10268                 error(
10269 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10270                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10271                         root->fs_info->sectorsize);
10272                 err |= BYTES_UNALIGNED;
10273         } else {
10274                 data_bytes_referenced += extent_num_bytes;
10275         }
10276         owner = btrfs_header_owner(eb);
10277
10278         /* Check the extent item of the file extent in extent tree */
10279         btrfs_init_path(&path);
10280         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10281         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10282         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10283
10284         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10285         if (ret)
10286                 goto out;
10287
10288         leaf = path.nodes[0];
10289         slot = path.slots[0];
10290         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10291
10292         extent_flags = btrfs_extent_flags(leaf, ei);
10293
10294         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10295                 error(
10296                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10297                     disk_bytenr, disk_num_bytes,
10298                     BTRFS_EXTENT_FLAG_DATA);
10299                 err |= BACKREF_MISMATCH;
10300         }
10301
10302         /* Check data backref inside that extent item */
10303         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10304         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10305         ptr = (unsigned long)iref;
10306         end = (unsigned long)ei + item_size;
10307         while (ptr < end) {
10308                 iref = (struct btrfs_extent_inline_ref *)ptr;
10309                 type = btrfs_extent_inline_ref_type(leaf, iref);
10310                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10311
10312                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10313                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10314                         if (ref_root == owner || ref_root == root->objectid)
10315                                 found_dbackref = 1;
10316                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10317                         found_dbackref = !check_tree_block_ref(root, NULL,
10318                                 btrfs_extent_inline_ref_offset(leaf, iref),
10319                                 0, owner);
10320                 }
10321
10322                 if (found_dbackref)
10323                         break;
10324                 ptr += btrfs_extent_inline_ref_size(type);
10325         }
10326
10327         if (!found_dbackref) {
10328                 btrfs_release_path(&path);
10329
10330                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10331                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10332                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10333                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10334                                 fi_key.objectid, fi_key.offset);
10335
10336                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10337                                         &dbref_key, &path, 0, 0);
10338                 if (!ret) {
10339                         found_dbackref = 1;
10340                         goto out;
10341                 }
10342
10343                 btrfs_release_path(&path);
10344
10345                 /*
10346                  * Neither inlined nor EXTENT_DATA_REF found, try
10347                  * SHARED_DATA_REF as last chance.
10348                  */
10349                 dbref_key.objectid = disk_bytenr;
10350                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10351                 dbref_key.offset = eb->start;
10352
10353                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10354                                         &dbref_key, &path, 0, 0);
10355                 if (!ret) {
10356                         found_dbackref = 1;
10357                         goto out;
10358                 }
10359         }
10360
10361 out:
10362         if (!found_dbackref)
10363                 err |= BACKREF_MISSING;
10364         btrfs_release_path(&path);
10365         if (err & BACKREF_MISSING) {
10366                 error("data extent[%llu %llu] backref lost",
10367                       disk_bytenr, disk_num_bytes);
10368         }
10369         return err;
10370 }
10371
10372 /*
10373  * Get real tree block level for the case like shared block
10374  * Return >= 0 as tree level
10375  * Return <0 for error
10376  */
10377 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10378 {
10379         struct extent_buffer *eb;
10380         struct btrfs_path path;
10381         struct btrfs_key key;
10382         struct btrfs_extent_item *ei;
10383         u64 flags;
10384         u64 transid;
10385         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10386         u8 backref_level;
10387         u8 header_level;
10388         int ret;
10389
10390         /* Search extent tree for extent generation and level */
10391         key.objectid = bytenr;
10392         key.type = BTRFS_METADATA_ITEM_KEY;
10393         key.offset = (u64)-1;
10394
10395         btrfs_init_path(&path);
10396         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10397         if (ret < 0)
10398                 goto release_out;
10399         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10400         if (ret < 0)
10401                 goto release_out;
10402         if (ret > 0) {
10403                 ret = -ENOENT;
10404                 goto release_out;
10405         }
10406
10407         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10408         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10409                             struct btrfs_extent_item);
10410         flags = btrfs_extent_flags(path.nodes[0], ei);
10411         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10412                 ret = -ENOENT;
10413                 goto release_out;
10414         }
10415
10416         /* Get transid for later read_tree_block() check */
10417         transid = btrfs_extent_generation(path.nodes[0], ei);
10418
10419         /* Get backref level as one source */
10420         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10421                 backref_level = key.offset;
10422         } else {
10423                 struct btrfs_tree_block_info *info;
10424
10425                 info = (struct btrfs_tree_block_info *)(ei + 1);
10426                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10427         }
10428         btrfs_release_path(&path);
10429
10430         /* Get level from tree block as an alternative source */
10431         eb = read_tree_block(fs_info, bytenr, nodesize, transid);
10432         if (!extent_buffer_uptodate(eb)) {
10433                 free_extent_buffer(eb);
10434                 return -EIO;
10435         }
10436         header_level = btrfs_header_level(eb);
10437         free_extent_buffer(eb);
10438
10439         if (header_level != backref_level)
10440                 return -EIO;
10441         return header_level;
10442
10443 release_out:
10444         btrfs_release_path(&path);
10445         return ret;
10446 }
10447
10448 /*
10449  * Check if a tree block backref is valid (points to a valid tree block)
10450  * if level == -1, level will be resolved
10451  * Return >0 for any error found and print error message
10452  */
10453 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10454                                     u64 bytenr, int level)
10455 {
10456         struct btrfs_root *root;
10457         struct btrfs_key key;
10458         struct btrfs_path path;
10459         struct extent_buffer *eb;
10460         struct extent_buffer *node;
10461         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10462         int err = 0;
10463         int ret;
10464
10465         /* Query level for level == -1 special case */
10466         if (level == -1)
10467                 level = query_tree_block_level(fs_info, bytenr);
10468         if (level < 0) {
10469                 err |= REFERENCER_MISSING;
10470                 goto out;
10471         }
10472
10473         key.objectid = root_id;
10474         key.type = BTRFS_ROOT_ITEM_KEY;
10475         key.offset = (u64)-1;
10476
10477         root = btrfs_read_fs_root(fs_info, &key);
10478         if (IS_ERR(root)) {
10479                 err |= REFERENCER_MISSING;
10480                 goto out;
10481         }
10482
10483         /* Read out the tree block to get item/node key */
10484         eb = read_tree_block(fs_info, bytenr, root->fs_info->nodesize, 0);
10485         if (!extent_buffer_uptodate(eb)) {
10486                 err |= REFERENCER_MISSING;
10487                 free_extent_buffer(eb);
10488                 goto out;
10489         }
10490
10491         /* Empty tree, no need to check key */
10492         if (!btrfs_header_nritems(eb) && !level) {
10493                 free_extent_buffer(eb);
10494                 goto out;
10495         }
10496
10497         if (level)
10498                 btrfs_node_key_to_cpu(eb, &key, 0);
10499         else
10500                 btrfs_item_key_to_cpu(eb, &key, 0);
10501
10502         free_extent_buffer(eb);
10503
10504         btrfs_init_path(&path);
10505         path.lowest_level = level;
10506         /* Search with the first key, to ensure we can reach it */
10507         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10508         if (ret < 0) {
10509                 err |= REFERENCER_MISSING;
10510                 goto release_out;
10511         }
10512
10513         node = path.nodes[level];
10514         if (btrfs_header_bytenr(node) != bytenr) {
10515                 error(
10516         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10517                         bytenr, nodesize, bytenr,
10518                         btrfs_header_bytenr(node));
10519                 err |= REFERENCER_MISMATCH;
10520         }
10521         if (btrfs_header_level(node) != level) {
10522                 error(
10523         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10524                         bytenr, nodesize, level,
10525                         btrfs_header_level(node));
10526                 err |= REFERENCER_MISMATCH;
10527         }
10528
10529 release_out:
10530         btrfs_release_path(&path);
10531 out:
10532         if (err & REFERENCER_MISSING) {
10533                 if (level < 0)
10534                         error("extent [%llu %d] lost referencer (owner: %llu)",
10535                                 bytenr, nodesize, root_id);
10536                 else
10537                         error(
10538                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10539                                 bytenr, nodesize, root_id, level);
10540         }
10541
10542         return err;
10543 }
10544
10545 /*
10546  * Check if tree block @eb is tree reloc root.
10547  * Return 0 if it's not or any problem happens
10548  * Return 1 if it's a tree reloc root
10549  */
10550 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10551                                  struct extent_buffer *eb)
10552 {
10553         struct btrfs_root *tree_reloc_root;
10554         struct btrfs_key key;
10555         u64 bytenr = btrfs_header_bytenr(eb);
10556         u64 owner = btrfs_header_owner(eb);
10557         int ret = 0;
10558
10559         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10560         key.offset = owner;
10561         key.type = BTRFS_ROOT_ITEM_KEY;
10562
10563         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10564         if (IS_ERR(tree_reloc_root))
10565                 return 0;
10566
10567         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10568                 ret = 1;
10569         btrfs_free_fs_root(tree_reloc_root);
10570         return ret;
10571 }
10572
10573 /*
10574  * Check referencer for shared block backref
10575  * If level == -1, this function will resolve the level.
10576  */
10577 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10578                                      u64 parent, u64 bytenr, int level)
10579 {
10580         struct extent_buffer *eb;
10581         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10582         u32 nr;
10583         int found_parent = 0;
10584         int i;
10585
10586         eb = read_tree_block(fs_info, parent, nodesize, 0);
10587         if (!extent_buffer_uptodate(eb))
10588                 goto out;
10589
10590         if (level == -1)
10591                 level = query_tree_block_level(fs_info, bytenr);
10592         if (level < 0)
10593                 goto out;
10594
10595         /* It's possible it's a tree reloc root */
10596         if (parent == bytenr) {
10597                 if (is_tree_reloc_root(fs_info, eb))
10598                         found_parent = 1;
10599                 goto out;
10600         }
10601
10602         if (level + 1 != btrfs_header_level(eb))
10603                 goto out;
10604
10605         nr = btrfs_header_nritems(eb);
10606         for (i = 0; i < nr; i++) {
10607                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10608                         found_parent = 1;
10609                         break;
10610                 }
10611         }
10612 out:
10613         free_extent_buffer(eb);
10614         if (!found_parent) {
10615                 error(
10616         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10617                         bytenr, nodesize, parent, level);
10618                 return REFERENCER_MISSING;
10619         }
10620         return 0;
10621 }
10622
10623 /*
10624  * Check referencer for normal (inlined) data ref
10625  * If len == 0, it will be resolved by searching in extent tree
10626  */
10627 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10628                                      u64 root_id, u64 objectid, u64 offset,
10629                                      u64 bytenr, u64 len, u32 count)
10630 {
10631         struct btrfs_root *root;
10632         struct btrfs_root *extent_root = fs_info->extent_root;
10633         struct btrfs_key key;
10634         struct btrfs_path path;
10635         struct extent_buffer *leaf;
10636         struct btrfs_file_extent_item *fi;
10637         u32 found_count = 0;
10638         int slot;
10639         int ret = 0;
10640
10641         if (!len) {
10642                 key.objectid = bytenr;
10643                 key.type = BTRFS_EXTENT_ITEM_KEY;
10644                 key.offset = (u64)-1;
10645
10646                 btrfs_init_path(&path);
10647                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10648                 if (ret < 0)
10649                         goto out;
10650                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10651                 if (ret)
10652                         goto out;
10653                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10654                 if (key.objectid != bytenr ||
10655                     key.type != BTRFS_EXTENT_ITEM_KEY)
10656                         goto out;
10657                 len = key.offset;
10658                 btrfs_release_path(&path);
10659         }
10660         key.objectid = root_id;
10661         key.type = BTRFS_ROOT_ITEM_KEY;
10662         key.offset = (u64)-1;
10663         btrfs_init_path(&path);
10664
10665         root = btrfs_read_fs_root(fs_info, &key);
10666         if (IS_ERR(root))
10667                 goto out;
10668
10669         key.objectid = objectid;
10670         key.type = BTRFS_EXTENT_DATA_KEY;
10671         /*
10672          * It can be nasty as data backref offset is
10673          * file offset - file extent offset, which is smaller or
10674          * equal to original backref offset.  The only special case is
10675          * overflow.  So we need to special check and do further search.
10676          */
10677         key.offset = offset & (1ULL << 63) ? 0 : offset;
10678
10679         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10680         if (ret < 0)
10681                 goto out;
10682
10683         /*
10684          * Search afterwards to get correct one
10685          * NOTE: As we must do a comprehensive check on the data backref to
10686          * make sure the dref count also matches, we must iterate all file
10687          * extents for that inode.
10688          */
10689         while (1) {
10690                 leaf = path.nodes[0];
10691                 slot = path.slots[0];
10692
10693                 if (slot >= btrfs_header_nritems(leaf))
10694                         goto next;
10695                 btrfs_item_key_to_cpu(leaf, &key, slot);
10696                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10697                         break;
10698                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10699                 /*
10700                  * Except normal disk bytenr and disk num bytes, we still
10701                  * need to do extra check on dbackref offset as
10702                  * dbackref offset = file_offset - file_extent_offset
10703                  */
10704                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10705                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10706                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10707                     offset)
10708                         found_count++;
10709
10710 next:
10711                 ret = btrfs_next_item(root, &path);
10712                 if (ret)
10713                         break;
10714         }
10715 out:
10716         btrfs_release_path(&path);
10717         if (found_count != count) {
10718                 error(
10719 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10720                         bytenr, len, root_id, objectid, offset, count, found_count);
10721                 return REFERENCER_MISSING;
10722         }
10723         return 0;
10724 }
10725
10726 /*
10727  * Check if the referencer of a shared data backref exists
10728  */
10729 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10730                                      u64 parent, u64 bytenr)
10731 {
10732         struct extent_buffer *eb;
10733         struct btrfs_key key;
10734         struct btrfs_file_extent_item *fi;
10735         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10736         u32 nr;
10737         int found_parent = 0;
10738         int i;
10739
10740         eb = read_tree_block(fs_info, parent, nodesize, 0);
10741         if (!extent_buffer_uptodate(eb))
10742                 goto out;
10743
10744         nr = btrfs_header_nritems(eb);
10745         for (i = 0; i < nr; i++) {
10746                 btrfs_item_key_to_cpu(eb, &key, i);
10747                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10748                         continue;
10749
10750                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10751                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10752                         continue;
10753
10754                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10755                         found_parent = 1;
10756                         break;
10757                 }
10758         }
10759
10760 out:
10761         free_extent_buffer(eb);
10762         if (!found_parent) {
10763                 error("shared extent %llu referencer lost (parent: %llu)",
10764                         bytenr, parent);
10765                 return REFERENCER_MISSING;
10766         }
10767         return 0;
10768 }
10769
10770 /*
10771  * This function will check a given extent item, including its backref and
10772  * itself (like crossing stripe boundary and type)
10773  *
10774  * Since we don't use extent_record anymore, introduce new error bit
10775  */
10776 static int check_extent_item(struct btrfs_fs_info *fs_info,
10777                              struct extent_buffer *eb, int slot)
10778 {
10779         struct btrfs_extent_item *ei;
10780         struct btrfs_extent_inline_ref *iref;
10781         struct btrfs_extent_data_ref *dref;
10782         unsigned long end;
10783         unsigned long ptr;
10784         int type;
10785         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10786         u32 item_size = btrfs_item_size_nr(eb, slot);
10787         u64 flags;
10788         u64 offset;
10789         int metadata = 0;
10790         int level;
10791         struct btrfs_key key;
10792         int ret;
10793         int err = 0;
10794
10795         btrfs_item_key_to_cpu(eb, &key, slot);
10796         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10797                 bytes_used += key.offset;
10798         else
10799                 bytes_used += nodesize;
10800
10801         if (item_size < sizeof(*ei)) {
10802                 /*
10803                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10804                  * old thing when on disk format is still un-determined.
10805                  * No need to care about it anymore
10806                  */
10807                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10808                 return -ENOTTY;
10809         }
10810
10811         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10812         flags = btrfs_extent_flags(eb, ei);
10813
10814         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10815                 metadata = 1;
10816         if (metadata && check_crossing_stripes(global_info, key.objectid,
10817                                                eb->len)) {
10818                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10819                       key.objectid, key.objectid + nodesize);
10820                 err |= CROSSING_STRIPE_BOUNDARY;
10821         }
10822
10823         ptr = (unsigned long)(ei + 1);
10824
10825         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10826                 /* Old EXTENT_ITEM metadata */
10827                 struct btrfs_tree_block_info *info;
10828
10829                 info = (struct btrfs_tree_block_info *)ptr;
10830                 level = btrfs_tree_block_level(eb, info);
10831                 ptr += sizeof(struct btrfs_tree_block_info);
10832         } else {
10833                 /* New METADATA_ITEM */
10834                 level = key.offset;
10835         }
10836         end = (unsigned long)ei + item_size;
10837
10838 next:
10839         /* Reached extent item end normally */
10840         if (ptr == end)
10841                 goto out;
10842
10843         /* Beyond extent item end, wrong item size */
10844         if (ptr > end) {
10845                 err |= ITEM_SIZE_MISMATCH;
10846                 error("extent item at bytenr %llu slot %d has wrong size",
10847                         eb->start, slot);
10848                 goto out;
10849         }
10850
10851         /* Now check every backref in this extent item */
10852         iref = (struct btrfs_extent_inline_ref *)ptr;
10853         type = btrfs_extent_inline_ref_type(eb, iref);
10854         offset = btrfs_extent_inline_ref_offset(eb, iref);
10855         switch (type) {
10856         case BTRFS_TREE_BLOCK_REF_KEY:
10857                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10858                                                level);
10859                 err |= ret;
10860                 break;
10861         case BTRFS_SHARED_BLOCK_REF_KEY:
10862                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10863                                                  level);
10864                 err |= ret;
10865                 break;
10866         case BTRFS_EXTENT_DATA_REF_KEY:
10867                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10868                 ret = check_extent_data_backref(fs_info,
10869                                 btrfs_extent_data_ref_root(eb, dref),
10870                                 btrfs_extent_data_ref_objectid(eb, dref),
10871                                 btrfs_extent_data_ref_offset(eb, dref),
10872                                 key.objectid, key.offset,
10873                                 btrfs_extent_data_ref_count(eb, dref));
10874                 err |= ret;
10875                 break;
10876         case BTRFS_SHARED_DATA_REF_KEY:
10877                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10878                 err |= ret;
10879                 break;
10880         default:
10881                 error("extent[%llu %d %llu] has unknown ref type: %d",
10882                         key.objectid, key.type, key.offset, type);
10883                 err |= UNKNOWN_TYPE;
10884                 goto out;
10885         }
10886
10887         ptr += btrfs_extent_inline_ref_size(type);
10888         goto next;
10889
10890 out:
10891         return err;
10892 }
10893
10894 /*
10895  * Check if a dev extent item is referred correctly by its chunk
10896  */
10897 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10898                                  struct extent_buffer *eb, int slot)
10899 {
10900         struct btrfs_root *chunk_root = fs_info->chunk_root;
10901         struct btrfs_dev_extent *ptr;
10902         struct btrfs_path path;
10903         struct btrfs_key chunk_key;
10904         struct btrfs_key devext_key;
10905         struct btrfs_chunk *chunk;
10906         struct extent_buffer *l;
10907         int num_stripes;
10908         u64 length;
10909         int i;
10910         int found_chunk = 0;
10911         int ret;
10912
10913         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10914         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10915         length = btrfs_dev_extent_length(eb, ptr);
10916
10917         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10918         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10919         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10920
10921         btrfs_init_path(&path);
10922         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10923         if (ret)
10924                 goto out;
10925
10926         l = path.nodes[0];
10927         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10928         ret = btrfs_check_chunk_valid(chunk_root, l, chunk, path.slots[0],
10929                                       chunk_key.offset);
10930         if (ret < 0)
10931                 goto out;
10932
10933         if (btrfs_stripe_length(fs_info, l, chunk) != length)
10934                 goto out;
10935
10936         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10937         for (i = 0; i < num_stripes; i++) {
10938                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10939                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10940
10941                 if (devid == devext_key.objectid &&
10942                     offset == devext_key.offset) {
10943                         found_chunk = 1;
10944                         break;
10945                 }
10946         }
10947 out:
10948         btrfs_release_path(&path);
10949         if (!found_chunk) {
10950                 error(
10951                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10952                         devext_key.objectid, devext_key.offset, length);
10953                 return REFERENCER_MISSING;
10954         }
10955         return 0;
10956 }
10957
10958 /*
10959  * Check if the used space is correct with the dev item
10960  */
10961 static int check_dev_item(struct btrfs_fs_info *fs_info,
10962                           struct extent_buffer *eb, int slot)
10963 {
10964         struct btrfs_root *dev_root = fs_info->dev_root;
10965         struct btrfs_dev_item *dev_item;
10966         struct btrfs_path path;
10967         struct btrfs_key key;
10968         struct btrfs_dev_extent *ptr;
10969         u64 dev_id;
10970         u64 used;
10971         u64 total = 0;
10972         int ret;
10973
10974         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10975         dev_id = btrfs_device_id(eb, dev_item);
10976         used = btrfs_device_bytes_used(eb, dev_item);
10977
10978         key.objectid = dev_id;
10979         key.type = BTRFS_DEV_EXTENT_KEY;
10980         key.offset = 0;
10981
10982         btrfs_init_path(&path);
10983         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10984         if (ret < 0) {
10985                 btrfs_item_key_to_cpu(eb, &key, slot);
10986                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10987                         key.objectid, key.type, key.offset);
10988                 btrfs_release_path(&path);
10989                 return REFERENCER_MISSING;
10990         }
10991
10992         /* Iterate dev_extents to calculate the used space of a device */
10993         while (1) {
10994                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10995                         goto next;
10996
10997                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10998                 if (key.objectid > dev_id)
10999                         break;
11000                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11001                         goto next;
11002
11003                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11004                                      struct btrfs_dev_extent);
11005                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11006 next:
11007                 ret = btrfs_next_item(dev_root, &path);
11008                 if (ret)
11009                         break;
11010         }
11011         btrfs_release_path(&path);
11012
11013         if (used != total) {
11014                 btrfs_item_key_to_cpu(eb, &key, slot);
11015                 error(
11016 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11017                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11018                         BTRFS_DEV_EXTENT_KEY, dev_id);
11019                 return ACCOUNTING_MISMATCH;
11020         }
11021         return 0;
11022 }
11023
11024 /*
11025  * Check a block group item with its referener (chunk) and its used space
11026  * with extent/metadata item
11027  */
11028 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11029                                   struct extent_buffer *eb, int slot)
11030 {
11031         struct btrfs_root *extent_root = fs_info->extent_root;
11032         struct btrfs_root *chunk_root = fs_info->chunk_root;
11033         struct btrfs_block_group_item *bi;
11034         struct btrfs_block_group_item bg_item;
11035         struct btrfs_path path;
11036         struct btrfs_key bg_key;
11037         struct btrfs_key chunk_key;
11038         struct btrfs_key extent_key;
11039         struct btrfs_chunk *chunk;
11040         struct extent_buffer *leaf;
11041         struct btrfs_extent_item *ei;
11042         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11043         u64 flags;
11044         u64 bg_flags;
11045         u64 used;
11046         u64 total = 0;
11047         int ret;
11048         int err = 0;
11049
11050         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11051         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11052         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11053         used = btrfs_block_group_used(&bg_item);
11054         bg_flags = btrfs_block_group_flags(&bg_item);
11055
11056         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11057         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11058         chunk_key.offset = bg_key.objectid;
11059
11060         btrfs_init_path(&path);
11061         /* Search for the referencer chunk */
11062         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11063         if (ret) {
11064                 error(
11065                 "block group[%llu %llu] did not find the related chunk item",
11066                         bg_key.objectid, bg_key.offset);
11067                 err |= REFERENCER_MISSING;
11068         } else {
11069                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11070                                         struct btrfs_chunk);
11071                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11072                                                 bg_key.offset) {
11073                         error(
11074         "block group[%llu %llu] related chunk item length does not match",
11075                                 bg_key.objectid, bg_key.offset);
11076                         err |= REFERENCER_MISMATCH;
11077                 }
11078         }
11079         btrfs_release_path(&path);
11080
11081         /* Search from the block group bytenr */
11082         extent_key.objectid = bg_key.objectid;
11083         extent_key.type = 0;
11084         extent_key.offset = 0;
11085
11086         btrfs_init_path(&path);
11087         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11088         if (ret < 0)
11089                 goto out;
11090
11091         /* Iterate extent tree to account used space */
11092         while (1) {
11093                 leaf = path.nodes[0];
11094
11095                 /* Search slot can point to the last item beyond leaf nritems */
11096                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11097                         goto next;
11098
11099                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11100                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11101                         break;
11102
11103                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11104                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11105                         goto next;
11106                 if (extent_key.objectid < bg_key.objectid)
11107                         goto next;
11108
11109                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11110                         total += nodesize;
11111                 else
11112                         total += extent_key.offset;
11113
11114                 ei = btrfs_item_ptr(leaf, path.slots[0],
11115                                     struct btrfs_extent_item);
11116                 flags = btrfs_extent_flags(leaf, ei);
11117                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11118                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11119                                 error(
11120                         "bad extent[%llu, %llu) type mismatch with chunk",
11121                                         extent_key.objectid,
11122                                         extent_key.objectid + extent_key.offset);
11123                                 err |= CHUNK_TYPE_MISMATCH;
11124                         }
11125                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11126                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11127                                     BTRFS_BLOCK_GROUP_METADATA))) {
11128                                 error(
11129                         "bad extent[%llu, %llu) type mismatch with chunk",
11130                                         extent_key.objectid,
11131                                         extent_key.objectid + nodesize);
11132                                 err |= CHUNK_TYPE_MISMATCH;
11133                         }
11134                 }
11135 next:
11136                 ret = btrfs_next_item(extent_root, &path);
11137                 if (ret)
11138                         break;
11139         }
11140
11141 out:
11142         btrfs_release_path(&path);
11143
11144         if (total != used) {
11145                 error(
11146                 "block group[%llu %llu] used %llu but extent items used %llu",
11147                         bg_key.objectid, bg_key.offset, used, total);
11148                 err |= ACCOUNTING_MISMATCH;
11149         }
11150         return err;
11151 }
11152
11153 /*
11154  * Check a chunk item.
11155  * Including checking all referred dev_extents and block group
11156  */
11157 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11158                             struct extent_buffer *eb, int slot)
11159 {
11160         struct btrfs_root *extent_root = fs_info->extent_root;
11161         struct btrfs_root *dev_root = fs_info->dev_root;
11162         struct btrfs_path path;
11163         struct btrfs_key chunk_key;
11164         struct btrfs_key bg_key;
11165         struct btrfs_key devext_key;
11166         struct btrfs_chunk *chunk;
11167         struct extent_buffer *leaf;
11168         struct btrfs_block_group_item *bi;
11169         struct btrfs_block_group_item bg_item;
11170         struct btrfs_dev_extent *ptr;
11171         u64 length;
11172         u64 chunk_end;
11173         u64 stripe_len;
11174         u64 type;
11175         int num_stripes;
11176         u64 offset;
11177         u64 objectid;
11178         int i;
11179         int ret;
11180         int err = 0;
11181
11182         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11183         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11184         length = btrfs_chunk_length(eb, chunk);
11185         chunk_end = chunk_key.offset + length;
11186         ret = btrfs_check_chunk_valid(extent_root, eb, chunk, slot,
11187                                       chunk_key.offset);
11188         if (ret < 0) {
11189                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11190                         chunk_end);
11191                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11192                 goto out;
11193         }
11194         type = btrfs_chunk_type(eb, chunk);
11195
11196         bg_key.objectid = chunk_key.offset;
11197         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11198         bg_key.offset = length;
11199
11200         btrfs_init_path(&path);
11201         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11202         if (ret) {
11203                 error(
11204                 "chunk[%llu %llu) did not find the related block group item",
11205                         chunk_key.offset, chunk_end);
11206                 err |= REFERENCER_MISSING;
11207         } else{
11208                 leaf = path.nodes[0];
11209                 bi = btrfs_item_ptr(leaf, path.slots[0],
11210                                     struct btrfs_block_group_item);
11211                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11212                                    sizeof(bg_item));
11213                 if (btrfs_block_group_flags(&bg_item) != type) {
11214                         error(
11215 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11216                                 chunk_key.offset, chunk_end, type,
11217                                 btrfs_block_group_flags(&bg_item));
11218                         err |= REFERENCER_MISSING;
11219                 }
11220         }
11221
11222         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11223         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11224         for (i = 0; i < num_stripes; i++) {
11225                 btrfs_release_path(&path);
11226                 btrfs_init_path(&path);
11227                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11228                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11229                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11230
11231                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11232                                         0, 0);
11233                 if (ret)
11234                         goto not_match_dev;
11235
11236                 leaf = path.nodes[0];
11237                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11238                                      struct btrfs_dev_extent);
11239                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11240                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11241                 if (objectid != chunk_key.objectid ||
11242                     offset != chunk_key.offset ||
11243                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11244                         goto not_match_dev;
11245                 continue;
11246 not_match_dev:
11247                 err |= BACKREF_MISSING;
11248                 error(
11249                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11250                         chunk_key.objectid, chunk_end, i);
11251                 continue;
11252         }
11253         btrfs_release_path(&path);
11254 out:
11255         return err;
11256 }
11257
11258 /*
11259  * Main entry function to check known items and update related accounting info
11260  */
11261 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11262 {
11263         struct btrfs_fs_info *fs_info = root->fs_info;
11264         struct btrfs_key key;
11265         int slot = 0;
11266         int type;
11267         struct btrfs_extent_data_ref *dref;
11268         int ret;
11269         int err = 0;
11270
11271 next:
11272         btrfs_item_key_to_cpu(eb, &key, slot);
11273         type = key.type;
11274
11275         switch (type) {
11276         case BTRFS_EXTENT_DATA_KEY:
11277                 ret = check_extent_data_item(root, eb, slot);
11278                 err |= ret;
11279                 break;
11280         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11281                 ret = check_block_group_item(fs_info, eb, slot);
11282                 err |= ret;
11283                 break;
11284         case BTRFS_DEV_ITEM_KEY:
11285                 ret = check_dev_item(fs_info, eb, slot);
11286                 err |= ret;
11287                 break;
11288         case BTRFS_CHUNK_ITEM_KEY:
11289                 ret = check_chunk_item(fs_info, eb, slot);
11290                 err |= ret;
11291                 break;
11292         case BTRFS_DEV_EXTENT_KEY:
11293                 ret = check_dev_extent_item(fs_info, eb, slot);
11294                 err |= ret;
11295                 break;
11296         case BTRFS_EXTENT_ITEM_KEY:
11297         case BTRFS_METADATA_ITEM_KEY:
11298                 ret = check_extent_item(fs_info, eb, slot);
11299                 err |= ret;
11300                 break;
11301         case BTRFS_EXTENT_CSUM_KEY:
11302                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11303                 break;
11304         case BTRFS_TREE_BLOCK_REF_KEY:
11305                 ret = check_tree_block_backref(fs_info, key.offset,
11306                                                key.objectid, -1);
11307                 err |= ret;
11308                 break;
11309         case BTRFS_EXTENT_DATA_REF_KEY:
11310                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11311                 ret = check_extent_data_backref(fs_info,
11312                                 btrfs_extent_data_ref_root(eb, dref),
11313                                 btrfs_extent_data_ref_objectid(eb, dref),
11314                                 btrfs_extent_data_ref_offset(eb, dref),
11315                                 key.objectid, 0,
11316                                 btrfs_extent_data_ref_count(eb, dref));
11317                 err |= ret;
11318                 break;
11319         case BTRFS_SHARED_BLOCK_REF_KEY:
11320                 ret = check_shared_block_backref(fs_info, key.offset,
11321                                                  key.objectid, -1);
11322                 err |= ret;
11323                 break;
11324         case BTRFS_SHARED_DATA_REF_KEY:
11325                 ret = check_shared_data_backref(fs_info, key.offset,
11326                                                 key.objectid);
11327                 err |= ret;
11328                 break;
11329         default:
11330                 break;
11331         }
11332
11333         if (++slot < btrfs_header_nritems(eb))
11334                 goto next;
11335
11336         return err;
11337 }
11338
11339 /*
11340  * Helper function for later fs/subvol tree check.  To determine if a tree
11341  * block should be checked.
11342  * This function will ensure only the direct referencer with lowest rootid to
11343  * check a fs/subvolume tree block.
11344  *
11345  * Backref check at extent tree would detect errors like missing subvolume
11346  * tree, so we can do aggressive check to reduce duplicated checks.
11347  */
11348 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11349 {
11350         struct btrfs_root *extent_root = root->fs_info->extent_root;
11351         struct btrfs_key key;
11352         struct btrfs_path path;
11353         struct extent_buffer *leaf;
11354         int slot;
11355         struct btrfs_extent_item *ei;
11356         unsigned long ptr;
11357         unsigned long end;
11358         int type;
11359         u32 item_size;
11360         u64 offset;
11361         struct btrfs_extent_inline_ref *iref;
11362         int ret;
11363
11364         btrfs_init_path(&path);
11365         key.objectid = btrfs_header_bytenr(eb);
11366         key.type = BTRFS_METADATA_ITEM_KEY;
11367         key.offset = (u64)-1;
11368
11369         /*
11370          * Any failure in backref resolving means we can't determine
11371          * whom the tree block belongs to.
11372          * So in that case, we need to check that tree block
11373          */
11374         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11375         if (ret < 0)
11376                 goto need_check;
11377
11378         ret = btrfs_previous_extent_item(extent_root, &path,
11379                                          btrfs_header_bytenr(eb));
11380         if (ret)
11381                 goto need_check;
11382
11383         leaf = path.nodes[0];
11384         slot = path.slots[0];
11385         btrfs_item_key_to_cpu(leaf, &key, slot);
11386         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11387
11388         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11389                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11390         } else {
11391                 struct btrfs_tree_block_info *info;
11392
11393                 info = (struct btrfs_tree_block_info *)(ei + 1);
11394                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11395         }
11396
11397         item_size = btrfs_item_size_nr(leaf, slot);
11398         ptr = (unsigned long)iref;
11399         end = (unsigned long)ei + item_size;
11400         while (ptr < end) {
11401                 iref = (struct btrfs_extent_inline_ref *)ptr;
11402                 type = btrfs_extent_inline_ref_type(leaf, iref);
11403                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11404
11405                 /*
11406                  * We only check the tree block if current root is
11407                  * the lowest referencer of it.
11408                  */
11409                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11410                     offset < root->objectid) {
11411                         btrfs_release_path(&path);
11412                         return 0;
11413                 }
11414
11415                 ptr += btrfs_extent_inline_ref_size(type);
11416         }
11417         /*
11418          * Normally we should also check keyed tree block ref, but that may be
11419          * very time consuming.  Inlined ref should already make us skip a lot
11420          * of refs now.  So skip search keyed tree block ref.
11421          */
11422
11423 need_check:
11424         btrfs_release_path(&path);
11425         return 1;
11426 }
11427
11428 /*
11429  * Traversal function for tree block. We will do:
11430  * 1) Skip shared fs/subvolume tree blocks
11431  * 2) Update related bytes accounting
11432  * 3) Pre-order traversal
11433  */
11434 static int traverse_tree_block(struct btrfs_root *root,
11435                                 struct extent_buffer *node)
11436 {
11437         struct extent_buffer *eb;
11438         struct btrfs_key key;
11439         struct btrfs_key drop_key;
11440         int level;
11441         u64 nr;
11442         int i;
11443         int err = 0;
11444         int ret;
11445
11446         /*
11447          * Skip shared fs/subvolume tree block, in that case they will
11448          * be checked by referencer with lowest rootid
11449          */
11450         if (is_fstree(root->objectid) && !should_check(root, node))
11451                 return 0;
11452
11453         /* Update bytes accounting */
11454         total_btree_bytes += node->len;
11455         if (fs_root_objectid(btrfs_header_owner(node)))
11456                 total_fs_tree_bytes += node->len;
11457         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11458                 total_extent_tree_bytes += node->len;
11459         if (!found_old_backref &&
11460             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11461             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11462             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11463                 found_old_backref = 1;
11464
11465         /* pre-order tranversal, check itself first */
11466         level = btrfs_header_level(node);
11467         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11468                                    btrfs_header_level(node),
11469                                    btrfs_header_owner(node));
11470         err |= ret;
11471         if (err)
11472                 error(
11473         "check %s failed root %llu bytenr %llu level %d, force continue check",
11474                         level ? "node":"leaf", root->objectid,
11475                         btrfs_header_bytenr(node), btrfs_header_level(node));
11476
11477         if (!level) {
11478                 btree_space_waste += btrfs_leaf_free_space(root, node);
11479                 ret = check_leaf_items(root, node);
11480                 err |= ret;
11481                 return err;
11482         }
11483
11484         nr = btrfs_header_nritems(node);
11485         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11486         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11487                 sizeof(struct btrfs_key_ptr);
11488
11489         /* Then check all its children */
11490         for (i = 0; i < nr; i++) {
11491                 u64 blocknr = btrfs_node_blockptr(node, i);
11492
11493                 btrfs_node_key_to_cpu(node, &key, i);
11494                 if (level == root->root_item.drop_level &&
11495                     is_dropped_key(&key, &drop_key))
11496                         continue;
11497
11498                 /*
11499                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11500                  * to call the function itself.
11501                  */
11502                 eb = read_tree_block(root->fs_info, blocknr,
11503                                 root->fs_info->nodesize, 0);
11504                 if (extent_buffer_uptodate(eb)) {
11505                         ret = traverse_tree_block(root, eb);
11506                         err |= ret;
11507                 }
11508                 free_extent_buffer(eb);
11509         }
11510
11511         return err;
11512 }
11513
11514 /*
11515  * Low memory usage version check_chunks_and_extents.
11516  */
11517 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11518 {
11519         struct btrfs_path path;
11520         struct btrfs_key key;
11521         struct btrfs_root *root1;
11522         struct btrfs_root *cur_root;
11523         int err = 0;
11524         int ret;
11525
11526         root1 = root->fs_info->chunk_root;
11527         ret = traverse_tree_block(root1, root1->node);
11528         err |= ret;
11529
11530         root1 = root->fs_info->tree_root;
11531         ret = traverse_tree_block(root1, root1->node);
11532         err |= ret;
11533
11534         btrfs_init_path(&path);
11535         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11536         key.offset = 0;
11537         key.type = BTRFS_ROOT_ITEM_KEY;
11538
11539         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11540         if (ret) {
11541                 error("cannot find extent treet in tree_root");
11542                 goto out;
11543         }
11544
11545         while (1) {
11546                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11547                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11548                         goto next;
11549                 key.offset = (u64)-1;
11550
11551                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11552                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11553                                         &key);
11554                 else
11555                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11556                 if (IS_ERR(cur_root) || !cur_root) {
11557                         error("failed to read tree: %lld", key.objectid);
11558                         goto next;
11559                 }
11560
11561                 ret = traverse_tree_block(cur_root, cur_root->node);
11562                 err |= ret;
11563
11564                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11565                         btrfs_free_fs_root(cur_root);
11566 next:
11567                 ret = btrfs_next_item(root1, &path);
11568                 if (ret)
11569                         goto out;
11570         }
11571
11572 out:
11573         btrfs_release_path(&path);
11574         return err;
11575 }
11576
11577 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11578                            struct btrfs_root *root, int overwrite)
11579 {
11580         struct extent_buffer *c;
11581         struct extent_buffer *old = root->node;
11582         int level;
11583         int ret;
11584         struct btrfs_disk_key disk_key = {0,0,0};
11585
11586         level = 0;
11587
11588         if (overwrite) {
11589                 c = old;
11590                 extent_buffer_get(c);
11591                 goto init;
11592         }
11593         c = btrfs_alloc_free_block(trans, root,
11594                                    root->fs_info->nodesize,
11595                                    root->root_key.objectid,
11596                                    &disk_key, level, 0, 0);
11597         if (IS_ERR(c)) {
11598                 c = old;
11599                 extent_buffer_get(c);
11600                 overwrite = 1;
11601         }
11602 init:
11603         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11604         btrfs_set_header_level(c, level);
11605         btrfs_set_header_bytenr(c, c->start);
11606         btrfs_set_header_generation(c, trans->transid);
11607         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11608         btrfs_set_header_owner(c, root->root_key.objectid);
11609
11610         write_extent_buffer(c, root->fs_info->fsid,
11611                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11612
11613         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11614                             btrfs_header_chunk_tree_uuid(c),
11615                             BTRFS_UUID_SIZE);
11616
11617         btrfs_mark_buffer_dirty(c);
11618         /*
11619          * this case can happen in the following case:
11620          *
11621          * 1.overwrite previous root.
11622          *
11623          * 2.reinit reloc data root, this is because we skip pin
11624          * down reloc data tree before which means we can allocate
11625          * same block bytenr here.
11626          */
11627         if (old->start == c->start) {
11628                 btrfs_set_root_generation(&root->root_item,
11629                                           trans->transid);
11630                 root->root_item.level = btrfs_header_level(root->node);
11631                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11632                                         &root->root_key, &root->root_item);
11633                 if (ret) {
11634                         free_extent_buffer(c);
11635                         return ret;
11636                 }
11637         }
11638         free_extent_buffer(old);
11639         root->node = c;
11640         add_root_to_dirty_list(root);
11641         return 0;
11642 }
11643
11644 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11645                                 struct extent_buffer *eb, int tree_root)
11646 {
11647         struct extent_buffer *tmp;
11648         struct btrfs_root_item *ri;
11649         struct btrfs_key key;
11650         u64 bytenr;
11651         u32 nodesize;
11652         int level = btrfs_header_level(eb);
11653         int nritems;
11654         int ret;
11655         int i;
11656
11657         /*
11658          * If we have pinned this block before, don't pin it again.
11659          * This can not only avoid forever loop with broken filesystem
11660          * but also give us some speedups.
11661          */
11662         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11663                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11664                 return 0;
11665
11666         btrfs_pin_extent(fs_info, eb->start, eb->len);
11667
11668         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11669         nritems = btrfs_header_nritems(eb);
11670         for (i = 0; i < nritems; i++) {
11671                 if (level == 0) {
11672                         btrfs_item_key_to_cpu(eb, &key, i);
11673                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11674                                 continue;
11675                         /* Skip the extent root and reloc roots */
11676                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11677                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11678                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11679                                 continue;
11680                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11681                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11682
11683                         /*
11684                          * If at any point we start needing the real root we
11685                          * will have to build a stump root for the root we are
11686                          * in, but for now this doesn't actually use the root so
11687                          * just pass in extent_root.
11688                          */
11689                         tmp = read_tree_block(fs_info, bytenr, nodesize, 0);
11690                         if (!extent_buffer_uptodate(tmp)) {
11691                                 fprintf(stderr, "Error reading root block\n");
11692                                 return -EIO;
11693                         }
11694                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11695                         free_extent_buffer(tmp);
11696                         if (ret)
11697                                 return ret;
11698                 } else {
11699                         bytenr = btrfs_node_blockptr(eb, i);
11700
11701                         /* If we aren't the tree root don't read the block */
11702                         if (level == 1 && !tree_root) {
11703                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11704                                 continue;
11705                         }
11706
11707                         tmp = read_tree_block(fs_info, bytenr,
11708                                               nodesize, 0);
11709                         if (!extent_buffer_uptodate(tmp)) {
11710                                 fprintf(stderr, "Error reading tree block\n");
11711                                 return -EIO;
11712                         }
11713                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11714                         free_extent_buffer(tmp);
11715                         if (ret)
11716                                 return ret;
11717                 }
11718         }
11719
11720         return 0;
11721 }
11722
11723 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11724 {
11725         int ret;
11726
11727         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11728         if (ret)
11729                 return ret;
11730
11731         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11732 }
11733
11734 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11735 {
11736         struct btrfs_block_group_cache *cache;
11737         struct btrfs_path path;
11738         struct extent_buffer *leaf;
11739         struct btrfs_chunk *chunk;
11740         struct btrfs_key key;
11741         int ret;
11742         u64 start;
11743
11744         btrfs_init_path(&path);
11745         key.objectid = 0;
11746         key.type = BTRFS_CHUNK_ITEM_KEY;
11747         key.offset = 0;
11748         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11749         if (ret < 0) {
11750                 btrfs_release_path(&path);
11751                 return ret;
11752         }
11753
11754         /*
11755          * We do this in case the block groups were screwed up and had alloc
11756          * bits that aren't actually set on the chunks.  This happens with
11757          * restored images every time and could happen in real life I guess.
11758          */
11759         fs_info->avail_data_alloc_bits = 0;
11760         fs_info->avail_metadata_alloc_bits = 0;
11761         fs_info->avail_system_alloc_bits = 0;
11762
11763         /* First we need to create the in-memory block groups */
11764         while (1) {
11765                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11766                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11767                         if (ret < 0) {
11768                                 btrfs_release_path(&path);
11769                                 return ret;
11770                         }
11771                         if (ret) {
11772                                 ret = 0;
11773                                 break;
11774                         }
11775                 }
11776                 leaf = path.nodes[0];
11777                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11778                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11779                         path.slots[0]++;
11780                         continue;
11781                 }
11782
11783                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11784                 btrfs_add_block_group(fs_info, 0,
11785                                       btrfs_chunk_type(leaf, chunk),
11786                                       key.objectid, key.offset,
11787                                       btrfs_chunk_length(leaf, chunk));
11788                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11789                                  key.offset + btrfs_chunk_length(leaf, chunk));
11790                 path.slots[0]++;
11791         }
11792         start = 0;
11793         while (1) {
11794                 cache = btrfs_lookup_first_block_group(fs_info, start);
11795                 if (!cache)
11796                         break;
11797                 cache->cached = 1;
11798                 start = cache->key.objectid + cache->key.offset;
11799         }
11800
11801         btrfs_release_path(&path);
11802         return 0;
11803 }
11804
11805 static int reset_balance(struct btrfs_trans_handle *trans,
11806                          struct btrfs_fs_info *fs_info)
11807 {
11808         struct btrfs_root *root = fs_info->tree_root;
11809         struct btrfs_path path;
11810         struct extent_buffer *leaf;
11811         struct btrfs_key key;
11812         int del_slot, del_nr = 0;
11813         int ret;
11814         int found = 0;
11815
11816         btrfs_init_path(&path);
11817         key.objectid = BTRFS_BALANCE_OBJECTID;
11818         key.type = BTRFS_BALANCE_ITEM_KEY;
11819         key.offset = 0;
11820         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11821         if (ret) {
11822                 if (ret > 0)
11823                         ret = 0;
11824                 if (!ret)
11825                         goto reinit_data_reloc;
11826                 else
11827                         goto out;
11828         }
11829
11830         ret = btrfs_del_item(trans, root, &path);
11831         if (ret)
11832                 goto out;
11833         btrfs_release_path(&path);
11834
11835         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11836         key.type = BTRFS_ROOT_ITEM_KEY;
11837         key.offset = 0;
11838         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11839         if (ret < 0)
11840                 goto out;
11841         while (1) {
11842                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11843                         if (!found)
11844                                 break;
11845
11846                         if (del_nr) {
11847                                 ret = btrfs_del_items(trans, root, &path,
11848                                                       del_slot, del_nr);
11849                                 del_nr = 0;
11850                                 if (ret)
11851                                         goto out;
11852                         }
11853                         key.offset++;
11854                         btrfs_release_path(&path);
11855
11856                         found = 0;
11857                         ret = btrfs_search_slot(trans, root, &key, &path,
11858                                                 -1, 1);
11859                         if (ret < 0)
11860                                 goto out;
11861                         continue;
11862                 }
11863                 found = 1;
11864                 leaf = path.nodes[0];
11865                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11866                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11867                         break;
11868                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11869                         path.slots[0]++;
11870                         continue;
11871                 }
11872                 if (!del_nr) {
11873                         del_slot = path.slots[0];
11874                         del_nr = 1;
11875                 } else {
11876                         del_nr++;
11877                 }
11878                 path.slots[0]++;
11879         }
11880
11881         if (del_nr) {
11882                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11883                 if (ret)
11884                         goto out;
11885         }
11886         btrfs_release_path(&path);
11887
11888 reinit_data_reloc:
11889         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11890         key.type = BTRFS_ROOT_ITEM_KEY;
11891         key.offset = (u64)-1;
11892         root = btrfs_read_fs_root(fs_info, &key);
11893         if (IS_ERR(root)) {
11894                 fprintf(stderr, "Error reading data reloc tree\n");
11895                 ret = PTR_ERR(root);
11896                 goto out;
11897         }
11898         record_root_in_trans(trans, root);
11899         ret = btrfs_fsck_reinit_root(trans, root, 0);
11900         if (ret)
11901                 goto out;
11902         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11903 out:
11904         btrfs_release_path(&path);
11905         return ret;
11906 }
11907
11908 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11909                               struct btrfs_fs_info *fs_info)
11910 {
11911         u64 start = 0;
11912         int ret;
11913
11914         /*
11915          * The only reason we don't do this is because right now we're just
11916          * walking the trees we find and pinning down their bytes, we don't look
11917          * at any of the leaves.  In order to do mixed groups we'd have to check
11918          * the leaves of any fs roots and pin down the bytes for any file
11919          * extents we find.  Not hard but why do it if we don't have to?
11920          */
11921         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11922                 fprintf(stderr, "We don't support re-initing the extent tree "
11923                         "for mixed block groups yet, please notify a btrfs "
11924                         "developer you want to do this so they can add this "
11925                         "functionality.\n");
11926                 return -EINVAL;
11927         }
11928
11929         /*
11930          * first we need to walk all of the trees except the extent tree and pin
11931          * down the bytes that are in use so we don't overwrite any existing
11932          * metadata.
11933          */
11934         ret = pin_metadata_blocks(fs_info);
11935         if (ret) {
11936                 fprintf(stderr, "error pinning down used bytes\n");
11937                 return ret;
11938         }
11939
11940         /*
11941          * Need to drop all the block groups since we're going to recreate all
11942          * of them again.
11943          */
11944         btrfs_free_block_groups(fs_info);
11945         ret = reset_block_groups(fs_info);
11946         if (ret) {
11947                 fprintf(stderr, "error resetting the block groups\n");
11948                 return ret;
11949         }
11950
11951         /* Ok we can allocate now, reinit the extent root */
11952         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11953         if (ret) {
11954                 fprintf(stderr, "extent root initialization failed\n");
11955                 /*
11956                  * When the transaction code is updated we should end the
11957                  * transaction, but for now progs only knows about commit so
11958                  * just return an error.
11959                  */
11960                 return ret;
11961         }
11962
11963         /*
11964          * Now we have all the in-memory block groups setup so we can make
11965          * allocations properly, and the metadata we care about is safe since we
11966          * pinned all of it above.
11967          */
11968         while (1) {
11969                 struct btrfs_block_group_cache *cache;
11970
11971                 cache = btrfs_lookup_first_block_group(fs_info, start);
11972                 if (!cache)
11973                         break;
11974                 start = cache->key.objectid + cache->key.offset;
11975                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11976                                         &cache->key, &cache->item,
11977                                         sizeof(cache->item));
11978                 if (ret) {
11979                         fprintf(stderr, "Error adding block group\n");
11980                         return ret;
11981                 }
11982                 btrfs_extent_post_op(trans, fs_info->extent_root);
11983         }
11984
11985         ret = reset_balance(trans, fs_info);
11986         if (ret)
11987                 fprintf(stderr, "error resetting the pending balance\n");
11988
11989         return ret;
11990 }
11991
11992 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11993 {
11994         struct btrfs_path path;
11995         struct btrfs_trans_handle *trans;
11996         struct btrfs_key key;
11997         int ret;
11998
11999         printf("Recowing metadata block %llu\n", eb->start);
12000         key.objectid = btrfs_header_owner(eb);
12001         key.type = BTRFS_ROOT_ITEM_KEY;
12002         key.offset = (u64)-1;
12003
12004         root = btrfs_read_fs_root(root->fs_info, &key);
12005         if (IS_ERR(root)) {
12006                 fprintf(stderr, "Couldn't find owner root %llu\n",
12007                         key.objectid);
12008                 return PTR_ERR(root);
12009         }
12010
12011         trans = btrfs_start_transaction(root, 1);
12012         if (IS_ERR(trans))
12013                 return PTR_ERR(trans);
12014
12015         btrfs_init_path(&path);
12016         path.lowest_level = btrfs_header_level(eb);
12017         if (path.lowest_level)
12018                 btrfs_node_key_to_cpu(eb, &key, 0);
12019         else
12020                 btrfs_item_key_to_cpu(eb, &key, 0);
12021
12022         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12023         btrfs_commit_transaction(trans, root);
12024         btrfs_release_path(&path);
12025         return ret;
12026 }
12027
12028 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12029 {
12030         struct btrfs_path path;
12031         struct btrfs_trans_handle *trans;
12032         struct btrfs_key key;
12033         int ret;
12034
12035         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12036                bad->key.type, bad->key.offset);
12037         key.objectid = bad->root_id;
12038         key.type = BTRFS_ROOT_ITEM_KEY;
12039         key.offset = (u64)-1;
12040
12041         root = btrfs_read_fs_root(root->fs_info, &key);
12042         if (IS_ERR(root)) {
12043                 fprintf(stderr, "Couldn't find owner root %llu\n",
12044                         key.objectid);
12045                 return PTR_ERR(root);
12046         }
12047
12048         trans = btrfs_start_transaction(root, 1);
12049         if (IS_ERR(trans))
12050                 return PTR_ERR(trans);
12051
12052         btrfs_init_path(&path);
12053         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12054         if (ret) {
12055                 if (ret > 0)
12056                         ret = 0;
12057                 goto out;
12058         }
12059         ret = btrfs_del_item(trans, root, &path);
12060 out:
12061         btrfs_commit_transaction(trans, root);
12062         btrfs_release_path(&path);
12063         return ret;
12064 }
12065
12066 static int zero_log_tree(struct btrfs_root *root)
12067 {
12068         struct btrfs_trans_handle *trans;
12069         int ret;
12070
12071         trans = btrfs_start_transaction(root, 1);
12072         if (IS_ERR(trans)) {
12073                 ret = PTR_ERR(trans);
12074                 return ret;
12075         }
12076         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12077         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12078         ret = btrfs_commit_transaction(trans, root);
12079         return ret;
12080 }
12081
12082 static int populate_csum(struct btrfs_trans_handle *trans,
12083                          struct btrfs_root *csum_root, char *buf, u64 start,
12084                          u64 len)
12085 {
12086         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12087         u64 offset = 0;
12088         u64 sectorsize;
12089         int ret = 0;
12090
12091         while (offset < len) {
12092                 sectorsize = fs_info->sectorsize;
12093                 ret = read_extent_data(fs_info, buf, start + offset,
12094                                        &sectorsize, 0);
12095                 if (ret)
12096                         break;
12097                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12098                                             start + offset, buf, sectorsize);
12099                 if (ret)
12100                         break;
12101                 offset += sectorsize;
12102         }
12103         return ret;
12104 }
12105
12106 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12107                                       struct btrfs_root *csum_root,
12108                                       struct btrfs_root *cur_root)
12109 {
12110         struct btrfs_path path;
12111         struct btrfs_key key;
12112         struct extent_buffer *node;
12113         struct btrfs_file_extent_item *fi;
12114         char *buf = NULL;
12115         u64 start = 0;
12116         u64 len = 0;
12117         int slot = 0;
12118         int ret = 0;
12119
12120         buf = malloc(cur_root->fs_info->sectorsize);
12121         if (!buf)
12122                 return -ENOMEM;
12123
12124         btrfs_init_path(&path);
12125         key.objectid = 0;
12126         key.offset = 0;
12127         key.type = 0;
12128         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12129         if (ret < 0)
12130                 goto out;
12131         /* Iterate all regular file extents and fill its csum */
12132         while (1) {
12133                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12134
12135                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12136                         goto next;
12137                 node = path.nodes[0];
12138                 slot = path.slots[0];
12139                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12140                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12141                         goto next;
12142                 start = btrfs_file_extent_disk_bytenr(node, fi);
12143                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12144
12145                 ret = populate_csum(trans, csum_root, buf, start, len);
12146                 if (ret == -EEXIST)
12147                         ret = 0;
12148                 if (ret < 0)
12149                         goto out;
12150 next:
12151                 /*
12152                  * TODO: if next leaf is corrupted, jump to nearest next valid
12153                  * leaf.
12154                  */
12155                 ret = btrfs_next_item(cur_root, &path);
12156                 if (ret < 0)
12157                         goto out;
12158                 if (ret > 0) {
12159                         ret = 0;
12160                         goto out;
12161                 }
12162         }
12163
12164 out:
12165         btrfs_release_path(&path);
12166         free(buf);
12167         return ret;
12168 }
12169
12170 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12171                                   struct btrfs_root *csum_root)
12172 {
12173         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12174         struct btrfs_path path;
12175         struct btrfs_root *tree_root = fs_info->tree_root;
12176         struct btrfs_root *cur_root;
12177         struct extent_buffer *node;
12178         struct btrfs_key key;
12179         int slot = 0;
12180         int ret = 0;
12181
12182         btrfs_init_path(&path);
12183         key.objectid = BTRFS_FS_TREE_OBJECTID;
12184         key.offset = 0;
12185         key.type = BTRFS_ROOT_ITEM_KEY;
12186         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12187         if (ret < 0)
12188                 goto out;
12189         if (ret > 0) {
12190                 ret = -ENOENT;
12191                 goto out;
12192         }
12193
12194         while (1) {
12195                 node = path.nodes[0];
12196                 slot = path.slots[0];
12197                 btrfs_item_key_to_cpu(node, &key, slot);
12198                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12199                         goto out;
12200                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12201                         goto next;
12202                 if (!is_fstree(key.objectid))
12203                         goto next;
12204                 key.offset = (u64)-1;
12205
12206                 cur_root = btrfs_read_fs_root(fs_info, &key);
12207                 if (IS_ERR(cur_root) || !cur_root) {
12208                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12209                                 key.objectid);
12210                         goto out;
12211                 }
12212                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12213                                 cur_root);
12214                 if (ret < 0)
12215                         goto out;
12216 next:
12217                 ret = btrfs_next_item(tree_root, &path);
12218                 if (ret > 0) {
12219                         ret = 0;
12220                         goto out;
12221                 }
12222                 if (ret < 0)
12223                         goto out;
12224         }
12225
12226 out:
12227         btrfs_release_path(&path);
12228         return ret;
12229 }
12230
12231 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12232                                       struct btrfs_root *csum_root)
12233 {
12234         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12235         struct btrfs_path path;
12236         struct btrfs_extent_item *ei;
12237         struct extent_buffer *leaf;
12238         char *buf;
12239         struct btrfs_key key;
12240         int ret;
12241
12242         btrfs_init_path(&path);
12243         key.objectid = 0;
12244         key.type = BTRFS_EXTENT_ITEM_KEY;
12245         key.offset = 0;
12246         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12247         if (ret < 0) {
12248                 btrfs_release_path(&path);
12249                 return ret;
12250         }
12251
12252         buf = malloc(csum_root->fs_info->sectorsize);
12253         if (!buf) {
12254                 btrfs_release_path(&path);
12255                 return -ENOMEM;
12256         }
12257
12258         while (1) {
12259                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12260                         ret = btrfs_next_leaf(extent_root, &path);
12261                         if (ret < 0)
12262                                 break;
12263                         if (ret) {
12264                                 ret = 0;
12265                                 break;
12266                         }
12267                 }
12268                 leaf = path.nodes[0];
12269
12270                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12271                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12272                         path.slots[0]++;
12273                         continue;
12274                 }
12275
12276                 ei = btrfs_item_ptr(leaf, path.slots[0],
12277                                     struct btrfs_extent_item);
12278                 if (!(btrfs_extent_flags(leaf, ei) &
12279                       BTRFS_EXTENT_FLAG_DATA)) {
12280                         path.slots[0]++;
12281                         continue;
12282                 }
12283
12284                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12285                                     key.offset);
12286                 if (ret)
12287                         break;
12288                 path.slots[0]++;
12289         }
12290
12291         btrfs_release_path(&path);
12292         free(buf);
12293         return ret;
12294 }
12295
/*
 * Recalculate the data checksums and store them in the csum tree.
 *
 * An extent tree init wipes out all extent info, so in that case the extent
 * tree cannot be used as the source. When @search_fs_tree is non-zero the
 * fs/subvolume trees are walked instead; otherwise the extent tree is used.
 *
 * Returns the result of the selected walker.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12312
12313 static void free_roots_info_cache(void)
12314 {
12315         if (!roots_info_cache)
12316                 return;
12317
12318         while (!cache_tree_empty(roots_info_cache)) {
12319                 struct cache_extent *entry;
12320                 struct root_item_info *rii;
12321
12322                 entry = first_cache_extent(roots_info_cache);
12323                 if (!entry)
12324                         break;
12325                 remove_cache_extent(roots_info_cache, entry);
12326                 rii = container_of(entry, struct root_item_info, cache_extent);
12327                 free(rii);
12328         }
12329
12330         free(roots_info_cache);
12331         roots_info_cache = NULL;
12332 }
12333
12334 static int build_roots_info_cache(struct btrfs_fs_info *info)
12335 {
12336         int ret = 0;
12337         struct btrfs_key key;
12338         struct extent_buffer *leaf;
12339         struct btrfs_path path;
12340
12341         if (!roots_info_cache) {
12342                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12343                 if (!roots_info_cache)
12344                         return -ENOMEM;
12345                 cache_tree_init(roots_info_cache);
12346         }
12347
12348         btrfs_init_path(&path);
12349         key.objectid = 0;
12350         key.type = BTRFS_EXTENT_ITEM_KEY;
12351         key.offset = 0;
12352         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12353         if (ret < 0)
12354                 goto out;
12355         leaf = path.nodes[0];
12356
12357         while (1) {
12358                 struct btrfs_key found_key;
12359                 struct btrfs_extent_item *ei;
12360                 struct btrfs_extent_inline_ref *iref;
12361                 int slot = path.slots[0];
12362                 int type;
12363                 u64 flags;
12364                 u64 root_id;
12365                 u8 level;
12366                 struct cache_extent *entry;
12367                 struct root_item_info *rii;
12368
12369                 if (slot >= btrfs_header_nritems(leaf)) {
12370                         ret = btrfs_next_leaf(info->extent_root, &path);
12371                         if (ret < 0) {
12372                                 break;
12373                         } else if (ret) {
12374                                 ret = 0;
12375                                 break;
12376                         }
12377                         leaf = path.nodes[0];
12378                         slot = path.slots[0];
12379                 }
12380
12381                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12382
12383                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12384                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12385                         goto next;
12386
12387                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12388                 flags = btrfs_extent_flags(leaf, ei);
12389
12390                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12391                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12392                         goto next;
12393
12394                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12395                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12396                         level = found_key.offset;
12397                 } else {
12398                         struct btrfs_tree_block_info *binfo;
12399
12400                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12401                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12402                         level = btrfs_tree_block_level(leaf, binfo);
12403                 }
12404
12405                 /*
12406                  * For a root extent, it must be of the following type and the
12407                  * first (and only one) iref in the item.
12408                  */
12409                 type = btrfs_extent_inline_ref_type(leaf, iref);
12410                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12411                         goto next;
12412
12413                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12414                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12415                 if (!entry) {
12416                         rii = malloc(sizeof(struct root_item_info));
12417                         if (!rii) {
12418                                 ret = -ENOMEM;
12419                                 goto out;
12420                         }
12421                         rii->cache_extent.start = root_id;
12422                         rii->cache_extent.size = 1;
12423                         rii->level = (u8)-1;
12424                         entry = &rii->cache_extent;
12425                         ret = insert_cache_extent(roots_info_cache, entry);
12426                         ASSERT(ret == 0);
12427                 } else {
12428                         rii = container_of(entry, struct root_item_info,
12429                                            cache_extent);
12430                 }
12431
12432                 ASSERT(rii->cache_extent.start == root_id);
12433                 ASSERT(rii->cache_extent.size == 1);
12434
12435                 if (level > rii->level || rii->level == (u8)-1) {
12436                         rii->level = level;
12437                         rii->bytenr = found_key.objectid;
12438                         rii->gen = btrfs_extent_generation(leaf, ei);
12439                         rii->node_count = 1;
12440                 } else if (level == rii->level) {
12441                         rii->node_count++;
12442                 }
12443 next:
12444                 path.slots[0]++;
12445         }
12446
12447 out:
12448         btrfs_release_path(&path);
12449
12450         return ret;
12451 }
12452
12453 static int maybe_repair_root_item(struct btrfs_path *path,
12454                                   const struct btrfs_key *root_key,
12455                                   const int read_only_mode)
12456 {
12457         const u64 root_id = root_key->objectid;
12458         struct cache_extent *entry;
12459         struct root_item_info *rii;
12460         struct btrfs_root_item ri;
12461         unsigned long offset;
12462
12463         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12464         if (!entry) {
12465                 fprintf(stderr,
12466                         "Error: could not find extent items for root %llu\n",
12467                         root_key->objectid);
12468                 return -ENOENT;
12469         }
12470
12471         rii = container_of(entry, struct root_item_info, cache_extent);
12472         ASSERT(rii->cache_extent.start == root_id);
12473         ASSERT(rii->cache_extent.size == 1);
12474
12475         if (rii->node_count != 1) {
12476                 fprintf(stderr,
12477                         "Error: could not find btree root extent for root %llu\n",
12478                         root_id);
12479                 return -ENOENT;
12480         }
12481
12482         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12483         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12484
12485         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12486             btrfs_root_level(&ri) != rii->level ||
12487             btrfs_root_generation(&ri) != rii->gen) {
12488
12489                 /*
12490                  * If we're in repair mode but our caller told us to not update
12491                  * the root item, i.e. just check if it needs to be updated, don't
12492                  * print this message, since the caller will call us again shortly
12493                  * for the same root item without read only mode (the caller will
12494                  * open a transaction first).
12495                  */
12496                 if (!(read_only_mode && repair))
12497                         fprintf(stderr,
12498                                 "%sroot item for root %llu,"
12499                                 " current bytenr %llu, current gen %llu, current level %u,"
12500                                 " new bytenr %llu, new gen %llu, new level %u\n",
12501                                 (read_only_mode ? "" : "fixing "),
12502                                 root_id,
12503                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12504                                 btrfs_root_level(&ri),
12505                                 rii->bytenr, rii->gen, rii->level);
12506
12507                 if (btrfs_root_generation(&ri) > rii->gen) {
12508                         fprintf(stderr,
12509                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12510                                 root_id, btrfs_root_generation(&ri), rii->gen);
12511                         return -EINVAL;
12512                 }
12513
12514                 if (!read_only_mode) {
12515                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12516                         btrfs_set_root_level(&ri, rii->level);
12517                         btrfs_set_root_generation(&ri, rii->gen);
12518                         write_extent_buffer(path->nodes[0], &ri,
12519                                             offset, sizeof(ri));
12520                 }
12521
12522                 return 1;
12523         }
12524
12525         return 0;
12526 }
12527
12528 /*
12529  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12530  * caused read-only snapshots to be corrupted if they were created at a moment
12531  * when the source subvolume/snapshot had orphan items. The issue was that the
12532  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12533  * node instead of the post orphan cleanup root node.
12534  * So this function, and its callees, just detects and fixes those cases. Even
12535  * though the regression was for read-only snapshots, this function applies to
12536  * any snapshot/subvolume root.
12537  * This must be run before any other repair code - not doing it so, makes other
12538  * repair code delete or modify backrefs in the extent tree for example, which
12539  * will result in an inconsistent fs after repairing the root items.
12540  */
12541 static int repair_root_items(struct btrfs_fs_info *info)
12542 {
12543         struct btrfs_path path;
12544         struct btrfs_key key;
12545         struct extent_buffer *leaf;
12546         struct btrfs_trans_handle *trans = NULL;
12547         int ret = 0;
12548         int bad_roots = 0;
12549         int need_trans = 0;
12550
12551         btrfs_init_path(&path);
12552
12553         ret = build_roots_info_cache(info);
12554         if (ret)
12555                 goto out;
12556
12557         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12558         key.type = BTRFS_ROOT_ITEM_KEY;
12559         key.offset = 0;
12560
12561 again:
12562         /*
12563          * Avoid opening and committing transactions if a leaf doesn't have
12564          * any root items that need to be fixed, so that we avoid rotating
12565          * backup roots unnecessarily.
12566          */
12567         if (need_trans) {
12568                 trans = btrfs_start_transaction(info->tree_root, 1);
12569                 if (IS_ERR(trans)) {
12570                         ret = PTR_ERR(trans);
12571                         goto out;
12572                 }
12573         }
12574
12575         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12576                                 0, trans ? 1 : 0);
12577         if (ret < 0)
12578                 goto out;
12579         leaf = path.nodes[0];
12580
12581         while (1) {
12582                 struct btrfs_key found_key;
12583
12584                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12585                         int no_more_keys = find_next_key(&path, &key);
12586
12587                         btrfs_release_path(&path);
12588                         if (trans) {
12589                                 ret = btrfs_commit_transaction(trans,
12590                                                                info->tree_root);
12591                                 trans = NULL;
12592                                 if (ret < 0)
12593                                         goto out;
12594                         }
12595                         need_trans = 0;
12596                         if (no_more_keys)
12597                                 break;
12598                         goto again;
12599                 }
12600
12601                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12602
12603                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12604                         goto next;
12605                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12606                         goto next;
12607
12608                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12609                 if (ret < 0)
12610                         goto out;
12611                 if (ret) {
12612                         if (!trans && repair) {
12613                                 need_trans = 1;
12614                                 key = found_key;
12615                                 btrfs_release_path(&path);
12616                                 goto again;
12617                         }
12618                         bad_roots++;
12619                 }
12620 next:
12621                 path.slots[0]++;
12622         }
12623         ret = 0;
12624 out:
12625         free_roots_info_cache();
12626         btrfs_release_path(&path);
12627         if (trans)
12628                 btrfs_commit_transaction(trans, info->tree_root);
12629         if (ret < 0)
12630                 return ret;
12631
12632         return bad_roots;
12633 }
12634
12635 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12636 {
12637         struct btrfs_trans_handle *trans;
12638         struct btrfs_block_group_cache *bg_cache;
12639         u64 current = 0;
12640         int ret = 0;
12641
12642         /* Clear all free space cache inodes and its extent data */
12643         while (1) {
12644                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12645                 if (!bg_cache)
12646                         break;
12647                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12648                 if (ret < 0)
12649                         return ret;
12650                 current = bg_cache->key.objectid + bg_cache->key.offset;
12651         }
12652
12653         /* Don't forget to set cache_generation to -1 */
12654         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12655         if (IS_ERR(trans)) {
12656                 error("failed to update super block cache generation");
12657                 return PTR_ERR(trans);
12658         }
12659         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12660         btrfs_commit_transaction(trans, fs_info->tree_root);
12661
12662         return ret;
12663 }
12664
/*
 * Help text for "btrfs check": a NULL-terminated list of lines holding the
 * synopsis, a one-line summary, the long description, an empty separator
 * line and one or more lines per option.
 */
const char * const cmd_check_usage[] = {
        /* synopsis and short summary */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",

        /* long description */
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",

        /* end-of-list marker */
        NULL
};
12695
12696 int cmd_check(int argc, char **argv)
12697 {
12698         struct cache_tree root_cache;
12699         struct btrfs_root *root;
12700         struct btrfs_fs_info *info;
12701         u64 bytenr = 0;
12702         u64 subvolid = 0;
12703         u64 tree_root_bytenr = 0;
12704         u64 chunk_root_bytenr = 0;
12705         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12706         int ret;
12707         int err = 0;
12708         u64 num;
12709         int init_csum_tree = 0;
12710         int readonly = 0;
12711         int clear_space_cache = 0;
12712         int qgroup_report = 0;
12713         int qgroups_repaired = 0;
12714         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12715
12716         while(1) {
12717                 int c;
12718                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12719                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12720                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12721                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12722                 static const struct option long_options[] = {
12723                         { "super", required_argument, NULL, 's' },
12724                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12725                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12726                         { "init-csum-tree", no_argument, NULL,
12727                                 GETOPT_VAL_INIT_CSUM },
12728                         { "init-extent-tree", no_argument, NULL,
12729                                 GETOPT_VAL_INIT_EXTENT },
12730                         { "check-data-csum", no_argument, NULL,
12731                                 GETOPT_VAL_CHECK_CSUM },
12732                         { "backup", no_argument, NULL, 'b' },
12733                         { "subvol-extents", required_argument, NULL, 'E' },
12734                         { "qgroup-report", no_argument, NULL, 'Q' },
12735                         { "tree-root", required_argument, NULL, 'r' },
12736                         { "chunk-root", required_argument, NULL,
12737                                 GETOPT_VAL_CHUNK_TREE },
12738                         { "progress", no_argument, NULL, 'p' },
12739                         { "mode", required_argument, NULL,
12740                                 GETOPT_VAL_MODE },
12741                         { "clear-space-cache", required_argument, NULL,
12742                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12743                         { NULL, 0, NULL, 0}
12744                 };
12745
12746                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12747                 if (c < 0)
12748                         break;
12749                 switch(c) {
12750                         case 'a': /* ignored */ break;
12751                         case 'b':
12752                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12753                                 break;
12754                         case 's':
12755                                 num = arg_strtou64(optarg);
12756                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12757                                         error(
12758                                         "super mirror should be less than %d",
12759                                                 BTRFS_SUPER_MIRROR_MAX);
12760                                         exit(1);
12761                                 }
12762                                 bytenr = btrfs_sb_offset(((int)num));
12763                                 printf("using SB copy %llu, bytenr %llu\n", num,
12764                                        (unsigned long long)bytenr);
12765                                 break;
12766                         case 'Q':
12767                                 qgroup_report = 1;
12768                                 break;
12769                         case 'E':
12770                                 subvolid = arg_strtou64(optarg);
12771                                 break;
12772                         case 'r':
12773                                 tree_root_bytenr = arg_strtou64(optarg);
12774                                 break;
12775                         case GETOPT_VAL_CHUNK_TREE:
12776                                 chunk_root_bytenr = arg_strtou64(optarg);
12777                                 break;
12778                         case 'p':
12779                                 ctx.progress_enabled = true;
12780                                 break;
12781                         case '?':
12782                         case 'h':
12783                                 usage(cmd_check_usage);
12784                         case GETOPT_VAL_REPAIR:
12785                                 printf("enabling repair mode\n");
12786                                 repair = 1;
12787                                 ctree_flags |= OPEN_CTREE_WRITES;
12788                                 break;
12789                         case GETOPT_VAL_READONLY:
12790                                 readonly = 1;
12791                                 break;
12792                         case GETOPT_VAL_INIT_CSUM:
12793                                 printf("Creating a new CRC tree\n");
12794                                 init_csum_tree = 1;
12795                                 repair = 1;
12796                                 ctree_flags |= OPEN_CTREE_WRITES;
12797                                 break;
12798                         case GETOPT_VAL_INIT_EXTENT:
12799                                 init_extent_tree = 1;
12800                                 ctree_flags |= (OPEN_CTREE_WRITES |
12801                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12802                                 repair = 1;
12803                                 break;
12804                         case GETOPT_VAL_CHECK_CSUM:
12805                                 check_data_csum = 1;
12806                                 break;
12807                         case GETOPT_VAL_MODE:
12808                                 check_mode = parse_check_mode(optarg);
12809                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12810                                         error("unknown mode: %s", optarg);
12811                                         exit(1);
12812                                 }
12813                                 break;
12814                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12815                                 if (strcmp(optarg, "v1") == 0) {
12816                                         clear_space_cache = 1;
12817                                 } else if (strcmp(optarg, "v2") == 0) {
12818                                         clear_space_cache = 2;
12819                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12820                                 } else {
12821                                         error(
12822                 "invalid argument to --clear-space-cache, must be v1 or v2");
12823                                         exit(1);
12824                                 }
12825                                 ctree_flags |= OPEN_CTREE_WRITES;
12826                                 break;
12827                 }
12828         }
12829
12830         if (check_argc_exact(argc - optind, 1))
12831                 usage(cmd_check_usage);
12832
12833         if (ctx.progress_enabled) {
12834                 ctx.tp = TASK_NOTHING;
12835                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12836         }
12837
12838         /* This check is the only reason for --readonly to exist */
12839         if (readonly && repair) {
12840                 error("repair options are not compatible with --readonly");
12841                 exit(1);
12842         }
12843
12844         /*
12845          * Not supported yet
12846          */
12847         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12848                 error("low memory mode doesn't support repair yet");
12849                 exit(1);
12850         }
12851
12852         radix_tree_init();
12853         cache_tree_init(&root_cache);
12854
12855         if((ret = check_mounted(argv[optind])) < 0) {
12856                 error("could not check mount status: %s", strerror(-ret));
12857                 err |= !!ret;
12858                 goto err_out;
12859         } else if(ret) {
12860                 error("%s is currently mounted, aborting", argv[optind]);
12861                 ret = -EBUSY;
12862                 err |= !!ret;
12863                 goto err_out;
12864         }
12865
12866         /* only allow partial opening under repair mode */
12867         if (repair)
12868                 ctree_flags |= OPEN_CTREE_PARTIAL;
12869
12870         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12871                                   chunk_root_bytenr, ctree_flags);
12872         if (!info) {
12873                 error("cannot open file system");
12874                 ret = -EIO;
12875                 err |= !!ret;
12876                 goto err_out;
12877         }
12878
12879         global_info = info;
12880         root = info->fs_root;
12881         if (clear_space_cache == 1) {
12882                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12883                         error(
12884                 "free space cache v2 detected, use --clear-space-cache v2");
12885                         ret = 1;
12886                         goto close_out;
12887                 }
12888                 printf("Clearing free space cache\n");
12889                 ret = clear_free_space_cache(info);
12890                 if (ret) {
12891                         error("failed to clear free space cache");
12892                         ret = 1;
12893                 } else {
12894                         printf("Free space cache cleared\n");
12895                 }
12896                 goto close_out;
12897         } else if (clear_space_cache == 2) {
12898                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12899                         printf("no free space cache v2 to clear\n");
12900                         ret = 0;
12901                         goto close_out;
12902                 }
12903                 printf("Clear free space cache v2\n");
12904                 ret = btrfs_clear_free_space_tree(info);
12905                 if (ret) {
12906                         error("failed to clear free space cache v2: %d", ret);
12907                         ret = 1;
12908                 } else {
12909                         printf("free space cache v2 cleared\n");
12910                 }
12911                 goto close_out;
12912         }
12913
12914         /*
12915          * repair mode will force us to commit transaction which
12916          * will make us fail to load log tree when mounting.
12917          */
12918         if (repair && btrfs_super_log_root(info->super_copy)) {
12919                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12920                 if (!ret) {
12921                         ret = 1;
12922                         err |= !!ret;
12923                         goto close_out;
12924                 }
12925                 ret = zero_log_tree(root);
12926                 err |= !!ret;
12927                 if (ret) {
12928                         error("failed to zero log tree: %d", ret);
12929                         goto close_out;
12930                 }
12931         }
12932
12933         uuid_unparse(info->super_copy->fsid, uuidbuf);
12934         if (qgroup_report) {
12935                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12936                        uuidbuf);
12937                 ret = qgroup_verify_all(info);
12938                 err |= !!ret;
12939                 if (ret == 0)
12940                         report_qgroups(1);
12941                 goto close_out;
12942         }
12943         if (subvolid) {
12944                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12945                        subvolid, argv[optind], uuidbuf);
12946                 ret = print_extent_state(info, subvolid);
12947                 err |= !!ret;
12948                 goto close_out;
12949         }
12950         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12951
12952         if (!extent_buffer_uptodate(info->tree_root->node) ||
12953             !extent_buffer_uptodate(info->dev_root->node) ||
12954             !extent_buffer_uptodate(info->chunk_root->node)) {
12955                 error("critical roots corrupted, unable to check the filesystem");
12956                 err |= !!ret;
12957                 ret = -EIO;
12958                 goto close_out;
12959         }
12960
12961         if (init_extent_tree || init_csum_tree) {
12962                 struct btrfs_trans_handle *trans;
12963
12964                 trans = btrfs_start_transaction(info->extent_root, 0);
12965                 if (IS_ERR(trans)) {
12966                         error("error starting transaction");
12967                         ret = PTR_ERR(trans);
12968                         err |= !!ret;
12969                         goto close_out;
12970                 }
12971
12972                 if (init_extent_tree) {
12973                         printf("Creating a new extent tree\n");
12974                         ret = reinit_extent_tree(trans, info);
12975                         err |= !!ret;
12976                         if (ret)
12977                                 goto close_out;
12978                 }
12979
12980                 if (init_csum_tree) {
12981                         printf("Reinitialize checksum tree\n");
12982                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12983                         if (ret) {
12984                                 error("checksum tree initialization failed: %d",
12985                                                 ret);
12986                                 ret = -EIO;
12987                                 err |= !!ret;
12988                                 goto close_out;
12989                         }
12990
12991                         ret = fill_csum_tree(trans, info->csum_root,
12992                                              init_extent_tree);
12993                         err |= !!ret;
12994                         if (ret) {
12995                                 error("checksum tree refilling failed: %d", ret);
12996                                 return -EIO;
12997                         }
12998                 }
12999                 /*
13000                  * Ok now we commit and run the normal fsck, which will add
13001                  * extent entries for all of the items it finds.
13002                  */
13003                 ret = btrfs_commit_transaction(trans, info->extent_root);
13004                 err |= !!ret;
13005                 if (ret)
13006                         goto close_out;
13007         }
13008         if (!extent_buffer_uptodate(info->extent_root->node)) {
13009                 error("critical: extent_root, unable to check the filesystem");
13010                 ret = -EIO;
13011                 err |= !!ret;
13012                 goto close_out;
13013         }
13014         if (!extent_buffer_uptodate(info->csum_root->node)) {
13015                 error("critical: csum_root, unable to check the filesystem");
13016                 ret = -EIO;
13017                 err |= !!ret;
13018                 goto close_out;
13019         }
13020
13021         if (!ctx.progress_enabled)
13022                 fprintf(stderr, "checking extents\n");
13023         if (check_mode == CHECK_MODE_LOWMEM)
13024                 ret = check_chunks_and_extents_v2(root);
13025         else
13026                 ret = check_chunks_and_extents(root);
13027         err |= !!ret;
13028         if (ret)
13029                 error(
13030                 "errors found in extent allocation tree or chunk allocation");
13031
13032         ret = repair_root_items(info);
13033         err |= !!ret;
13034         if (ret < 0) {
13035                 error("failed to repair root items: %s", strerror(-ret));
13036                 goto close_out;
13037         }
13038         if (repair) {
13039                 fprintf(stderr, "Fixed %d roots.\n", ret);
13040                 ret = 0;
13041         } else if (ret > 0) {
13042                 fprintf(stderr,
13043                        "Found %d roots with an outdated root item.\n",
13044                        ret);
13045                 fprintf(stderr,
13046                         "Please run a filesystem check with the option --repair to fix them.\n");
13047                 ret = 1;
13048                 err |= !!ret;
13049                 goto close_out;
13050         }
13051
13052         if (!ctx.progress_enabled) {
13053                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13054                         fprintf(stderr, "checking free space tree\n");
13055                 else
13056                         fprintf(stderr, "checking free space cache\n");
13057         }
13058         ret = check_space_cache(root);
13059         err |= !!ret;
13060         if (ret) {
13061                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13062                         error("errors found in free space tree");
13063                 else
13064                         error("errors found in free space cache");
13065                 goto out;
13066         }
13067
13068         /*
13069          * We used to have to have these hole extents in between our real
13070          * extents so if we don't have this flag set we need to make sure there
13071          * are no gaps in the file extents for inodes, otherwise we can just
13072          * ignore it when this happens.
13073          */
13074         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13075         if (!ctx.progress_enabled)
13076                 fprintf(stderr, "checking fs roots\n");
13077         if (check_mode == CHECK_MODE_LOWMEM)
13078                 ret = check_fs_roots_v2(root->fs_info);
13079         else
13080                 ret = check_fs_roots(root, &root_cache);
13081         err |= !!ret;
13082         if (ret) {
13083                 error("errors found in fs roots");
13084                 goto out;
13085         }
13086
13087         fprintf(stderr, "checking csums\n");
13088         ret = check_csums(root);
13089         err |= !!ret;
13090         if (ret) {
13091                 error("errors found in csum tree");
13092                 goto out;
13093         }
13094
13095         fprintf(stderr, "checking root refs\n");
13096         /* For low memory mode, check_fs_roots_v2 handles root refs */
13097         if (check_mode != CHECK_MODE_LOWMEM) {
13098                 ret = check_root_refs(root, &root_cache);
13099                 err |= !!ret;
13100                 if (ret) {
13101                         error("errors found in root refs");
13102                         goto out;
13103                 }
13104         }
13105
13106         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13107                 struct extent_buffer *eb;
13108
13109                 eb = list_first_entry(&root->fs_info->recow_ebs,
13110                                       struct extent_buffer, recow);
13111                 list_del_init(&eb->recow);
13112                 ret = recow_extent_buffer(root, eb);
13113                 err |= !!ret;
13114                 if (ret) {
13115                         error("fails to fix transid errors");
13116                         break;
13117                 }
13118         }
13119
13120         while (!list_empty(&delete_items)) {
13121                 struct bad_item *bad;
13122
13123                 bad = list_first_entry(&delete_items, struct bad_item, list);
13124                 list_del_init(&bad->list);
13125                 if (repair) {
13126                         ret = delete_bad_item(root, bad);
13127                         err |= !!ret;
13128                 }
13129                 free(bad);
13130         }
13131
13132         if (info->quota_enabled) {
13133                 fprintf(stderr, "checking quota groups\n");
13134                 ret = qgroup_verify_all(info);
13135                 err |= !!ret;
13136                 if (ret) {
13137                         error("failed to check quota groups");
13138                         goto out;
13139                 }
13140                 report_qgroups(0);
13141                 ret = repair_qgroups(info, &qgroups_repaired);
13142                 err |= !!ret;
13143                 if (err) {
13144                         error("failed to repair quota groups");
13145                         goto out;
13146                 }
13147                 ret = 0;
13148         }
13149
13150         if (!list_empty(&root->fs_info->recow_ebs)) {
13151                 error("transid errors in file system");
13152                 ret = 1;
13153                 err |= !!ret;
13154         }
13155 out:
13156         if (found_old_backref) { /*
13157                  * there was a disk format change when mixed
13158                  * backref was in testing tree. The old format
13159                  * existed about one week.
13160                  */
13161                 printf("\n * Found old mixed backref format. "
13162                        "The old format is not supported! *"
13163                        "\n * Please mount the FS in readonly mode, "
13164                        "backup data and re-format the FS. *\n\n");
13165                 err |= 1;
13166         }
13167         printf("found %llu bytes used, ",
13168                (unsigned long long)bytes_used);
13169         if (err)
13170                 printf("error(s) found\n");
13171         else
13172                 printf("no error found\n");
13173         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13174         printf("total tree bytes: %llu\n",
13175                (unsigned long long)total_btree_bytes);
13176         printf("total fs tree bytes: %llu\n",
13177                (unsigned long long)total_fs_tree_bytes);
13178         printf("total extent tree bytes: %llu\n",
13179                (unsigned long long)total_extent_tree_bytes);
13180         printf("btree space waste bytes: %llu\n",
13181                (unsigned long long)btree_space_waste);
13182         printf("file data blocks allocated: %llu\n referenced %llu\n",
13183                 (unsigned long long)data_bytes_allocated,
13184                 (unsigned long long)data_bytes_referenced);
13185
13186         free_qgroup_counts();
13187         free_root_recs_tree(&root_cache);
13188 close_out:
13189         close_ctree(root);
13190 err_out:
13191         if (ctx.progress_enabled)
13192                 task_deinit(ctx.info);
13193
13194         return err;
13195 }